if __name__ == '__main__':
    # Script entry point: score the detector on the configured validation
    # split.  Detection is only (re)run when forced through
    # `override_prev_results` or when no results file from an earlier run
    # exists; the scoring step always runs.
    SET_NAME = params['val_set']
    VAL_GT = f'datasets/{params["project_name"]}/annotations/instances_{SET_NAME}.json'
    VAL_IMGS = f'datasets/{params["project_name"]}/{SET_NAME}/'
    MAX_IMAGES = 10000

    coco_gt = COCO(VAL_GT)
    # Only the first MAX_IMAGES image ids are evaluated.
    image_ids = coco_gt.getImgIds()[:MAX_IMAGES]

    if override_prev_results or not os.path.exists(
            f'{SET_NAME}_bbox_results.json'):
        # NOTE(review): eval() executes arbitrary expressions taken from the
        # project config -- only use this with trusted configuration files.
        model = EfficientDetBackbone(
            compound_coef=compound_coef,
            num_classes=len(obj_list),
            ratios=eval(params['anchors_ratios']),
            scales=eval(params['anchors_scales']),
        )
        checkpoint = torch.load(weights_path,
                                map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint)
        model.requires_grad_(False)
        model.eval()

        if use_cuda:
            model.cuda(gpu)

            # NOTE(review): half precision is assumed to be nested under the
            # CUDA branch -- confirm against the original layout.
            if use_float16:
                model.half()

        image_ids = evaluate_coco(VAL_IMGS, SET_NAME, image_ids, coco_gt,
                                  model)

    _eval(coco_gt, image_ids, f'{SET_NAME}_bbox_results.json')
class Model():
    """EfficientDet wrapper that detects objects and draws them on images.

    The constructor builds the backbone for ``compound_coef``, loads the
    matching ``weights/efficientdet-d{compound_coef}.pth`` checkpoint and
    moves the network to the GPU (``use_cuda`` is fixed to ``True``).

    Args:
        compound_coef: EfficientDet variant index, used to pick the weights
            file and the default input size.
        force_input_size: Input size passed to preprocessing; set ``None``
            to use the default size of the chosen variant.
        threshold: Score threshold handed to ``postprocess``.
        iou_threshold: IoU threshold handed to ``postprocess``.
    """

    def __init__(self, compound_coef=0, force_input_size=512, threshold=0.2,
                 iou_threshold=0.2):
        self.compound_coef = compound_coef
        self.force_input_size = force_input_size  # set None to use default size
        self.threshold = threshold
        self.iou_threshold = iou_threshold

        self.use_cuda = True
        self.use_float16 = False
        cudnn.fastest = True
        cudnn.benchmark = True

        # Class names indexed by class id; empty strings are unused ids.
        self.obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
                         'traffic light', 'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
                         'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                         'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                         'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                         'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                         'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                         'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '',
                         'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
                         'toaster', 'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                         'hair drier', 'toothbrush']

        # tf bilinear interpolation is different from any other's, just make do
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        if self.force_input_size is None:
            self.input_size = self.input_sizes[self.compound_coef]
        else:
            self.input_size = self.force_input_size

        self.model = EfficientDetBackbone(
            compound_coef=self.compound_coef, num_classes=len(self.obj_list))
        # Load onto the CPU first so the checkpoint can be read regardless of
        # the device it was saved from; the model is moved to the GPU below.
        self.model.load_state_dict(torch.load(
            f'weights/efficientdet-d{self.compound_coef}.pth',
            map_location='cpu'))
        self.model.requires_grad_(False)
        self.model.eval()

        if self.use_cuda:
            self.model = self.model.cuda()
        if self.use_float16:
            self.model = self.model.half()

        # Box decoding helpers, built once and reused by every predict().
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

    def predict(self, raw_img):
        """Run detection on ``raw_img`` and return the post-processed output.

        The preprocessed originals are kept on ``self.ori_imgs`` so that
        ``run`` can draw on them afterwards.

        Returns:
            The result of ``invert_affine`` applied to the detections.
        """
        self.ori_imgs, self.framed_imgs, self.framed_metas = preprocess_raw(
            raw_img, max_size=self.input_size)

        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in self.framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in self.framed_imgs], 0)

        dtype = torch.float16 if self.use_float16 else torch.float32
        # Channels-last frames -> channels-first batch.
        x = x.to(dtype).permute(0, 3, 1, 2)

        with torch.no_grad():
            self.features, self.regression, self.classification, self.anchors = self.model(x)

            out = postprocess(x,
                              self.anchors, self.regression, self.classification,
                              self.regressBoxes, self.clipBoxes,
                              self.threshold, self.iou_threshold)

        pred = invert_affine(self.framed_metas, out)
        return pred

    def label_img(self, preds, imgs):
        """Draw every detection in ``preds`` onto the matching image, in place.

        Args:
            preds: Per-image dicts with ``'rois'``, ``'class_ids'`` and
                ``'scores'`` entries.
            imgs: Images to annotate, parallel to ``preds``.

        Returns:
            ``imgs`` (the same list that was passed in).
        """
        for i, img in enumerate(imgs):
            pred = preds[i]
            if len(pred['rois']) == 0:
                continue

            for j in range(len(pred['rois'])):
                # `np.int` was removed in NumPy 1.24; the builtin int is the
                # exact type it used to alias.
                (x1, y1, x2, y2) = pred['rois'][j].astype(int)
                cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
                obj = self.obj_list[pred['class_ids'][j]]
                score = float(pred['scores'][j])

                cv2.putText(img, '{}, {:.3f}'.format(obj, score),
                            (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (255, 255, 0), 1)
        return imgs

    def run(self, raw_img):
        """Detect objects in ``raw_img`` and return the first annotated image."""
        pred_label = self.predict(raw_img)
        pred_img = self.label_img(pred_label, self.ori_imgs)
        return pred_img[0]
def excuteModel(videoname):
    """Annotate a video with EfficientDet detections and publish the result.

    Every frame of the input video is run through the detector, the annotated
    frames are written to ``outvideo/<name>.mp4`` next to this file, and the
    finished video is copied into the client asset directory.

    Args:
        videoname: Base name (without ``.mp4``) of an uploaded video, or
            ``None`` to process the bundled default video.

    Returns:
        Path of the annotated video inside the client asset directory.

    Raises:
        RuntimeError: If no frame could be read from the input video.
    """
    # Video's path
    if videoname is not None:
        video_src = os.path.join(r'D:\GitHub\Detection\server\uploads', f"{videoname}.mp4")
    else:
        video_src = 'D:\\GitHub\\Detection\\server\\AImodel\\videotest\\default.mp4'

    compound_coef = 2
    trained_weights = 'D:\\GitHub\\Detection\\server\\AImodel\\weights\\efficientdet-video.pth'
    force_input_size = None  # set None to use default size

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    # Class names indexed by class id; empty strings are unused ids.
    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
                'traffic light', 'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
                'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '',
                'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
                'toaster', 'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                'hair drier', 'toothbrush']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(
        compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(trained_weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Output locations: one file name shared by the local result and the
    # copy that is published to the client.
    out_name = f"{videoname}.mp4" if videoname is not None else "default.mp4"
    path_out = os.path.join(os.path.dirname(
        os.path.abspath(__file__)), 'outvideo')
    # VideoWriter does not create missing directories.
    os.makedirs(path_out, exist_ok=True)
    path_result = os.path.join(path_out, out_name)
    path_asset = os.path.join(r"D:\GitHub\Detection\client\src\assets", out_name)

    # Video capture
    cap = cv2.VideoCapture(video_src)
    length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # The previous probe queried an undefined capture object and therefore
    # always reported failure; report the count read from `cap` instead.
    if length > 0:
        print("[INFO] {} total frames in video".format(length))
    else:
        print("[INFO] could not determine # of frames in video")

    writer = None
    try:
        for i in range(0, length):
            ret, frame = cap.read()
            if not ret:
                break

            # frame preprocessing
            ori_imgs, framed_imgs, framed_metas = preprocess_video(
                frame, max_size=input_size)

            if use_cuda:
                x = torch.stack([torch.from_numpy(fi).cuda()
                                 for fi in framed_imgs], 0)
            else:
                x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

            x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
                0, 3, 1, 2)

            # model predict
            with torch.no_grad():
                features, regression, classification, anchors = model(x)

                out = postprocess(x,
                                  anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  threshold, iou_threshold)

            # result
            out = invert_affine(framed_metas, out)
            img_show = display(out, ori_imgs, obj_list)

            if writer is None:
                # initialize our video writer lazily: the frame size is only
                # known once the first annotated frame exists
                fourcc = 0x00000021
                writer = cv2.VideoWriter(path_result, fourcc, 30,
                                         (img_show.shape[1], img_show.shape[0]), True)

            # write the output frame to disk
            writer.write(img_show)
            print("Processing data... " + str(round((i+1)/length, 3)*100) + " %")

            # NOTE(review): no window is shown, so this key poll is kept only
            # to preserve the previous per-frame behaviour.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the handles even when a frame fails mid-way.
        print("[INFO] cleaning up...")
        if writer is not None:
            writer.release()
        cap.release()
        cv2.destroyAllWindows()

    if writer is None:
        # Nothing was written, so there is no result file to publish.
        raise RuntimeError(f"no frames could be read from {video_src}")

    copyfile(path_result, path_asset)
    return path_asset
class EfficientDet(object):
    """Callable EfficientDet-D0 detector returning boxes, scores and class ids.

    Args:
        weightfile: Path of the checkpoint passed to ``torch.load``.
        score_thresh: Score threshold handed to ``postprocess``.
        nms_thresh: IoU threshold handed to ``postprocess``.
        is_xywh: When true, boxes are converted with ``xyxy_to_xywh`` before
            being returned.
        use_cuda: Run the model and inputs on the GPU.
        use_float16: Run the model and inputs in half precision.
    """

    # Class names indexed by class id; empty strings are unused ids.
    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
                'traffic light', 'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
                'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '',
                'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
                'toaster', 'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                'hair drier', 'toothbrush']

    def __init__(self, weightfile, score_thresh, nms_thresh, is_xywh=True,
                 use_cuda=True, use_float16=False):
        # constants
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.use_cuda = use_cuda
        self.is_xywh = is_xywh

        compound_coef = 0
        force_input_size = None  # set None to use default size

        # Honour the caller's choice; it used to be overwritten with False.
        self.use_float16 = use_float16
        cudnn.fastest = True
        cudnn.benchmark = True

        # tf bilinear interpolation is different from any other's, just make do
        input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = input_sizes[compound_coef] if \
            force_input_size is None else force_input_size

        # load model
        self.model = EfficientDetBackbone(compound_coef=compound_coef,
                                          num_classes=len(self.obj_list))
        # Load onto the CPU first so a GPU-saved checkpoint also works with
        # use_cuda=False; the model is moved to the GPU below when requested.
        self.model.load_state_dict(torch.load(weightfile, map_location='cpu'))
        self.model.requires_grad_(False)
        self.model.eval()
        # Reported only once the weights really are loaded.
        print('Loading weights from %s... Done!' % (weightfile))

        if self.use_cuda:
            self.model = self.model.cuda()
        if self.use_float16:
            self.model = self.model.half()

        # Box
        self.regressBoxes = BBoxTransform()
        self.clipBoxes = ClipBoxes()

    def __call__(self, imgs):
        """Detect objects in ``imgs``.

        Returns:
            ``(boxes, scores, class_ids)`` as per-image lists, or
            ``(None, None, None)`` when post-processing yields no entries.
            Boxes are converted by ``xyxy_to_xywh`` when ``is_xywh`` is set.
        """
        # frame preprocessing
        _, framed_imgs, framed_metas = preprocess(imgs,
                                                  max_size=self.input_size)

        if self.use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        dtype = torch.float32 if not self.use_float16 else torch.float16
        # Channels-last frames -> channels-first batch.
        x = x.to(dtype).permute(0, 3, 1, 2)

        # model predict
        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              self.regressBoxes, self.clipBoxes,
                              self.score_thresh, self.nms_thresh)

        # result
        out = invert_affine(framed_metas, out)

        if len(out) == 0:
            return None, None, None

        rois = [o['rois'] for o in out]
        scores = [o['scores'] for o in out]
        class_ids = [o['class_ids'] for o in out]
        if self.is_xywh:
            return xyxy_to_xywh(rois), scores, class_ids
        return rois, scores, class_ids
class PTVisionService(PTServingBaseService):
    """Serving wrapper that runs EfficientDet on uploaded images.

    Model size, class names and thresholds come from ``cfg``; anchor settings
    come from the project YAML file.
    """

    def __init__(self, model_name, model_path):
        # Call the parent-class constructor.
        super(PTVisionService, self).__init__(model_name, model_path)

        # Load the model with the custom code below.
        checkpoint_file = model_path
        # Context manager so the config file handle is closed again.
        with open(f'/home/mind/model/projects/{cfg.project}.yml') as config_file:
            params = yaml.safe_load(config_file)

        # NOTE(review): eval() executes arbitrary expressions taken from the
        # project config -- only use this with trusted configuration files.
        self.model = EfficientDetBackbone(
            compound_coef=cfg.compound_coef,
            num_classes=len(cfg.category),
            ratios=eval(params['anchors_ratios']),
            scales=eval(params['anchors_scales']))
        self.model.load_state_dict(
            torch.load(checkpoint_file, map_location=torch.device('cpu')))
        self.model.requires_grad_(False)
        self.model.eval()

        # self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_sizes = [512, 896, 768, 896, 1024, 1280, 1280, 1536]

        # Inverse of cfg.category: maps each value back to its key.
        self.class_dict = {val: key for key, val in cfg.category.items()}

    def _preprocess(self, data):
        """Flatten the request payload into a list of file contents.

        Two HTTPS request forms exist:
          1. form-data file requests: data = {"request key": {"file name": <file io>}}
          2. JSON requests: data = json.loads("JSON body passed to the API")
        """
        imgs_path = []
        for files in data.values():
            imgs_path.extend(files.values())
        return imgs_path

    def _inference(self, imgs_path):
        """Detect objects in each image and return one result dict per image.

        Each dict holds parallel ``detection_classes``, ``detection_boxes``
        and ``detection_scores`` lists.
        """
        results = []
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        for img_path in imgs_path:
            ori_imgs, framed_imgs, framed_metas = preprocess(
                [img_path], max_size=self.input_sizes[cfg.compound_coef])
            x = torch.from_numpy(framed_imgs[0]).float()
            # Add the batch axis, then channels-last -> channels-first.
            x = x.unsqueeze(0).permute(0, 3, 1, 2)

            # Inference only: no autograd bookkeeping is needed.
            with torch.no_grad():
                features, regression, classification, anchors = self.model(x)
                preds = self._my_postprocess(x, anchors, regression,
                                             classification, regressBoxes,
                                             clipBoxes, cfg.threshold,
                                             cfg.nms_threshold)

            preds = invert_affine(framed_metas, preds)[0]

            scores = preds['scores']
            class_ids = preds['class_ids']
            rois = preds['rois']

            image_result = {
                'detection_classes': [],
                'detection_boxes': [],
                'detection_scores': []
            }

            for roi_id in range(rois.shape[0]):
                score = float(scores[roi_id])
                label = int(class_ids[roi_id])
                box = rois[roi_id, :]
                # class_dict is looked up with the class id shifted by one.
                image_result['detection_classes'].append(
                    self.class_dict[label + 1])
                image_result['detection_boxes'].append(box.tolist())
                image_result['detection_scores'].append(score)

            results.append(image_result)

        return results

    def _postprocess(self, data):
        """Unwrap single-image responses; return the list unchanged otherwise."""
        if len(data) == 1:
            return data[0]
        return data

    def _my_postprocess(self, x, anchors, regression, classification,
                        regressBoxes, clipBoxes, threshold, iou_threshold):
        """Decode, threshold and NMS-filter the raw network outputs.

        Returns:
            One dict per batch item with NumPy ``rois``, ``class_ids`` and
            ``scores``; the arrays are empty when nothing is kept.
        """
        transformed_anchors = regressBoxes(anchors, regression)
        transformed_anchors = clipBoxes(transformed_anchors, x)
        # Best class score per anchor.
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > threshold)[:, :, 0]

        out = []
        for i in range(x.shape[0]):
            keep = scores_over_thresh[i]
            if keep.sum() == 0:
                out.append({
                    'rois': np.array(()),
                    'class_ids': np.array(()),
                    'scores': np.array(()),
                })
                continue

            classification_per = classification[i, keep, ...].permute(1, 0)
            transformed_anchors_per = transformed_anchors[i, keep, ...]
            scores_per = scores[i, keep, ...]
            scores_, classes_ = classification_per.max(dim=0)
            anchors_nms_idx = batched_nms(transformed_anchors_per,
                                          scores_per[:, 0],
                                          classes_,
                                          iou_threshold=iou_threshold)

            if anchors_nms_idx.shape[0] != 0:
                classes_ = classes_[anchors_nms_idx]
                scores_ = scores_[anchors_nms_idx]
                boxes_ = transformed_anchors_per[anchors_nms_idx, :]
                # Swap the coordinate order within each corner pair.
                boxes_ = boxes_[:, [1, 0, 3, 2]]

                out.append({
                    'rois': boxes_.numpy(),
                    'class_ids': classes_.numpy(),
                    'scores': scores_.numpy(),
                })
            else:
                out.append({
                    'rois': np.array(()),
                    'class_ids': np.array(()),
                    'scores': np.array(()),
                })

        return out
def main(i):
    """Run EfficientDet-D``i`` over a fixed frame range and dump the results.

    Frames ``frame1000000.jpg`` .. ``frame1099999.jpg`` under the base
    directory are processed one by one; the accumulated detections are
    written to ``<base_dir>/10/res-<i>.json``.

    Args:
        i: Compound coefficient of the model to evaluate; also used in the
            name of the output file.
    """
    compound_coef = i
    force_input_size = None  # set None to use default size

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    # Class names indexed by class id; empty strings are unused ids.
    obj_list = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat',
                'traffic light', 'fire hydrant', '', 'stop sign', 'parking meter', 'bench', 'bird', 'cat',
                'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
                'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
                'tennis racket', 'bottle', '', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
                'cake', 'chair', 'couch', 'potted plant', 'bed', '', 'dining table', '', '', 'toilet', '',
                'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
                'toaster', 'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
                'hair drier', 'toothbrush']

    out_dict = dict()

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 ratios=anchor_ratios, scales=anchor_scales)
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth', map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()

    # Built once instead of once per frame.
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    base_dir = '/data/jiashenc/jackson/'
    print('Processing Det-' + str(i))
    for k in range(1000000, 1100000):
        if k % 1000 == 0:
            print('    Finish {} frames'.format(k + 1))

        img_path = os.path.join(base_dir, 'frame{}.jpg'.format(k))
        ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

        if use_cuda:
            x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x,
                              anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)

        out = invert_affine(framed_metas, out)
        # Accumulate this frame's detections into the shared result dict.
        to_json(out, out_dict)

    # NOTE(review): the dump is assumed to happen once, after all frames --
    # confirm against the original layout.
    with open(os.path.join(base_dir, '10', 'res-{:d}.json'.format(i)), 'w') as f:
        json.dump(out_dict, f)
def batch_inference(args):
    """Run rotated-box detection over a list of images and write text results.

    Reads image base names from ``args.file_list`` (first space-separated
    token per line), runs the detector on ``<args.img_path>/<name>.jpg`` and
    passes one record per detection to ``write_into_txt``.

    Args:
        args: Namespace providing ``compound_coef``, ``pth``, ``use_cuda``,
            ``device``, ``file_list``, ``img_path``, ``score_threshold`` and
            ``iou_threshold``.
    """
    input_size = input_sizes[args.compound_coef]
    model = EfficientDetBackbone(compound_coef=args.compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    # load pth file
    model.load_state_dict(torch.load(args.pth, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if args.use_cuda:
        model = model.cuda(device=args.device)

    imgpath = args.img_path
    with open(args.file_list, 'r') as f_in:
        # Only the first token of each line is the image base name.
        content = [line.strip().split(' ')[0] for line in f_in]

    # Box helpers are stateless here; build them once, not once per image.
    regressBoxes = Rotation_BBoxTransform()
    clipBoxes = ClipBoxes()
    addBoxes = BBoxAddScores()
    file_name = ['Task1_large-vehicle.txt', 'Task1_small-vehicle.txt']

    for filebasename in tqdm(content, ncols=88):
        img_path = os.path.join(imgpath, filebasename + '.jpg')
        try:
            ori_imgs, framed_imgs, framed_metas = eval_preprocess(
                img_path, max_size=input_size)
        except Exception:
            # Previously the failure was swallowed and the loop went on with
            # the tensors of the previous image (or crashed with NameError on
            # the first one).  Report it and skip this image instead.
            print(f'{img_path.split("/")[-1]} is not in {args.img_path}')
            continue

        if args.use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        # Channels-last frames -> channels-first batch.
        x = x.to(torch.float32).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = model(x)

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, addBoxes,
                              args.score_threshold, args.iou_threshold)

        out = invert_affine(framed_metas, out)

        rois = out[0]['rois']
        class_ids = out[0]['class_ids']
        scores = out[0]['scores']

        filecontent = []
        for ii in range(len(scores)):
            xmin, ymin, xmax, ymax, theta = rois[ii]
            # Convert the rotated box into its four corner points.
            rect = OPENCV2xywh([xmin, ymin, xmax, ymax, theta])[0].tolist()
            x1, y1 = float(rect[0][0]), float(rect[0][1])
            x2, y2 = float(rect[1][0]), float(rect[1][1])
            x3, y3 = float(rect[2][0]), float(rect[2][1])
            x4, y4 = float(rect[3][0]), float(rect[3][1])
            filecontent.append([
                int(class_ids[ii]), filebasename,
                float(scores[ii]), x1, y1, x2, y2, x3, y3, x4, y4
            ])

        write_into_txt(file_name, filecontent)
def efficientDet_video_inference(video_src,compound_coef = 0,force_input_size=None,
                                 frame_skipping = 3,
                                 threshold=0.2,out_path=None,imshow=False,
                                 display_fps=False):
    """Detect and track objects in a video with EfficientDet and deep-sort.

    Args:
        video_src: Source handed to ``cv2.VideoCapture``.
        compound_coef: EfficientDet variant index (selects weights and size).
        force_input_size: Detector input size; ``None`` uses the default for
            the chosen variant.
        frame_skipping: Run the detector on every ``frame_skipping``-th frame;
            ``0`` runs it on every frame.
        threshold: Detection score threshold.
        out_path: When given, annotated frames are written to this video file.
        imshow: Show the annotated frames in a window; ``q`` stops playback.
        display_fps: Overlay the detector speed instead of ``0``.
    """
    # deep-sort variables
    # Definition of the parameters
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0

    model_filename = '/home/shaheryar/Desktop/Projects/Football-Monitoring/deep_sort/model_weights/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric, n_init=5)

    # efficientDet-pytorch variables
    iou_threshold = 0.4
    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list))
    model.load_state_dict(torch.load(f'weights/efficientdet-d{compound_coef}.pth'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    # Video capture
    cap = cv2.VideoCapture(video_src)
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'MPEG')
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Video fps", fps)
    # A reported rate below 1 would make the progress modulo divide by zero.
    fps_int = int(fps)

    # Stays None when no output file was requested, so cleanup can test it.
    outp = None
    if out_path is not None:
        outp = cv2.VideoWriter(out_path, fourcc, fps, (frame_width, frame_height))

    i = 0
    start = time.time()
    current_frame_fps = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # The first frame (i == 0) always runs detection, so org_boxes
            # and t2 exist before any skipped frame reuses them.
            if (frame_skipping == 0 or i % frame_skipping == 0):
                # frame preprocessing (running detections)
                ori_imgs, framed_imgs, framed_metas, t1 = preprocess_video(
                    frame, width=input_size, height=input_size)
                # NOTE(review): the GPU path treats the frames as tensors and
                # the CPU path as NumPy arrays -- confirm what
                # preprocess_video returns.
                if use_cuda:
                    x = torch.stack([fi.cuda() for fi in framed_imgs], 0)
                else:
                    x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

                # model predict
                t1 = time.time()
                with torch.no_grad():
                    features, regression, classification, anchors = model(x)

                    out = postprocess(x,
                                      anchors, regression, classification,
                                      regressBoxes, clipBoxes,
                                      threshold, iou_threshold)
                # Post processing
                out = invert_affine(framed_metas, out)
                # decoding bbox ,object name and scores
                boxes, classes, scores = decode_predictions(out[0])
                org_boxes = boxes.copy()
                t2 = time.time() - t1

                # feature extraction for deep sort
                boxes = [convert_bbox_to_deep_sort_format(frame.shape, b) for b in boxes]
                features = encoder(frame, boxes)
                detections = [Detection(bbox, 1.0, feature)
                              for bbox, feature in zip(boxes, features)]
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
                detections = [detections[idx] for idx in indices]
                tracker.predict()
                tracker.update(detections)

            i = i + 1
            img_show = frame.copy()

            # Boxes and tracks are paired by position as before; zip stops at
            # the shorter list, so fewer tracks than boxes no longer raises
            # IndexError.
            for box, track in zip(org_boxes, tracker.tracks):
                img_show = drawBoxes(img_show, box, (255, 255, 0), str(track.track_id))

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue
                bbox = track.to_tlbr()
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                roi = frame[y1:y2, x1:x2]
                cv2.rectangle(img_show, (x1, y1), (x2, y2),
                              update_color_association(roi, track.track_id), 2)
                cv2.putText(img_show, str(track.track_id), (x1, y1), 0,
                            5e-3 * 100, (255, 255, 0), 1)

            if display_fps and t2 > 0:
                current_frame_fps = 1 / t2
            else:
                current_frame_fps = 0

            cv2.putText(img_show, 'FPS: {0:.2f}'.format(current_frame_fps), (30, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (255, 255, 0), 2, cv2.LINE_AA)

            # Progress report once per second of video.
            if fps_int > 0 and i % fps_int == 0:
                print("Processed ", str(int(i / fps)), "seconds")
                print("Time taken", time.time() - start)

            if imshow:
                img_show = cv2.resize(img_show, (0, 0), fx=0.75, fy=0.75)
                cv2.imshow('Frame', img_show)
                # Press Q on keyboard to  exit
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            if outp is not None:
                outp.write(img_show)
    finally:
        # Release the handles even if a frame fails mid-way.
        cap.release()
        if outp is not None:
            outp.release()
def getImageDetections(imagePath, weights, nms_threshold, confidenceParam, coefficient):
    """
    Runs the detections and returns all detections in a single structure.

    Parameters
    ----------
    imagePath : str
        Path of the image(s) handed to ``preprocess``.
    weights : str
        Name of the weights file inside the project's log directory.
    nms_threshold : float
        Non-maximum suppression threshold.
    confidenceParam : float
        Confidence score for the detections (everything above this threshold
        is considered a valid detection).
    coefficient : int
        Coefficient of the current EfficientDet model; used as an index into
        the table of input sizes.

    Returns
    -------
    detectionsList : List
        One ``(score, x1, y1, x2, y2)`` tuple per predicted bounding box;
        empty when nothing was detected.
    """
    compound_coef = coefficient
    force_input_size = None  # set None to use default size
    img_path = imagePath

    threshold = confidenceParam
    iou_threshold = nms_threshold

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['class_name']

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    input_size = input_sizes[compound_coef] if force_input_size is None else force_input_size
    ori_imgs, framed_imgs, framed_metas = preprocess(img_path, max_size=input_size)

    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list),
                                 # replace this part with your project's anchor config
                                 ratios=[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)],
                                 scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)])

    model.load_state_dict(torch.load(rootDir+'logs/' + project + '/' + weights))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x,
                          anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)

    # Created up front: it used to be created inside the loop, which raised
    # UnboundLocalError without detections and dropped all but the last image.
    detectionsList = []
    for i in range(len(ori_imgs)):
        if len(out[i]['rois']) == 0:
            continue

        for j in range(len(out[i]['rois'])):
            # `np.int` was removed in NumPy 1.24; the builtin int is the
            # exact type it used to alias.
            (x1, y1, x2, y2) = out[i]['rois'][j].astype(int)
            detectionsList.append((float(out[i]['scores'][j]), x1, y1, x2, y2))

    return detectionsList
def infer(self, image):
    """Detect objects in ``image`` and return them as rectangle annotations.

    Args:
        image: RGB image convertible with ``np.array``; the channel order is
            reversed to BGR before preprocessing.

    Returns:
        A list of dicts with the keys ``confidence`` (score as a string),
        ``label`` (looked up in ``self.labels`` by the stringified class id),
        ``points`` (``[x_top_left, y_top_left, x_bottom_right,
        y_bottom_right]`` as floats) and ``type`` (always ``"rectangle"``).
    """
    img = np.array(image)
    img = img[:, :, ::-1]  # rgb 2 bgr

    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
    threshold = 0.25
    iou_threshold = 0.25
    use_cuda = False
    use_float16 = False
    cudnn.fastest = False
    cudnn.benchmark = False
    input_size = 512

    ori_imgs, framed_imgs, framed_metas = preprocess(img,
                                                     max_size=input_size)

    if use_cuda:
        x = torch.stack(
            [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

    # Channels-last frames -> channels-first batch.
    x = x.to(torch.float32 if not use_float16 else torch.float16).permute(
        0, 3, 1, 2)

    # NOTE(review): the network is rebuilt and the weights re-read on every
    # call; consider loading once when the owning object is created.
    model = EfficientDetBackbone(compound_coef=0,
                                 num_classes=len(self.labels),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(self.path, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()

    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    with torch.no_grad():
        features, regression, classification, anchors = model(x)

        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()

        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes, threshold,
                          iou_threshold)

    pred = invert_affine(framed_metas, out)

    results = []
    for i in range(len(ori_imgs)):
        if len(pred[i]['rois']) == 0:
            continue

        for j in range(len(pred[i]['rois'])):
            # Plain Python floats for all four coordinates; the last one used
            # to be assigned to a misspelled name and stayed a NumPy scalar.
            xt1, yt1, xbr, ybr = (
                float(v) for v in pred[i]['rois'][j].astype(np.float64))
            obj = str(pred[i]['class_ids'][j])
            obj_label = self.labels.get(obj)
            obj_score = str(pred[i]['scores'][j])
            results.append({
                "confidence": obj_score,
                "label": obj_label,
                "points": [xt1, yt1, xbr, ybr],
                "type": "rectangle",
            })

    return results
class ObjectDetectionService(PTServingBaseService):
    """Serving wrapper that runs an EfficientDet-D0 detector on request images.

    The weights are read from ``models_best.pth`` next to this file; the
    ``model_path`` constructor argument is not used.
    """

    def __init__(self, model_name, model_path):
        # effdet
        self.model_name = model_name
        self.model_path = os.path.join(os.path.dirname(__file__),
                                       'models_best.pth')
        self.input_image_key = 'images'
        self.anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
        self.anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]
        self.compound_coef = 0
        self.threshold = 0.2
        self.iou_threshold = 0.2
        # Class names indexed by class id (returned to the client verbatim).
        self.obj_list = [
            '一次性快餐盒', '书籍纸张', '充电宝', '剩饭剩菜', '包', '垃圾桶', '塑料器皿', '塑料玩具',
            '塑料衣架', '大骨头', '干电池', '快递纸袋', '插头电线', '旧衣服', '易拉罐', '枕头', '果皮果肉',
            '毛绒玩具', '污损塑料', '污损用纸', '洗护用品', '烟蒂', '牙签', '玻璃器皿', '砧板', '筷子',
            '纸盒纸箱', '花盆', '茶叶渣', '菜帮菜叶', '蛋壳', '调料瓶', '软膏', '过期药物', '酒瓶',
            '金属厨具', '金属器皿', '金属食品罐', '锅', '陶瓷器皿', '鞋', '食用油桶', '饮料瓶', '鱼骨'
        ]
        self.input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
        self.input_size = self.input_sizes[self.compound_coef]

        self.model = EfficientDetBackbone(compound_coef=self.compound_coef,
                                          num_classes=len(self.obj_list),
                                          ratios=self.anchor_ratios,
                                          scales=self.anchor_scales)
        self.model.load_state_dict(
            torch.load(self.model_path, map_location='cpu'))
        self.model.requires_grad_(False)
        self.model.eval()

    def _preprocess(self, data):
        """Preprocess the request files into detector inputs.

        Returns:
            A dict mapping each request key to ``[framed_imgs, framed_metas]``.
            When a key carries several files, only the last one is kept.
        """
        preprocessed_data = {}
        for k, v in data.items():
            for file_content in v.values():
                ori_imgs, framed_imgs, framed_metas = preprocess(
                    file_content, max_size=self.input_size)
                preprocessed_data[k] = [framed_imgs, framed_metas]
        return preprocessed_data

    def _inference(self, data):
        """Run the detector on the preprocessed images.

        Returns:
            An ``OrderedDict`` with the parallel lists ``detection_classes``,
            ``detection_scores`` and ``detection_boxes`` (``[x1, y1, x2, y2]``
            as plain integers).
        """
        framed_imgs, framed_metas = data[self.input_image_key]
        if torch.cuda.is_available():
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            self.model = self.model.cuda()
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)

        # Channels-last frames -> channels-first batch.
        x = x.to(torch.float32).permute(0, 3, 1, 2)

        with torch.no_grad():
            features, regression, classification, anchors = self.model(x)

            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()

            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes, self.threshold,
                              self.iou_threshold)

        out = invert_affine(framed_metas, out)

        result = OrderedDict()
        result['detection_classes'] = []
        result['detection_scores'] = []
        result['detection_boxes'] = []

        for i in range(len(out)):
            if len(out[i]['rois']) == 0:
                continue
            for j in range(len(out[i]['rois'])):
                # `np.int` was removed in NumPy 1.24.  tolist() also turns
                # the coordinates into plain Python ints instead of NumPy
                # scalars.
                x1, y1, x2, y2 = out[i]['rois'][j].astype(int).tolist()
                result['detection_boxes'].append([x1, y1, x2, y2])
                obj = self.obj_list[out[i]['class_ids'][j]]
                result['detection_classes'].append(obj)
                score = float(out[i]['scores'][j])
                result['detection_scores'].append(score)

        return result

    def _postprocess(self, data):
        """Return the inference result unchanged."""
        return data

    def inference(self, data):
        '''
        Wrapper function to run preprocess, inference and postprocess functions.

        Parameters
        ----------
        data : map of object
            Raw input from request.

        Returns
        -------
        The inference result, extended with a ``latency_time`` entry, to be
        sent back to the client.
        '''
        pre_start_time = time.time()
        data = self._preprocess(data)
        infer_start_time = time.time()

        # Update preprocess latency metric
        pre_time_in_ms = (infer_start_time - pre_start_time) * 1000
        logger.info('preprocess time: ' + str(pre_time_in_ms) + 'ms')

        if self.model_name + '_LatencyPreprocess' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyPreprocess'].update(pre_time_in_ms)

        data = self._inference(data)
        infer_end_time = time.time()
        infer_in_ms = (infer_end_time - infer_start_time) * 1000

        logger.info('infer time: ' + str(infer_in_ms) + 'ms')
        data = self._postprocess(data)

        post_time_in_ms = (time.time() - infer_end_time) * 1000
        logger.info('postprocess time: ' + str(post_time_in_ms) + 'ms')

        total_in_ms = pre_time_in_ms + infer_in_ms + post_time_in_ms

        # Update inference latency metric with the inference time itself; it
        # used to receive the postprocess time.
        if self.model_name + '_LatencyInference' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyInference'].update(infer_in_ms)

        # Update overall latency metric; it used to leave out the inference
        # time.
        if self.model_name + '_LatencyOverall' in MetricsManager.metrics:
            MetricsManager.metrics[self.model_name +
                                   '_LatencyOverall'].update(total_in_ms)

        logger.info('latency: ' + str(total_in_ms) + 'ms')
        data['latency_time'] = str(round(total_in_ms, 1)) + ' ms'
        return data
def EfficientDetNode():
    """Run EfficientDet over a folder of images and publish the detections.

    Initialises the ``efficient_det_node`` ROS node, then walks the files of
    the module-level ``path`` directory in numeric filename order. For every
    image it runs the detector, hands the result to ``display`` (called with
    ``imwrite=True``), publishes a ``Detection2DArray`` on
    ``/image_detections`` and writes ``{txt_path}/{frame}.txt``: the frame's
    timestamp on the first line, then one
    ``center_x center_y size_x size_y id score`` line per detection.
    After the last image a 10-run speed test is timed on that image.

    Configuration (``path``, ``stamp_path``, ``txt_path``, ``compound_coef``,
    ``force_input_size``, ``use_cuda``, ``use_float16``, ``obj_list``,
    ``anchor_ratios``, ``anchor_scales``, ``threshold``, ``iou_threshold``) is
    read from module-level names.

    Raises:
        IndexError: if the stamp file has fewer lines than there are images.
        ValueError: if a filename stem or a timestamp is not numeric.
    """
    rospy.init_node('efficient_det_node', anonymous=True)
    rospy.Subscriber('input', String, image_callback, queue_size=1)
    pub = rospy.Publisher('/image_detections', Detection2DArray,
                          queue_size=10)
    rate = rospy.Rate(1)  # 1 Hz: one image per second

    path_list = os.listdir(path)
    path_list.sort(key=lambda name: int(name.split('.')[0]))

    # Read the timestamps up front so the file handle is not left open.
    with open(stamp_path) as stamp_file:
        stamp_lines = stamp_file.readlines()

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    if force_input_size is None:
        input_size = input_sizes[compound_coef]
    else:
        input_size = force_input_size

    # Build the detector once; it used to be rebuilt (and its weights re-read
    # from disk) for every single image.
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(
        torch.load(f'weights/efficientdet-d{compound_coef}.pth',
                   map_location='cpu'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    dtype = torch.float16 if use_float16 else torch.float32

    # Only these labels get an explicit id; for any other label `result.id`
    # keeps whatever default the message type provides (as before).
    class_ids = {'car': 0, 'person': 1, 'cyclist': 2}

    os.makedirs(txt_path, exist_ok=True)

    x = None
    framed_metas = None
    for stamp_i, filename in enumerate(path_list):
        cur_frame = filename[:-4]  # filename without its 4-character suffix
        img_path = path + "/" + filename
        cur_stamp = float(stamp_lines[stamp_i][-13:].strip('\n'))

        detection_results = Detection2DArray()

        ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                         max_size=input_size)
        if use_cuda:
            x = torch.stack(
                [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
        else:
            x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
        x = x.to(dtype).permute(0, 3, 1, 2)

        with torch.no_grad():
            _, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)
        out = invert_affine(framed_metas, out)
        display(cur_frame, out, ori_imgs, imshow=False, imwrite=True)

        for pred in out:
            for j in range(len(pred['rois'])):
                # np.int was removed in NumPy 1.24; the builtin int is the
                # same dtype it aliased.
                x1, y1, x2, y2 = pred['rois'][j].astype(int)
                obj = obj_list[pred['class_ids'][j]]

                result = ObjectHypothesisWithPose()
                result.score = float(pred['scores'][j])
                if obj in class_ids:
                    result.id = class_ids[obj]

                detection_msg = Detection2D()
                detection_msg.bbox.center.x = (x1 + x2) / 2
                detection_msg.bbox.center.y = (y1 + y2) / 2
                detection_msg.bbox.size_x = x2 - x1
                detection_msg.bbox.size_y = y2 - y1
                detection_msg.results.append(result)
                detection_results.detections.append(detection_msg)
                rospy.loginfo("%d: %lf", detection_msg.results[0].id,
                              detection_msg.results[0].score)

        detection_results.header.seq = cur_frame
        # NOTE(review): header.stamp is never assigned here (the assignment
        # from cur_stamp was commented out), so the default stamp is logged.
        rospy.loginfo(detection_results.header.stamp)
        pub.publish(detection_results)

        with open(f'{txt_path}/{cur_frame}.txt', 'w') as f:
            f.write(str(cur_stamp) + "\n")
            for detection in detection_results.detections:
                f.write(f"{detection.bbox.center.x} "
                        f"{detection.bbox.center.y} "
                        f"{detection.bbox.size_x} "
                        f"{detection.bbox.size_y} "
                        f"{detection.results[0].id} "
                        f"{detection.results[0].score}\n")
        rate.sleep()

    if x is None:
        # No image was processed, so there is nothing to time.
        rospy.logwarn('no images found in %s; skipping speed test', path)
        return

    print('running speed test...')
    with torch.no_grad():
        print('test1: model inferring and postprocessing')
        print('inferring image for 10 times...')
        t1 = time.time()
        for _ in range(10):
            _, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)
            out = invert_affine(framed_metas, out)
        t2 = time.time()
        tact_time = (t2 - t1) / 10
        print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')
def test(opt):
    """Detect objects in one image and export an annotated image plus JSON.

    The image ``opt.img_path + str(opt.img_id) + '.jpg'`` is run through an
    EfficientDet-D2 model loaded from ``opt.weights``. The annotated image is
    written to ``test/<img_id>.jpg`` (only when something was detected) and
    the detections to ``test/<img_id>.json`` as a list of records with
    ``image_id``, ``category_id``, ``score`` and ``bbox`` given as
    ``[x1, y1, width, height]``. A 10-run timing of inference plus
    postprocessing is printed along the way.

    Args:
        opt: object providing the attributes ``img_id``, ``img_path`` and
            ``weights``.
    """
    import os  # local import: only needed to create the output directory

    compound_coef = 2
    force_input_size = None  # set None to use default size

    img_id = opt.img_id
    img_path = opt.img_path + str(img_id) + '.jpg'

    # replace this part with your project's anchor config
    anchor_ratios = [(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]
    anchor_scales = [2**0, 2**(1.0 / 3.0), 2**(2.0 / 3.0)]

    threshold = 0.2
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = ['02010001', '02010002']
    color_list = standard_to_bgr(STANDARD_COLORS)

    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
    if force_input_size is None:
        input_size = input_sizes[compound_coef]
    else:
        input_size = force_input_size

    def display(preds, imgs, imshow=True, imwrite=False, img_id=1):
        """Draw the predicted boxes on each image, then show and/or save it."""
        for i in range(len(imgs)):
            if len(preds[i]['rois']) == 0:
                continue

            imgs[i] = imgs[i].copy()
            imgs[i] = cv2.cvtColor(imgs[i], cv2.COLOR_BGR2RGB)

            for j in range(len(preds[i]['rois'])):
                # np.int was removed in NumPy 1.24; the builtin int is the
                # same dtype it aliased.
                x1, y1, x2, y2 = preds[i]['rois'][j].astype(int)
                obj = obj_list[preds[i]['class_ids'][j]]
                score = float(preds[i]['scores'][j])
                plot_one_box(imgs[i], [x1, y1, x2, y2], label=obj,
                             score=score,
                             color=color_list[get_index_label(obj, obj_list)])

            if imshow:
                cv2.imshow('img', imgs[i])
                cv2.waitKey(0)

            if imwrite:
                str1 = 'test/' + str(img_id) + '.jpg'
                cv2.imwrite(str1, imgs[i])

    # cv2.imwrite fails silently and open() raises when the directory is
    # missing, so create it before anything is written.
    os.makedirs('test', exist_ok=True)

    ori_imgs, framed_imgs, framed_metas = preprocess(img_path,
                                                     max_size=input_size)
    if use_cuda:
        x = torch.stack([torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
    else:
        x = torch.stack([torch.from_numpy(fi) for fi in framed_imgs], 0)
    x = x.to(torch.float16 if use_float16 else torch.float32).permute(
        0, 3, 1, 2)

    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list),
                                 ratios=anchor_ratios,
                                 scales=anchor_scales)
    model.load_state_dict(torch.load(opt.weights, map_location='cpu'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()

    with torch.no_grad():
        _, regression, classification, anchors = model(x)
        out = postprocess(x, anchors, regression, classification,
                          regressBoxes, clipBoxes,
                          threshold, iou_threshold)

    out = invert_affine(framed_metas, out)
    display(out, ori_imgs, imshow=False, imwrite=True, img_id=img_id)

    print('running speed test...')
    with torch.no_grad():
        print('test1: model inferring and postprocessing')
        print('inferring image for 10 times...')
        t1 = time.time()
        for _ in range(10):
            _, regression, classification, anchors = model(x)
            out = postprocess(x, anchors, regression, classification,
                              regressBoxes, clipBoxes,
                              threshold, iou_threshold)
            out = invert_affine(framed_metas, out)
        # Stop the clock before building the JSON records so the reported
        # figure covers inference and postprocessing only.
        t2 = time.time()

    first = out[0]
    tempList = []
    for j in range(len(first['class_ids'])):
        left, top, right, bottom = (float(v) for v in first['rois'][j][:4])
        tempList.append({
            'image_id': img_id,
            # model class index 1 is exported as category 2, the rest as 1
            'category_id': 2 if first['class_ids'][j] == 1 else 1,
            'score': float(first['scores'][j]),
            'bbox': [left, top, right - left, bottom - top],
        })

    tact_time = (t2 - t1) / 10
    print(f'{tact_time} seconds, {1 / tact_time} FPS, @batch_size 1')

    with open("test/" + str(img_id) + ".json", "w") as f:
        json.dump(tempList, f)
    print("生成标注后的图片(" + str(img_id) + ".jpg)和json(" + str(img_id) +
          ".json)到test文件夹中...")
model_1.load_state_dict( torch.load( f'/data/efdet/logs/{project1}/weights/{save_time1}/efficientdet-d{compound_coef}_{number}.pth', map_location='cpu')) # model 2 model_2 = EfficientDetBackbone(compound_coef=compound_coef, num_classes=len(obj_list_2), ratios=anchor_ratios, scales=anchor_scales) model_2.load_state_dict( torch.load( f'/data/efdet/logs/{project}/crop/weights/{save_time2}/efficientdet-d{compound_coef}_{number}.pth', map_location='cpu')) model_1.requires_grad_(False) model_1.eval() model_2.requires_grad_(False) model_2.eval() if use_cuda: model_1 = model_1.cuda() model_2 = model_2.cuda() if use_float16: model_1 = model_1.half() model_2 = model_2.half() def display(out_1, out_2, imgs, imshow=True, showtime=0, imwrite=False): # if len(preds[i]['rois']) == 0: # if model dosen't detect object, not show image
def effdet_detection(content, effdet):
    """Run EfficientDet-D0 on the webcam and report one class to Scratch.

    Frames are read from video source ``0`` until the stream ends or ``q`` is
    pressed. For every detection whose label equals ``content`` the box is
    drawn on the frame and a horizontal position derived from the box centre,
    ``(x1 + x2) * 0.5 * 0.625 - 200``, is passed to
    ``effdet.send_message_to_scratch`` and printed. Each frame is shown in a
    window named ``frame``.

    Args:
        content: class label (an entry of the COCO name list below) to report.
        effdet: object exposing ``send_message_to_scratch(value)``.
    """
    video_src = 0  # set int to use webcam, set str to read from a video file
    compound_coef = 0
    force_input_size = None  # set None to use default size

    threshold = 0.5
    iou_threshold = 0.2

    use_cuda = True
    use_float16 = False
    cudnn.fastest = True
    cudnn.benchmark = True

    obj_list = [
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', '', 'stop sign',
        'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
        'cow', 'elephant', 'bear', 'zebra', 'giraffe', '', 'backpack',
        'umbrella', '', '', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', '',
        'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana',
        'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog',
        'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', '',
        'dining table', '', '', 'toilet', '', 'tv', 'laptop', 'mouse',
        'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
        'sink', 'refrigerator', '', 'book', 'clock', 'vase', 'scissors',
        'teddy bear', 'hair drier', 'toothbrush',
    ]

    # tf bilinear interpolation is different from any other's, just make do
    input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536]
    if force_input_size is None:
        input_size = input_sizes[compound_coef]
    else:
        input_size = force_input_size

    # load model
    model = EfficientDetBackbone(compound_coef=compound_coef,
                                 num_classes=len(obj_list))
    # map_location keeps loading independent of the device the checkpoint was
    # saved from; the model is moved to the GPU below when requested.
    model.load_state_dict(
        torch.load(f'weights/efficientdet-d{compound_coef}.pth',
                   map_location='cpu'))
    model.requires_grad_(False)
    model.eval()
    if use_cuda:
        model = model.cuda()
    if use_float16:
        model = model.half()

    def display(preds, imgs, content, effdet):
        """Annotate and return the first image; None when there is no image."""
        if not imgs:
            return None
        img, pred = imgs[0], preds[0]
        for j in range(len(pred['rois'])):
            # np.int was removed in NumPy 1.24; the builtin int is the same
            # dtype it aliased.
            x1, y1, x2, y2 = pred['rois'][j].astype(int)
            obj = obj_list[pred['class_ids'][j]]
            score = float(pred['scores'][j])
            if obj != content:
                continue
            # send the box position of the requested class to Scratch
            position = (x1 + x2) * 0.5 * 0.625 - 200
            effdet.send_message_to_scratch(position)
            print(position)
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
            cv2.putText(img, '{}, {:.3f}'.format(obj, score),
                        (x1, y1 + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                        (255, 255, 0), 1)
        return img

    # Box
    regressBoxes = BBoxTransform()
    clipBoxes = ClipBoxes()
    dtype = torch.float16 if use_float16 else torch.float32

    # Video capture
    cap = cv2.VideoCapture(video_src)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # frame preprocessing
            ori_imgs, framed_imgs, framed_metas = preprocess_video(
                frame, max_size=input_size)
            if use_cuda:
                x = torch.stack(
                    [torch.from_numpy(fi).cuda() for fi in framed_imgs], 0)
            else:
                x = torch.stack(
                    [torch.from_numpy(fi) for fi in framed_imgs], 0)
            x = x.to(dtype).permute(0, 3, 1, 2)

            # model predict
            with torch.no_grad():
                _, regression, classification, anchors = model(x)
                out = postprocess(x, anchors, regression, classification,
                                  regressBoxes, clipBoxes,
                                  threshold, iou_threshold)

            # result
            out = invert_affine(framed_metas, out)
            img_show = display(out, ori_imgs, content, effdet)

            # show frame by frame
            cv2.imshow('frame', img_show)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()
def __init__(self, video_src: str, video_output: str, text_output: str,
             obj_list: list, input_sizes: list, reid_cpkt: str,
             compound_coef: int, force_input_size=None, threshold=0.2,
             iou_threshold=0.2, use_cuda=True, use_float16=False,
             cudnn_fastest=True, cudnn_benchmark=True, max_dist=0.2,
             min_confidence=0.3, nms_max_overlap=0.5, max_iou_distance=0.7,
             max_age=70, n_init=3, nn_budget=100, selected_target=None):
    """Store the configuration, load the detector and build the trackers.

    Args:
        video_src: video source; an int selects a webcam, a str a video file.
        video_output: where the output video is written.
        text_output: where the csv-format text output is written.
        obj_list: class names; its length is the detector's class count.
        input_sizes: input size per compound coefficient.
        reid_cpkt: value passed to ``build_tracker`` as its first argument
            (presumably the re-id checkpoint path; confirm there).
        compound_coef: EfficientDet compound coefficient; selects both the
            weights file ``weights/efficientdet-d{compound_coef}.pth`` and
            the default input size.
        force_input_size: overrides ``input_sizes[compound_coef]`` when not
            None.
        threshold: stored as the detector score threshold.
        iou_threshold: stored as the detector IoU threshold.
        use_cuda: move the detector to the GPU when one is available.
        use_float16: convert the detector to half precision.
        cudnn_fastest: value assigned to ``cudnn.fastest``.
        cudnn_benchmark: value assigned to ``cudnn.benchmark``.
        max_dist, min_confidence, nms_max_overlap, max_iou_distance, max_age,
        n_init, nn_budget: tracker settings, stored on the instance and
            forwarded unchanged to ``build_tracker``.
        selected_target: collection of targets; one tracker is built per
            entry. ``None`` is treated as "no targets".
    """
    # I/O
    self.video_src = video_src        # int: webcam, str: video file
    self.video_output = video_output  # output to the specific position
    self.text_output = text_output    # output file in csv format

    # DETECTOR
    self.compound_coef = compound_coef
    self.force_input_size = force_input_size  # None means default size
    self.threshold = threshold
    self.iou_threshold = iou_threshold
    self.use_cuda = use_cuda
    self.use_float16 = use_float16
    cudnn.fastest = cudnn_fastest
    cudnn.benchmark = cudnn_benchmark

    # coco_name
    self.obj_list = obj_list

    # input size
    self.input_sizes = input_sizes
    if force_input_size is None:
        self.input_size = input_sizes[self.compound_coef]
    else:
        self.input_size = force_input_size

    # load detector model
    model = EfficientDetBackbone(compound_coef=self.compound_coef,
                                 num_classes=len(obj_list))
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    model.load_state_dict(
        torch.load(f'weights/efficientdet-d{self.compound_coef}.pth',
                   map_location='cpu'))
    model.requires_grad_(False)
    model.eval()
    if self.use_cuda and torch.cuda.is_available():
        model = model.cuda()
    if self.use_float16:
        model = model.half()
    # Always assign the detector: it used to be left undefined when CUDA was
    # disabled or unavailable and use_float16 was False.
    self.detector = model

    # TRACKER
    self.reid_cpkt = reid_cpkt
    self.max_dist = max_dist
    self.min_confidence = min_confidence
    self.nms_max_overlap = nms_max_overlap
    self.max_iou_distance = max_iou_distance
    self.max_age = max_age
    self.n_init = n_init
    self.nn_budget = nn_budget

    # load tracker models, one per selected target; the default None used to
    # crash on len(None), so it now means an empty selection.
    self.selected_target = selected_target if selected_target is not None else []
    self.trackers = [
        build_tracker(reid_cpkt, max_dist, min_confidence, nms_max_overlap,
                      max_iou_distance, max_age, n_init, nn_budget, use_cuda)
        for _ in self.selected_target
    ]

    # video frames
    self.frame_id = 0