def process_detections(det, img_shape, img0):
    """Process detections into a serializable dict and draw boxes on img0."""
    output_dict = {"shellfishDetection": []}
    gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
    if det is not None and len(det):
        # Rescale boxes from img_size to img0 size
        det[:, :4] = scale_coords(img_shape, det[:, :4], img0.shape).round()
        # Write results
        for *xyxy, conf, cls in reversed(det):
            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            output_dict["shellfishDetection"].append({
                "boundingPoly": {
                    "normalizedVertices": [{
                        "x": xywh[0],
                        "y": xywh[1],
                        "width": xywh[2],
                        "height": xywh[3],
                    }]
                },
                "name": NAMES[int(cls)],
                "score": float(conf),  # float() works for CPU and CUDA tensors; .numpy() would fail on CUDA
            })
            label = '%s %.2f' % (NAMES[int(cls)], conf)
            plot_one_box(xyxy, img0, label=label, color=COLORS[int(cls)], line_thickness=3)
    return output_dict
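# The snippets in this section all rely on the YOLOv5-style xyxy2xywh helper
# together with the `gn` whwh normalization idiom used above. A minimal sketch
# (assuming the standard utils.general implementation) is included here for
# reference; the `_sketch` name is ours, not the repos'.
def xyxy2xywh_sketch(x):
    # [x1, y1, x2, y2] -> [x_center, y_center, width, height]
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2  # x center
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2  # y center
    y[:, 2] = x[:, 2] - x[:, 0]        # width
    y[:, 3] = x[:, 3] - x[:, 1]        # height
    return y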
def corner_detection(corner_model, imgple, im0, xyxy, colors, cls, c1, c2):
    imgple_ori = deepcopy(imgple)
    imgple = cv2.cvtColor(imgple, cv2.COLOR_BGR2RGB)
    h_ple, w_ple, _ = imgple.shape
    imgple = Image.fromarray(imgple).convert('RGB')
    imgple = test_data_transforms(imgple)
    imgple = imgple.unsqueeze(0)
    if torch.cuda.is_available():
        imgple = imgple.cuda()
    output = corner_model(imgple)
    # Predicted corners, normalized to [0, 1]: left-up, left-down, right-down, right-up
    luc_x, luc_y, ldc_x, ldc_y, rdc_x, rdc_y, ruc_x, ruc_y = tuple(output.detach().cpu().numpy()[0])
    # Corner coordinates within the crop
    luc_xo, luc_yo = int(luc_x * w_ple), int(luc_y * h_ple)
    ldc_xo, ldc_yo = int(ldc_x * w_ple), int(ldc_y * h_ple)
    rdc_xo, rdc_yo = int(rdc_x * w_ple), int(rdc_y * h_ple)
    ruc_xo, ruc_yo = int(ruc_x * w_ple), int(ruc_y * h_ple)
    # Corner coordinates in the full image (offset by the crop origin c1)
    luc_x, luc_y = int(luc_x * w_ple + c1[0]), int(luc_y * h_ple + c1[1])
    ldc_x, ldc_y = int(ldc_x * w_ple + c1[0]), int(ldc_y * h_ple + c1[1])
    rdc_x, rdc_y = int(rdc_x * w_ple + c1[0]), int(rdc_y * h_ple + c1[1])
    ruc_x, ruc_y = int(ruc_x * w_ple + c1[0]), int(ruc_y * h_ple + c1[1])
    cv2.circle(im0, (rdc_x, rdc_y), 3, [255, 0, 0], 1)
    cv2.circle(im0, (ldc_x, ldc_y), 3, [255, 0, 0], 1)
    cv2.circle(im0, (luc_x, luc_y), 3, [255, 0, 0], 1)
    cv2.circle(im0, (ruc_x, ruc_y), 3, [255, 0, 0], 1)
    mean_width = 190  # np.mean(widths)
    mean_height = 60  # np.mean(height)
    plot_one_box(xyxy, im0, color=colors[int(cls)], line_thickness=3)
    start_points = [[luc_xo, luc_yo], [ruc_xo, ruc_yo], [ldc_xo, ldc_yo], [rdc_xo, rdc_yo]]
    warp_img = warp(mean_height, mean_width, start_points, imgple_ori)
    return warp_img, im0
def main_process(input_img):
    img0 = input_img.copy()
    img = letterbox(img0, new_shape=imgsz)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    t1 = time_synchronized()
    pred = model(img, augment=True)[0]
    pred = non_max_suppression(pred, my_confidence, my_threshold,
                               classes=my_filterclasses, agnostic=None)
    t2 = time_synchronized()

    total = 0
    for i, det in enumerate(pred):
        gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
        if det is not None and len(det):
            # Rescale boxes from img_size to img0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                label = '%sbaht (%.0f%%)' % (names[int(cls)], conf * 100)
                total += int(names[int(cls)])  # class names are coin denominations
                plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)
                print(label)
    print('Done. (%.3fs)' % (t2 - t1))
    # cv2.rectangle(img0, (0, 10), (250, 90), (0, 0, 0), -1)
    img0 = cv2.putText(img0, "total " + str(total) + " Baht", (10, 45 + 30 * 3),
                       cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 255), 2)
    return img0
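# Every snippet here preprocesses frames with a letterbox() helper before
# inference. A simplified sketch of the YOLOv5-style implementation (resize
# keeping aspect ratio, then pad to the target shape with gray borders) is
# included for context; the exact version these repos vendor may differ.
def letterbox_sketch(img, new_shape=640, color=(114, 114, 114)):
    h, w = img.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    r = min(new_shape[0] / h, new_shape[1] / w)          # scale ratio
    new_unpad = (int(round(w * r)), int(round(h * r)))   # resized (w, h)
    dw = (new_shape[1] - new_unpad[0]) / 2               # padding per side
    dh = (new_shape[0] - new_unpad[1]) / 2
    if (w, h) != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=color)
    return img, r, (dw, dh)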
def objectdetect(frame, count):
    dict_object = {}
    img = letterbox(frame, new_shape=imgsz)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(device)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = model(img, augment=opt.augment)[0]
    pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes)

    for index, detect in enumerate(pred):
        if detect is not None and len(detect):
            # Rescale boxes from img_size to im0 size
            detect[:, :4] = scale_coords(img.shape[2:], detect[:, :4], frame.shape).round()
            for *xyxy, conf, cls in detect:
                label = names[int(cls)]
                x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])
                dict_object[label] = frame[y1:y2, x1:x2]
                plot_one_box(xyxy, frame, label=label, color=colors[int(cls)], line_thickness=3)

    cv2.imwrite(f'images/frame{count}.jpg', frame)
    plt.imshow(frame)
    plt.show()
    return dict_object
def webcam_out(q1, q2, q3, q4):
    score = 0
    Before_flag = False
    while True:
        if not q1.empty():
            frame = q1.get()
            if not q2.empty():
                q2num, label, colors, name = q2.get()
                q4.put(q2num)
            if not q3.empty():
                poses, num = q3.get()
                try:
                    frame2 = frame[num[1]:num[3], num[0]:num[2]].copy()
                    canvas, score, Before_flag, status = draw_person_pose(
                        frame2, poses, score, Before_flag, name)
                    frame[num[1]:num[3], num[0]:num[2]] = canvas
                except Exception:
                    pass
            try:
                # q2num/label/colors may not exist yet on the first frames
                plot_one_box(q2num, frame, label=label, color=colors, line_thickness=3)
            except Exception:
                pass
            cv2.imshow("webcam", frame)
            if cv2.waitKey(1) > 0:
                break
def draw_bbox(img, pred, boxes):
    img_c = img.copy()
    if boxes.shape != torch.Size([0]):
        for box in boxes:
            x1 = int(box[1] - box[3] // 2)
            y1 = int(box[2] - box[4] // 2)
            x2 = int(box[1] + box[3] // 2)
            y2 = int(box[2] + box[4] // 2)
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
    if pred is not None:
        for box in pred:
            new_line = None
            x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
            conf = box[4]
            cls = box[5]
            # if cls == 0:
            #     x = int((x2 - x1) / 2 + x1)
            #     y = int((y2 - y1) / 2 + y1)
            #     # Quarter guide lines through (x, y1)-(x, y2) and (x1, y)-(x2, y)
            #     cv2.line(img, (x, y1), (x, y2), (114, 114, 114), 2)
            #     cv2.line(img, (x1, y), (x2, y), (114, 114, 114), 2)
            #     # Decide whether this is the front or rear of the vehicle
            #     for box1 in pred:
            #         cls1 = box1[5]
            #         if cls1 > 5:
            #             x1_, y1_, x2_, y2_ = int(box1[0]), int(box1[1]), int(box1[2]), int(box1[3])
            #             if x1_ > x1 and x2_ < x2 and y1_ > y1 and y2_ < y2:
            #                 c_y1_ = (y2_ - y1_) / 2 + y1_
            #                 d = c_y1_ - y1
            #                 if d > (y2 - y1) / 2:
            #                     new_line = ' FRONT'
            obj_conf = box[6]
            cls_conf = box[7]
            text = '%s|%.2f' % (names[int(cls)], conf)
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(img, text, (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
            label = '%s %.2f %.5f %.5f' % (names[int(cls)], conf, obj_conf, cls_conf)
            if new_line:
                label += new_line
            plot_one_box(box, img_c, label=label, color=colors[int(cls)], line_thickness=3)
    return img, img_c
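# plot_one_box() is the shared drawing helper in all of these snippets. A
# minimal sketch modeled on the YOLOv5 utils version (which these repos appear
# to vendor) is reproduced below; local copies may add parameters such as the
# `classes` or `font` arguments seen in some calls, or a return value.
def plot_one_box_sketch(x, img, color=None, label=None, line_thickness=None):
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line width
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled label background
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3,
                    [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)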
def process_image(transform, processing_model, img):
    global network, class_names, class_colors
    tracks = []
    # imgs = []
    (device, model, names, colors, imgsz) = processing_model
    # view_img = True
    try:
        im0 = img.copy()
        img = letterbox(im0)[0]  # or letterbox(im0, new_shape=(imgsz, imgsz))[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        pred = model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, 0.25, 0.45, classes=0)  # agnostic=opt.agnostic_nms

        # # Apply Classifier
        # if classify:
        #     pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            s = '%g: ' % i
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

        img = im0
        tracks = pred
    except Exception as e:
        track = traceback.format_exc()
        print(track)
        print("YOLO 5 Exception", e)
    return tracks, img
def detect():
    imgsz = check_img_size(512, s=model.stride.max())  # check img_size
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference: warm up both models once when on GPU
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model_reco(img.half() if half else img) if device.type != 'cpu' else None
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    img_list = [file for file in os.listdir('test_img') if file.endswith('.jpg')]
    for j in img_list:
        start = time.time()
        new_name = j[:-4] + '.png'
        img0 = cv2.imread('test_img/' + j)
        img = letterbox(img0, new_shape=512)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        pred = model(img, augment=False)[0]
        pred = non_max_suppression(pred, 0.4, 0.5, classes=0, agnostic=False)

        # Process detections
        for i, det in enumerate(pred):
            gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
                for *xyxy, conf, cls in reversed(det):
                    x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])
                    crop = img0[y1:y2, x1:x2]
                    value = final_test.detect(crop, device, model_reco, half)  # second-stage recognition
                    print(value)
                    plot_one_box(xyxy, img0, label=value, color=colors[int(cls)], line_thickness=3)

        cv2.imwrite('result/{}'.format(new_name), img0)
        end = time.time()
        print('Time::', end - start)
def prediction(self, frame, sceneId, timeID):
    is_warning = False
    scene = self.sm.get_scene(sceneId)
    img_org = frame.copy()
    img = letterbox(frame, new_shape=640)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device).half()
    img /= 255.0
    img = img.unsqueeze(0)

    t1 = time_synchronized()
    # Inference
    pred = self.model(img)[0]
    pred = non_max_suppression(pred, self.conf_thresh, self.iou_thresh)
    t2 = time_synchronized()
    # print('detect inference cost. (%.3fs)' % (t2 - t1))

    # Process detections
    for i, det in enumerate(pred):
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame.shape).round()
            for *box, conf, cls in reversed(det):
                label = f'{self.names[int(cls)]} {conf:.2f}'
                c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
                w_c = int(box[2]) - int(box[0])
                h_c = int(box[3]) - int(box[1])
                # Torso region: roughly 1/6 to 7/12 of the box height
                c1_new, c2_new = (int(box[0]), int(box[1] + h_c / 6)), (int(box[2]), int(box[1] + h_c * 7 / 12))
                point = (int((box[0] + box[2]) / 2), int(box[3]))  # bottom-center of the box
                if not scene.point_warn_zone_test(point):
                    continue
                if self.names[int(cls)] in ['person']:
                    # plot_one_box(box, frame, label=label, color=(0, 255, 0), line_thickness=3)
                    # frame_crop = frame[c1[1]:c2[1], c1[0]:c2[0]]
                    frame_crop = frame[c1_new[1]:c2_new[1], c1_new[0]:c2_new[0]]
                    is_warning = self.prediction2(frame_crop)
                    plot_one_box(box, frame, label=label, color=(0, 255, 0), line_thickness=3)
                    cv2.rectangle(frame, c1_new, c2_new, (0, 0, 255), 3)
                if is_warning:
                    break

    cv2.polylines(frame, scene.warn_polygons, True, (0, 255, 255), 2)
    if is_warning:
        cv2.putText(frame, "WARNING", (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        self.write_frame(img_org, sceneId, timeID)
    else:
        cv2.putText(frame, "NORMAL", (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    imshow_name = "image" + str(sceneId)
    frame_resize = cv2.resize(frame, (1280, 720))
    cv2.imshow(imshow_name, frame_resize)
    # note: the original `(ord('q') or ord('Q'))` always evaluated to ord('q')
    if cv2.waitKey(1) & 0xFF in (ord('q'), ord('Q')):
        raise Exception("exit")
    return is_warning, frame
def prediction(self, frame, sceneId, timeID, zone):
    is_warning = False
    img_org = frame.copy()
    zone = [zone.reshape(zone.shape[0], 1, zone.shape[1])]  # to cv2.polylines contour format
    img = letterbox(frame, new_shape=640)[0]
    img = img[:, :, ::-1].transpose(2, 0, 1)
    img = np.ascontiguousarray(img)
    img = torch.from_numpy(img).to(self.device).half()
    img /= 255.0
    img = img.unsqueeze(0)

    t1 = time_synchronized()
    # Inference
    with torch.no_grad():
        pred = self.model(img)[0]
        pred = non_max_suppression(pred, self.conf_thresh, self.iou_thresh)
    t2 = time_synchronized()
    # print('Vehicle detect inference cost. (%.3fs)' % (t2 - t1))

    # Process detections
    for i, det in enumerate(pred):
        if det is not None and len(det):
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame.shape).round()
            for *box, conf, cls in reversed(det):
                label = f'{self.names[int(cls)]} {conf:.2f}'
                c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
                point = (int((box[0] + box[2]) / 2), int((box[1] + box[3]) / 2))  # box center
                if not self.point_zone_test(point, zone):
                    continue
                if self.names[int(cls)] in ['car', 'motorcycle', 'bus', 'truck']:
                    plot_one_box(box, frame, label=label, color=(0, 0, 255), line_thickness=3)
                    is_warning = True

    cv2.polylines(frame, zone, True, (0, 255, 255), 2)
    if is_warning:
        cv2.putText(frame, "WARNING", (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        self.write_frame(img_org, sceneId, timeID)
    else:
        cv2.putText(frame, "NORMAL", (5, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    imshow_name = "vehicle" + str(sceneId)
    cv2.imshow(imshow_name, frame)
    # note: the original `(ord('q') or ord('Q'))` always evaluated to ord('q')
    if cv2.waitKey(1) & 0xFF in (ord('q'), ord('Q')):
        raise Exception("exit")
    return is_warning, frame
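# point_warn_zone_test / point_zone_test are not shown in these snippets; a
# plausible minimal implementation (our assumption, not the repos' code) uses
# OpenCV's point-in-polygon test, which returns +1 inside, 0 on the edge, and
# -1 outside when measureDist=False.
def point_zone_test_sketch(point, zones):
    # zones: list of contours shaped (N, 1, 2), as built above for cv2.polylines
    return any(cv2.pointPolygonTest(z, point, False) >= 0 for z in zones)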
def webcam_out(q1, q2, q3, q4, q5, q6, q7):
    Squat_score, Bench_score, Dead_score = 0, 0, 0
    Squat_Before_flag, Bench_Before_flag, Dead_Before_flag = False, False, False
    fourcc = 'mp4v'  # output video codec
    x = 0
    while True:
        if not q1.empty():
            frame = q1.get()
            if not q2.empty():
                q2num, label, colors, name = q2.get()
                q4.put(q2num)
            if not q3.empty():
                poses, num = q3.get()
                try:
                    frame2 = frame[num[1]:num[3], num[0]:num[2]].copy()
                    canvas, Squat_score, Bench_score, Dead_score, \
                        Squat_Before_flag, Bench_Before_flag, Dead_Before_flag, \
                        squat_status, bench_status, dead_status = draw_person_pose(
                            frame2, poses, Squat_score, Bench_score, Dead_score,
                            Squat_Before_flag, Bench_Before_flag, Dead_Before_flag, name)
                    frame[num[1]:num[3], num[0]:num[2]] = canvas
                    outq6 = Squat_score, Bench_score, Dead_score
                    q6.put(outq6)
                except Exception:
                    pass
            try:
                # q2num/label/colors may not exist yet on the first frames
                plot_one_box(q2num, frame, label=label, color=colors, line_thickness=3)
            except Exception:
                pass
            q5.put(frame)

        if not q7.empty():
            fps, w, h, source, ret = q7.get()
            try:
                vid_writer
            except UnboundLocalError:
                # Lazily create the writer the first time video metadata arrives
                vid_writer = cv2.VideoWriter(
                    f"{source[:-4]}_pose_estimation.mp4",
                    cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
            try:
                vid_writer.write(frame)
            except UnboundLocalError:
                pass
            x = 0
        elif q7.empty():
            try:
                vid_writer
                # After ~2000 idle iterations, assume the stream ended
                if x == 2000:
                    vid_writer.release()
                    break
            except UnboundLocalError:
                pass
            x += 1
def inference(img_path: Path):
    img, size_orig, size = load_image(str(img_path), img_size=imgsz, augment=augment)
    # img = cv2.imread(str(img_path))
    if rgb:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    im0s = img.copy()
    img = torch.from_numpy(img).to(device)
    img = img.permute(2, 0, 1)
    img = img.half() if half else img.float()  # uint8 to fp16/32
    img /= 255.0  # 0 - 255 to 0.0 - 1.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    # Inference
    t1 = time_synchronized()
    pred = model(img, augment=augment)[0]

    # Apply NMS
    pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
    t2 = time_synchronized()
    torch.cuda.synchronize()

    # INFO: Apply Classifier deleted

    # Process detections
    for i, det in enumerate(pred):  # detections per image
        p, s, im0 = img_path, '', im0s
        s += '%gx%g ' % img.shape[2:]  # print string
        h, w = im0s.shape[:2]
        gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
            for *xyxy, conf, cls in reversed(det):
                label = '%s %.2f' % (names[int(cls)], conf)
                im0 = plot_one_box(xyxy, im0, label=label, color=colors[int(cls)],
                                   line_thickness=3, font=font)

        # Print time (inference + NMS)
        logger.info('%sDone. (%.3fs)' % (s, t2 - t1))
    return im0
def __call__(self, trte='test', cf=False):
    p = f'{self.dp}/images/{trte}'
    imgs = [f'{self.dp}/images/{trte}/{x}' for x in os.listdir(p) if x.endswith('.jpg')]
    imgs = sorted(imgs)
    for i, imgp in enumerate(imgs):
        stem = Path(imgp).stem
        labelp = f'{self.dp}/labels/{trte}/{stem}.txt'
        img = cv2.imread(imgp)
        h, w, c = img.shape
        with open(labelp, 'r') as f:
            label = f.readlines()
        label = np.array([x.split() for x in label], dtype=np.float32)
        classes = label[:, 0]
        bboxes = label[:, 1:]
        bboxes = xywh2xyxy(bboxes)  # normalized xywh -> normalized xyxy
        for j in range(len(label)):
            cls = classes[j]
            bbox = bboxes[j]
            # Scale normalized coordinates to pixels
            bbox[0] *= w
            bbox[1] *= h
            bbox[2] *= w
            bbox[3] *= h
            plot_one_box(bbox, img, label=f'{self.names[int(cls)]}', color=self.colors[int(cls)])
        print(f'imgs: {len(imgs)} stem: {stem} img_shape: {img.shape} lb: {label}')
        # cr = np.any(label[:, 0] == 1)
        crit = 'fogged' in stem if cf else True
        if crit:
            cv2.imshow('xx', img)
            if cv2.waitKey(0) == ord('q'):
                exit()
def detect(source, weights, view_img=True, imgsz=640, conf_thres=0.8, iou_thres=0.7,
           classes=None, agnostic_nms=True, focal_distance=0.03, car_height=1.7):
    device = select_device('0')
    half = True
    model = attempt_load(weights, map_location=device)
    model.half()
    dataset = LoadImages(source, img_size=imgsz)
    colors = (0, 0, 255)
    names = model.module.names if hasattr(model, 'module') else model.names

    img = torch.zeros((1, 3, imgsz, imgsz), device=device)
    _ = model(img.half())  # warm-up run

    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        pred = model(img, augment=True)[0]
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)

        for i, det in enumerate(pred):
            p, s, im0 = path, '', im0s
            s += '%gx%g ' % img.shape[2:]
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                # print(det)
                # print(det.shape)
                # print(det[0][4])
                # max_det = det[0]
                for *xyxy, conf, cls in reversed(det):
                    if view_img:
                        car_img_height = xyxy[3] - xyxy[1]
                        label = '%s %.2f' % (names[int(cls)], conf)
                        # Pinhole model: distance = (focal length / image height) * real height
                        distance = (focal_distance / car_img_height) * car_height * 10000 - 1
                        result_distance = str(round(distance.item())) + 'm'
                        inner = plot_one_box(xyxy, im0, label=label, color=colors, line_thickness=3)
                        tl = 3 or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # note: always 3
                        c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3]))
                        tf = max(tl - 1, 1)
                        if inner is True:
                            cv2.putText(im0, result_distance, (c1[0] - 15, c1[1] + 75), 0, tl / 3,
                                        [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration
def detect(source, save_img=False):
    weights = 'final_weights.pt'
    imgsz = 832

    # Padded resize
    img = letterbox(source, new_shape=imgsz)[0]

    # Convert
    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
    img = ascontiguousarray(img)

    set_logging()
    device = select_device('')
    model = attempt_load(weights, map_location=device)
    check_img_size(imgsz, s=model.stride.max())
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    img = torch.from_numpy(img).to(device)
    img = img.float()
    img /= 255.0
    if img.ndimension() == 3:
        img = img.unsqueeze(0)

    pred = model(img, augment=False)[0]
    pred = non_max_suppression(pred, 0.4, 0.5, classes=None, agnostic=False)

    for i, det in enumerate(pred):
        if det is not None and len(det):
            # Rescale boxes from img_size to im0 size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], source.shape).round()
            for *xyxy, conf, cls in reversed(det):
                label = '%s %.2f' % (names[int(cls)], conf)
                plot_one_box(xyxy, source, label=label, color=colors[int(cls)], line_thickness=3)

    # cv2.imshow('abc', source)
    # cv2.waitKey(5000)
    return source
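# A minimal usage sketch for the single-image detect() above, assuming the
# surrounding imports (cv2, torch, letterbox, ...) are in scope and
# 'final_weights.pt' exists; the file names here are illustrative only.
if __name__ == '__main__':
    frame = cv2.imread('example.jpg')  # BGR image, as the function expects
    annotated = detect(frame)          # draws boxes in place and returns the frame
    cv2.imwrite('example_annotated.jpg', annotated)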
def predict(self, img0, img=None, draw_bndbox=False, bndbox_format='min_max_list'):
    if img is None:
        img = self.send_whatever_to_device(img0)
    else:
        img = self.send_to_device(img)
    pred = self.model(img, augment=self.augment)[0]
    pred = non_max_suppression(pred, self.conf_thres, self.iou_thres,
                               classes=self.classes, agnostic=self.agnostic_nms)
    det = pred[0]
    if det is not None and len(det):
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
        if draw_bndbox:
            for *xyxy, conf, cls in det:
                label = '%s %.2f' % (self.names[int(cls)], conf)
                plot_one_box(xyxy, img0, label=label, color=self.colors[int(cls)])
    if bndbox_format == 'min_max_list':
        min_max_list = self.min_max_list(det)
        return min_max_list
def draw_bbox(img, pred, boxes):
    img_c = img.copy()
    for box in boxes:
        x1 = int(box[1] - box[3] / 2)
        y1 = int(box[2] - box[4] / 2)
        x2 = int(box[1] + box[3] / 2)
        y2 = int(box[2] + box[4] / 2)
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 0), 2)
    if pred is not None:
        for box in pred:
            x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])
            conf = box[4]
            cls = box[5]
            obj_conf = box[6]
            cls_conf = box[7]
            text = '%s|%.2f' % (names[int(cls)], conf)
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 2)
            cv2.putText(img, text, (x1, y1 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
            label = '%s %.2f %.2f %.2f' % (names[int(cls)], conf, obj_conf, cls_conf)
            plot_one_box(box, img_c, label=label, color=colors[int(cls)], line_thickness=3)
    return img, img_c
def draw_frame(frame_read, img, pred, cls_names, colors, out_img_name, save_img=True):
    # Process detections for each image
    for i, det in enumerate(pred):
        print("det: ", det)
        s_log = ''
        if det is not None and len(det):
            # Rescale boxes from img_size to im_ori size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], frame_read.shape).round()

            # Print results
            for c in det[:, -1].unique():
                n = (det[:, -1] == c).sum()  # detections per class
                s_log += '%g %ss, ' % (n, cls_names[int(c)])  # add to string

            # Write results
            for *xyxy, conf, cls in reversed(det):
                # Add bbox to image
                label = '%s %.2f' % (cls_names[int(cls)], conf)
                plot_one_box(xyxy, frame_read, label=label, color=colors[int(cls)])
        print('s_log: %s Done' % s_log)

    # Save results (image with detections)
    if save_img:
        cv2.imwrite(out_img_name, frame_read)
def detect():
    # Run inference
    pipe = 0  # local webcam
    pipe = 'http://192.168.1.7:8080/video'  # IP camera stream
    # pipe = 'video/MVI_4381.MOV'
    cap = cv2.VideoCapture(pipe)
    while True:
        ret_val, frame = cap.read()
        img = letterbox(frame, new_shape=imgsz)[0]
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        pred = model(img, augment=opt.augment)[0]
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes)

        for index, detect in enumerate(pred):
            if detect is not None and len(detect):
                # Rescale boxes from img_size to frame size
                detect[:, :4] = scale_coords(img.shape[2:], detect[:, :4], frame.shape).round()
                for *xyxy, conf, cls in detect:
                    label = names[int(cls)]
                    plot_one_box(xyxy, frame, label=label, color=colors[int(cls)], line_thickness=2)

        cv2.imshow("ObjectDetect", frame)
        if cv2.waitKey(1) == ord('q'):  # q to quit
            raise StopIteration
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4))).view(-1).tolist()
cls = int(cls)
img_object.append(xywh)
cls_object.append(names[cls])
if names[cls] == "hero" and conf > hero_conf:
    hero_conf = conf
    hero_index = idx
if view_img:  # Add bbox to image
    label = '%s %.2f' % (names[int(cls)], conf)
    plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=2)

# Game logic
thx = 30    # x-direction threshold when picking up items
thy = 30    # y-direction threshold when picking up items
attx = 150  # x-direction threshold when attacking
atty = 50   # y-direction threshold when attacking

if current_door(img0) == 1 and time.time() - door1_time_start > 10:
    door1_time_start = time.time()
    # move(direct="RIGHT", action_cache=action_cache, press_delay=press_delay,
    #      release_delay=release_delay)
    # ReleaseKey(direct_dic["RIGHT"])
def detect(weights='mdp/weights/weights.pt', source='mdp/videos', output='mdp/output',
           img_size=416, conf_thres=0.01, iou_thres=0.5, device='', classes=None,
           agnostic_nms=False, augment=False, update=False, scale_percent=50):
    save_img = True
    predicted_label = None
    out, imgsz = output, img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or \
        source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        # save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    row_num = 0
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].detach().unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in det:
                    predicted_label = names[int(cls)]
                    if predicted_label:
                        label_id = label_id_mapping.get(predicted_label)
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        print(('%s ' * 5 + '\n') % (label_id, *xywh))  # label format
                        # r = requests.post(source, json={'label': label_id})  # send result to rpi
                        # print(r.text)
                        if False and conf < confidence_threshold(label_id):
                            # fine tune for up arrow (white)
                            # cv2.imshow('ImageWindow', im0)
                            break
                        # if not check_bounding_box(xywh):
                        #     # cv2.imshow('ImageWindow', im0)
                        #     break
                        label = '%s %.2f' % (label_id, conf)
                        good, text = check_bounding_box(xywh, im0.shape[0], im0.shape[1])
                        if not good:
                            label = text
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                        # cv2.imshow('ImageWindow', im0)
                        break

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)
                    if cv2.waitKey(1) == ord('q'):  # q to quit
                        raise StopIteration
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or \
        source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    # add by zlf at 20201027: load FP16 model
    # model = torch.load(weights)['model']
    # for n, p in model.named_parameters():
    #     print(p.dtype)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

    # 20201019 load model by zlf
    # new method of loading weights; only for 'torch.save(model.state_dict())'
    # net = Model('./models/yolov5s.yaml').cuda()
    # state_dict = torch.jit.load('QuantCRNN_1_14000.pt', map_location=torch.device('cpu'))
    # model = state_dict
    # model.half().cuda()
    # model_dict = net.state_dict()
    # for k, v in state_dict.items():
    #     name = k[7:]  # remove `module.`
    #     model_dict[name] = v
    # net.load_state_dict(model_dict, strict=True)
    # model = net
    # imgsz = 320  # add by zlf at 20201009

    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        # TODO: cudnn.benchmark = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    # names = model.module.names if hasattr(model, 'module') else model.names
    names = [
        'CAR', 'CARPLATE', 'BICYCLE', 'TRICYCLE', 'PEOPLE', 'MOTORCYCLE',
        'LOGO_AUDI', 'LOGO_BENZE', 'LOGO_BENZC', 'LOGO_BMW', 'LOGO_BUICK',
        'LOGO_CHEVROLET', 'LOGO_CITROEN', 'LOGO_FORD', 'LOGO_HONDA',
        'LOGO_HYUNDAI', 'LOGO_KIA', 'LOGO_MAZDA', 'LOGO_NISSAN', 'LOGO_PEUGEOT',
        'LOGO_SKODA', 'LOGO_SUZUKI', 'LOGO_TOYOTA', 'LOGO_VOLVO', 'LOGO_VW',
        'LOGO_ZHONGHUA', 'LOGO_SUBARU', 'LOGO_LEXUS', 'LOGO_CADILLAC',
        'LOGO_LANDROVER', 'LOGO_JEEP', 'LOGO_BYD', 'LOGO_BYDYUAN', 'LOGO_BYDTANG',
        'LOGO_CHERY', 'LOGO_CARRY', 'LOGO_HAVAL', 'LOGO_GREATWALL',
        'LOGO_GREATWALLOLD', 'LOGO_ROEWE', 'LOGO_JAC', 'LOGO_HAFEI', 'LOGO_SGMW',
        'LOGO_CASY', 'LOGO_CHANAJNX', 'LOGO_CHANGAN', 'LOGO_CHANA',
        'LOGO_CHANGANCS', 'LOGO_XIALI', 'LOGO_FAW', 'LOGO_YQBT', 'LOGO_REDFLAG',
        'LOGO_GEELY', 'LOGO_EMGRAND', 'LOGO_GLEAGLE', 'LOGO_ENGLON',
        'LOGO_BAOJUN', 'LOGO_DF', 'LOGO_JINBEI', 'LOGO_BAIC', 'LOGO_WEIWANG',
        'LOGO_HUANSU', 'LOGO_FOTON', 'LOGO_HAIMA', 'LOGO_ZOTYEAUTO',
        'LOGO_MITSUBISHI', 'LOGO_RENAULT', 'LOGO_MG', 'LOGO_DODGE', 'LOGO_FIAT',
        'LOGO_INFINITI', 'LOGO_MINI', 'LOGO_TESLA', 'LOGO_SMART',
        'LOGO_BORGWARD', 'LOGO_JAGUAR', 'LOGO_HUMMER', 'LOGO_PORSCHE',
        'LOGO_LAMBORGHINI', 'LOGO_DS', 'LOGO_CROWN', 'LOGO_LUXGEN', 'LOGO_ACURA',
        'LOGO_LINCOLN', 'LOGO_SOUEAST', 'LOGO_VENUCIA', 'LOGO_TRUMPCHI',
        'LOGO_LEOPAARD', 'LOGO_ZXAUTO', 'LOGO_LIFAN', 'LOGO_HUANGHAI',
        'LOGO_HAWTAI', 'LOGO_REIZ', 'LOGO_CHANGHE', 'LOGO_GOLDENDRAGON',
        'LOGO_YUTONG', 'LOGO_HUIZHONG', 'LOGO_JMC', 'LOGO_JMCYUSHENG',
        'LOGO_LANDWIND', 'LOGO_NAVECO', 'LOGO_QOROS', 'LOGO_OPEL', 'LOGO_YUEJING'
    ]
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    # _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        f1.write('%s:' % path.split('/')[-1])
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]
        # pred = model(img.cuda())

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls, obj_conf, cls_conf in reversed(det):  # add by zlf at 20201026
                    # add by zlf at 20201019
                    x1 = int(xyxy[0].item())
                    y1 = int(xyxy[1].item())
                    x2 = int(xyxy[2].item())
                    y2 = int(xyxy[3].item())
                    f1.write("[%s,%.2f,%d,%d,%d,%d]" %
                             (names[int(cls.item())], round(conf.item() * 100, 2), x1, y1, x2, y2))

                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s|%.2f|%.2f|%.2f' % (names[int(cls)], conf, obj_conf, cls_conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
            f1.write('\n')

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or \
        source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    if os.path.exists(opt.features):
        shutil.rmtree(opt.features)  # delete features output folder
    if os.path.exists(opt.crops):
        shutil.rmtree(opt.crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(opt.features)  # make new output folder
    os.makedirs(opt.crops)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Log file dictionary: save frame numbers when a track_id object is detected
    log_frames = {"FPS": dataset.cap.get(cv2.CAP_PROP_FPS)}
    print("FRAMES PER SECOND ", dataset.cap.get(cv2.CAP_PROP_FPS))

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (det[:, -1] == idx_person).nonzero(as_tuple=False).squeeze(dim=1)  # 1. indices of 'person' class detections
                dets_ppl = det[idxs_ppl, :-1]  # 2. torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    for d in trackers:
                        # Deep SORT feature object saver
                        track_id = d[4]
                        fname_features = opt.features + '/ID_{}'.format(track_id)
                        fname_crops = opt.crops + '/ID_{}'.format(track_id)
                        if not os.path.exists(fname_features):
                            os.mkdir(fname_features)
                            os.mkdir(fname_crops)
                            log_frames['ID_' + str(track_id)] = []
                        # Choose the format used to save feature arrays on your machine:
                        # https://machinelearningmastery.com/how-to-save-a-numpy-array-to-file-for-machine-learning/
                        save_format = 'csv'
                        filename = fname_features + "/feature_frame_" + str(dataset.frame)
                        if save_format == 'csv':
                            savetxt(filename + '.csv', features[track_id], delimiter=',')
                            # data = numpy.loadtxt('data.csv', delimiter=',')
                        elif save_format == 'npy':
                            save(filename + '.npy', features[track_id])
                            # data = numpy.load('data.npy')
                        elif save_format == 'npz':
                            savez_compressed(filename + '.npz', features[track_id])
                            # dict_data = load('data.npz'); data = dict_data['arr_0']
                        # Update log file with track_id detection history
                        log_frames['ID_' + str(track_id)].append(dataset.frame)
                        # Save cropped image
                        im_crop = im0[d[1]:d[3], d[0]:d[2], :]
                        cv2.imwrite(filename=fname_crops + "/image_crop_" + str(dataset.frame) + '.jpg', img=im_crop)
                        plot_one_box(d[:4], im0, label='ID' + str(int(d[4])), color=colors[1], line_thickness=1)

                # Deep SORT: save the updated log file
                log_format = 'txt'
                if log_format == 'txt':
                    f_log = open(opt.features + "/log_detection.txt", "w")
                    f_log.write(str(log_frames))
                elif log_format == 'pkl':
                    f_log = open(opt.features + "/log_detection.pkl", "wb")
                    pickle.dump(log_frames, f_log)
                f_log.close()

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or \
        source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    folder_main = out.split('/')[0]
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    folder_features = folder_main + '/features'
    if os.path.exists(folder_features):
        shutil.rmtree(folder_features)  # delete features output folder
    folder_crops = folder_main + '/image_crops'
    if os.path.exists(folder_crops):
        shutil.rmtree(folder_crops)  # delete output folder with object crops
    os.makedirs(out)  # make new output folder
    os.makedirs(folder_features)  # make new output folder
    os.makedirs(folder_crops)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Frames per second
    fps = dataset.cap.get(cv2.CAP_PROP_FPS)
    critical_time_frames = opt.time * fps

    # COUNTER: initialization
    counter = VoteCounter(critical_time_frames, fps)
    print('CRITICAL TIME IS ', opt.time, 'sec, or ', counter.critical_time, ' frames')

    # Find index corresponding to a person
    idx_person = names.index("person")

    # Deep SORT: initialize the tracker
    cfg = get_config()
    cfg.merge_from_file(opt.config_deepsort)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # AlphaPose: initialization
    args_p = update_config(opt.config_alphapose)
    cfg_p = update_config(args_p.ALPHAPOSE.cfg)
    args_p.ALPHAPOSE.tracking = args_p.ALPHAPOSE.pose_track or args_p.ALPHAPOSE.pose_flow
    demo = SingleImageAlphaPose(args_p.ALPHAPOSE, cfg_p, device)
    output_pose = opt.output.split('/')[0] + '/pose'
    if not os.path.exists(output_pose):
        os.mkdir(output_pose)

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # COUNTER: compute urn centroid (1st frame only) and plot a bounding box around it
        if dataset.frame == 1:
            counter.read_urn_coordinates(opt.urn, im0s, opt.radius)
            counter.plot_urn_bbox(im0s)

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Deep SORT: person class only
                idxs_ppl = (det[:, -1] == idx_person).nonzero(as_tuple=False).squeeze(dim=1)  # 1. indices of 'person' class detections
                dets_ppl = det[idxs_ppl, :-1]  # 2. torch.tensor with 'person' detections
                print('\n {} people were detected!'.format(len(idxs_ppl)))

                # Deep SORT: convert data into a proper format
                xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu")
                confs = dets_ppl[:, 4].to("cpu")

                # Deep SORT: feed detections to the tracker
                if len(dets_ppl) != 0:
                    trackers, features = deepsort.update(xywhs, confs, im0)
                    # Tracks inside the critical sphere
                    trackers_inside = []
                    for i, d in enumerate(trackers):
                        plot_one_box(d[:-1], im0, label='ID' + str(int(d[-1])), color=colors[1], line_thickness=1)
                        # COUNTER
                        d_include = counter.centroid_distance(d, im0, colors[1], dataset.frame)
                        if d_include:
                            trackers_inside.append(d)

                    # AlphaPose: draw skeletons for bounding boxes inside the critical sphere
                    if len(trackers_inside) > 0:
                        pose = demo.process('frame_' + str(dataset.frame), im0, trackers_inside)
                        im0 = demo.vis(im0, pose)
                        demo.writeJson([pose], output_pose, form=args_p.ALPHAPOSE.format, for_eval=args_p.ALPHAPOSE.eval)
                        counter.save_features_and_crops(im0, dataset.frame, trackers_inside, features, folder_main)

            cv2.putText(im0, 'Voted ' + str(len(counter.voters_count)), (50, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2)
            print('NUM VOTERS', len(counter.voters))
            print(list(counter.voters.keys()))

            # COUNTER
            if len(counter.voters) > 0:
                counter.save_voter_trajectory(dataset.frame, folder_main)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
def detect(opt, save_img=False):
    out, source, weights, imgsz, namelist = \
        opt.output, opt.source, opt.weights, opt.img_size, opt.namelist
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    save_img = True
    dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img)  # run once
    idx = 0
    ckname = []
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        idx += 1
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        cnt = 0
        # Process detections
        for i, det in enumerate(pred):  # detections per image
            p, s, im0 = path, '', im0s
            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                cntname = 0
                # Write results
                img2 = im0.copy()
                nperson = []
                nname = []
                for *xyxy, conf, cls in reversed(det):
                    if save_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        # Create a `classes` variable (the class name) and pass it to the plot helper
                        classes = names[int(cls)]
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)],
                                     line_thickness=3, classes=classes)
                        # Store the coordinates of every object judged to be a person
                        if classes == 'person':
                            nperson.append([int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])])
                        if classes == 'name':
                            nname.append([int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])])

                # When the name list is non-empty, cover the region with the
                # corresponding area from the frame copied earlier
                # print(len(nperson))
                if len(nname) > 0:
                    key = 45
                    for pi in range(len(nperson)):
                        check = False
                        for ii in range(len(nname)):
                            if nname[ii][1] >= nperson[pi][1] and nname[ii][3] <= nperson[pi][3] and \
                                    nname[ii][0] >= nperson[pi][0] and nname[ii][2] <= nperson[pi][2] and not check:
                                check = True
                                proi = img2[nname[ii][1]:nname[ii][3], nname[ii][0]:nname[ii][2]]
                                temp_img = "{0}_{1}_{2}_{3}.jpg".format(nname[ii][1], nname[ii][3],
                                                                        nname[ii][0], nname[ii][2])
                                image_path = "./temp/{0}".format(temp_img)
                                img_shape = proi.shape
                                # print(proi)
                                # image_path2 = "./temp/tt_{0}".format(temp_img)
                                encrypt_function(proi, image_path, key)
                                # os.remove(image_path)
                                text_ = decrypt_function(image_path, key, img_shape)
                                # cv2.imwrite(image_path2, text_)
                                # print("coord:", nname[ii][1], nname[ii][3], nname[ii][0], nname[ii][2])
                                # OCR (check whether the name matches) => returns True / False
                                result, check_name = ocr.check_name(text_, namelist)
                                if result == True:
                                    cntname += 1
                                    if check_name not in ckname:
                                        ckname.append(check_name)
                                    roi = img2[nperson[pi][1]:nperson[pi][3], nperson[pi][0]:nperson[pi][2]]
                                    im0[nperson[pi][1]:nperson[pi][3], nperson[pi][0]:nperson[pi][2]] = roi
                                    # cv2.imwrite('./check/{}.jpg'.format(idx), im0)

            # Print time (inference + NMS)
            # print('%sDone. (%.3fs)' % (s, t2 - t1))
            removeAllFile('./temp')

            # Save results (image with detections)
            if save_img:
                if vid_path != save_path:  # new video
                    vid_path = save_path
                    if isinstance(vid_writer, cv2.VideoWriter):
                        vid_writer.release()  # release previous video writer
                    fourcc = 'mp4v'  # output video codec
                    fps = vid_cap.get(cv2.CAP_PROP_FPS)
                    w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                    vid_writer = cv2.VideoWriter('./output.mp4', cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                vid_writer.write(im0)
def detect(save_img=False):
    print_div('INIT')
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    # Initialize
    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference ###############################################################
    print_div("RUN INFERENCE")
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    video_path = source
    cap = cv2.VideoCapture(video_path)
    print_div('Start Play VIDEO')
    while cap.isOpened():
        ret, frame = cap.read()
        t0 = time.time()
        if not ret:
            print_div('No Frame')
            break

        fps_t1 = time.time()
        img, img0 = img_preprocess(frame)  # img: resized, img0: original
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS: get the values for each prediction
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier: get the label for those values
        if classify:
            pred = apply_classifier(pred, modelc, img, img0)

        # Draw boxes
        for i, det in enumerate(pred):
            s = '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

        # Print results (inference + NMS)
        print_div('%sDone. (%.3fs)' % (s, t2 - t1))

        # Draw image
        x, y, w, h = (img0.shape[1] // 4), 25, (img0.shape[1] // 2), 30
        cv2.rectangle(img0, (x, 10), (x + w, y + h), (0, 0, 0), -1)
        rescale = 0.5
        re_img0 = (int(img0.shape[1] * rescale), int(img0.shape[0] * rescale))
        cv2.putText(img0,
                    '{} | inference: {:.4f}s | fps: {:.4f}'.format(
                        opt.weights[0], t2 - t1, 1 / (time.time() - t0)),
                    (x + 20, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow('Stream_Detected', cv2.resize(img0, re_img0))
        key = cv2.waitKey(1)
        if key == ord('q'):
            break

    # After break
    cap.release()
    cv2.destroyAllWindows()
def detect(opt, dp, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size; if not a multiple of 32, round up and warn
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if opt.use_roi:
        print(dp.cl)
        # cl = opt.control_line
        cl = dp.cl
        roi_in_pixels = np.array([0, cl[0], 1280, cl[1]])  # two corner points: x1, y1, x2, y2
    else:
        roi_in_pixels = None

    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, roi=roi_in_pixels)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names  # handle the extra 'module' attribute on DataParallel checkpoints
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]  # one random color per class

    # prune
    # torch_utils.prune(model, 0.7)
    # model.eval()

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # dry run once to warm up

    detected_img_id = 0
    time_list = [None] * len(dataset)
    for iii, (path, img, im0s, vid_cap, recover) in enumerate(dataset):
        # img.shape [3, 384, 640], im0s.shape [720, 1280, 3]
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # reshape [3, h, w] to [batch_size, 3, h, w]

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]  # pred.shape [1, 15120, 25]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        print(f'infer time:{t2 - t1:.4f}s ', end='')
        time_list[iii] = t2 - t1
        # pred is a list of length batch_size, one tensor per image; each tensor is [n, 6]: xyxy, conf, cls

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if opt.use_roi and det is not None:
                small_img_shape = torch.from_numpy(np.array([recover[1], recover[0]]).astype(float))
                det[:, 0], det[:, 2] = det[:, 0] + recover[2], det[:, 2] + recover[2]
                det[:, 1], det[:, 3] = det[:, 1] + recover[3], det[:, 3] + recover[3]
            else:
                small_img_shape = img.shape[2::]

            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s  # im0s is the original image

            save_path = str(Path(out) / Path(p).name)  # output/filenamexxxx.jpg
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')  # output/filenamexxxx.txt
            s += '%gx%g ' % img.shape[2:]  # print string, e.g. 640x640
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh; [720, 1280, 3] indexed to [1280, 720, 1280, 720]

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size: original-image pixel coords x1 y1 x2 y2
                det[:, :4] = scale_coords(small_img_shape, det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string, e.g. "1 crosswalk"

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            x, y, w, h = xywh
                            string = f"{int(cls)} {conf.item():.4f} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n"
                            f.write(string)  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        if names[int(cls)] in opt.plot_classes:
                            # color = colors[int(cls)]
                            color = (255, 85, 33)
                            plot_one_box(xyxy, im0, label=label, color=color, line_thickness=5)

            # Print time (inference + NMS)
            prt_str = '%sDone. (%.5fs)' % (s, t2 - t1)
            print(prt_str)
            os.system(f'echo "{prt_str}" >> {opt.output}/detect.log')

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    im0 = dp.dmpost(im0, det, det_id=detected_img_id, filename=Path(p).name, names=names)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))

                    tmp_filename = Path(txt_path).stem
                    im0 = dp.dmpost(im0, det, det_id=detected_img_id, filename=tmp_filename, names=names)
                    vid_writer.write(im0)
            detected_img_id += 1

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    time_arr = np.array(time_list)
    prnt = f'Done. Network mean inference time: {np.mean(time_arr):.5f}s, Mean FPS: {1 / np.mean(time_arr):.4f}.'
    print(f'\nModel size {opt.img_size} inference {prnt}')
    os.system(f'echo "{prnt}" >> {opt.output}/detect.log')
    os.system(f'echo "useroi {opt.img_size} {prnt}" >> detect2.log')
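# A small worked example of the ROI "recover" mapping used above. The indexing
# implies recover = (roi_w, roi_h, x_offset, y_offset) (an assumption from this
# excerpt): boxes detected inside the cropped ROI are shifted back into
# full-frame pixel coordinates by adding the crop offsets.
import numpy as np

recover = (1280, 300, 0, 400)                    # ROI is 1280x300, cropped at (0, 400)
box_in_roi = np.array([100., 50., 200., 120.])   # x1, y1, x2, y2 inside the ROI
offsets = np.array([recover[2], recover[3], recover[2], recover[3]])
print(box_in_roi + offsets)                      # [100. 450. 200. 520.] in the full frame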
def detect(self, save_img=False):
    # Get names and colors
    names = self.model.module.names if hasattr(self.model, 'module') else self.model.names
    # colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    # t0 = time.time()
    vid_path, vid_writer = None, None  # initialized here; the save block below relies on them
    img = torch.zeros((1, 3, self.imgsz, self.imgsz), device=self.device)  # init img
    _ = self.model(img.half() if self.half else img) if self.device.type != 'cpu' else None  # run once

    for path, img, im0s, vid_cap in self.dataset:
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = self.model(img, augment=self.opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, self.opt.conf_thres, self.opt.iou_thres,
                                   classes=self.opt.classes, agnostic=self.opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if self.classify:
            pred = apply_classifier(pred, self.modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            '''
            if self.webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
            '''
            p, s, im0 = path, '', im0s

            save_path = str(Path(self.out) / Path(p).name)
            txt_path = str(Path(self.out) / Path(p).stem) + (
                '_%g' % self.dataset.frame if self.dataset.mode == 'video' else '')
            # s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            # if there are detections
            if det is not None and len(det):
                total = 0.0
                goods_type = None
                percent = None
                more_than_90 = 0  # counts detections above the 0.85 threshold (name kept from the original)

                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %s, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if self.save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if self.save_img or self.view_img:  # Add bbox to image
                        goods_type = names[int(cls)]
                        percent = float('%.2f' % conf)
                        label = '%s %.2f' % (names[int(cls)], conf)
                        print(percent)
                        if percent > 0.85:
                            plot_one_box(xyxy, im0, label=label, color=(0, 0, 255), line_thickness=3)
                            total = total + percent
                            more_than_90 += 1

                # avg = total / len(det)
                if more_than_90 != 0:
                    avg = round(total / more_than_90, 2)
                    print("names : ", names[int(cls)])
                    # print("confidence: %.2f" % percent)
                    cv_img, name, color = self.d.load_image(goods_type)
                    if cv_img is not None:
                        qt_img = self.convert_cv_qt(cv_img)
                        self.updateFeatureLable(qt_img)
                        # self.infomsg_append("[DETECT] type: %s, code: %s, count: %d" % (name, goods_type, len(det)))
                        img_time = datetime.datetime.now().strftime("%H:%M:%S")
                        img_date = datetime.datetime.now().strftime("%Y_%m_%d")
                        self.infomsg_append(img_time + ",%s,%s,%d" % (name, goods_type, more_than_90))
                        log_string = img_time + "," + name + "," + goods_type + "," + str(more_than_90) + "," + str(avg)

                        try:
                            if not os.path.exists("log"):
                                os.makedirs("log")
                        except OSError:
                            print('Error: creating "log" directory failed')

                        f = open("./log/" + img_date + '_log.csv', mode='at', encoding='utf-8')
                        f.writelines(log_string + '\n')
                        f.close()
                        print(log_string)
                        # print("DB image upload succeeded")
                    # no match in the DB
                    else:
                        print("Item not found in the DB")
                        self.iv.clear()
                        self.f_label.clear()
            else:
                print("no detections")
                # self.infomsg_append("[DETECT] this item needs additional training.")
            print(s)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            self.iv.setImage(self.convert_cv_qt(im0))

            # Stream results
            if self.view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if self.save_img:
                if self.dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)
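# The logging block above hand-assembles comma-separated lines; a sketch of the
# same daily log written with the csv module instead (column layout assumed
# from log_string: time, name, goods_type, count, average confidence):
import csv
import datetime
import os

def append_log_row(name, goods_type, count, avg, log_dir='log'):
    os.makedirs(log_dir, exist_ok=True)  # replaces the try/except directory check
    now = datetime.datetime.now()
    log_file = os.path.join(log_dir, now.strftime('%Y_%m_%d') + '_log.csv')
    with open(log_file, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f).writerow([now.strftime('%H:%M:%S'), name, goods_type, count, avg])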
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
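# A worked example of the normalized xywh written when save_txt is set:
# gn is [w, h, w, h] of the original image, so dividing the pixel-space
# center/size box by gn yields the 0-1 coordinates YOLO label files expect.
import torch

im0_shape = (720, 1280, 3)                     # h, w, c
gn = torch.tensor(im0_shape)[[1, 0, 1, 0]]     # [1280, 720, 1280, 720]
x1, y1, x2, y2 = 320., 180., 960., 540.       # pixel box
xywh_px = torch.tensor([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1])
print((xywh_px / gn).tolist())                # [0.5, 0.5, 0.5, 0.5]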
def detect(save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source.isnumeric() or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt')

    # Initialize
    set_logging()
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
    for path, img, im0s, vid_cap in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s

            img_crops = im0s.copy()  # separate copy for cropping, so im0 keeps the bbox drawings
            out_path = str(Path(out))
            file_name = str(Path(p).name).split('.')[0]  # file name without extension
            save_path = f"{Path(out)}/{Path(p).name}"
            txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            # CSV bookkeeping is initialized before the detection check so the
            # no-detection branch below can still write a row
            info_to_csv = {'file': file_name, 'objects_detected': True,
                           'handle_loc': None, 'handle_width': None, 'handle_height': None,
                           'handle_process': None, 'tg_width': None, 'tg_height': None,
                           'tg_process': None, 'px_ratio': None}
            field_names = ['file', 'objects_detected', 'handle_loc', 'handle_width', 'handle_height',
                           'handle_process', 'tg_width', 'tg_height', 'tg_process', 'px_ratio']
            csv_filepath = './tailgate_data.csv'

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                handles_ymax = []
                handle_mids = []
                tailgates_ymin = []
                tailgates_ymax = []
                tailgate_ythird_coord = []
                px_ratio = 1
                crop_coords = {}

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                det_sorted = sorted(det, key=lambda x: x[-1])  # sort detected items by class index

                # Write results; reversed is helpful here because the license plate
                # (class 2) is then processed before handle/tailgate, fixing px_ratio first
                for *xyxy, conf, cls in reversed(det_sorted):
                    x1, y1, x2, y2 = int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3])
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

                    if int(cls) == 3:  # truck cropping (future development; requires retraining with a truck class)
                        img_crops = img_crops[y1:y2, x1:x2]
                    elif int(cls) == 2:  # license plate
                        license_width = abs(int(x2 - x1))
                        px_ratio = license_width / 12  # pixels per inch; license plates are 12" wide
                        info_to_csv['px_ratio'] = px_ratio
                        # im_p = img_crops[y1:y2, x1:x2]  # currently no need to crop
                        # cv2.imwrite(f'{out_path}/{file_name}_p_edge.png', im_p)  # or to save the plate image
                    elif int(cls) == 1:  # handle
                        im_h = img_crops[y1:y2, x1:x2]
                        crop_coords['h'] = [y1, y2, x1, x2]
                        cv2.imwrite(f'{out_path}/{file_name}_yolo_h.png', im_h)
                    elif int(cls) == 0:  # tailgate
                        im_t = img_crops[y1:y2, x1:x2]
                        crop_coords['tg'] = [y1, y2, x1, x2]
                        cv2.imwrite(f'{out_path}/{file_name}_yolo_tg.png', im_t)

                    if save_img or view_img:  # Add bbox to image
                        # label = '%s %.2f' % (names[int(cls)], conf)  # confidence not needed
                        label = '%s ' % (names[int(cls)])
                        coord1, coord2, dim_label = plot_one_box(xyxy, im0, label=label, color=colors[int(cls)],
                                                                 line_thickness=3, px_ratio=px_ratio)

                        # get important points for line drawing
                        if int(cls) == 1 and int(abs(y1 - y2)) < 175:  # handle
                            ymax = max(coord1[1], coord2[1])
                            handles_ymax.append(ymax)
                            xmid = int((coord1[0] + coord2[0]) / 2)
                            ymid = int((coord1[1] + coord2[1]) / 2)
                            handle_mids.append([xmid, ymid])
                        elif int(cls) == 0:  # tailgate
                            tailgate_xmin = min(coord1[0], coord2[0])
                            ymax = max(coord1[1], coord2[1])
                            tailgates_ymax.append(ymax)
                            ymin = min(coord1[1], coord2[1])
                            tailgates_ymin.append(ymin)
                            tailgate_ythird = int(abs(coord1[1] - coord2[1]) / 3 + ymin)
                            tailgate_ythird_coord.append([tailgate_xmin, tailgate_ythird])

                # Draws and labels the distance from bottom of handle to bottom of tailgate.
                # If the handle is in the top 1/3 of the tailgate, returns the y coord of the
                # handle bottom, else returns False.
                adj_tailgate_top, info_to_csv = draw_dist_btm_h_to_btm_t(im0, handle_mids, handles_ymax,
                                                                         tailgates_ymax, tailgate_ythird_coord,
                                                                         px_ratio, info_to_csv)
                if adj_tailgate_top > crop_coords['tg'][0]:  # this all affects final_tailgate()
                    crop_coords['diff_adjust'] = int(adj_tailgate_top - crop_coords['tg'][0])
                    crop_coords['tg'][0] = int(adj_tailgate_top)
                    transp_h = False
                else:
                    # gets the handle surrounded by transparency
                    transp_h, full_handle_process = handle_detect_and_mask(im_h)
                    info_to_csv['handle_process'] = " >>> ".join(full_handle_process)
                    crop_coords['diff_adjust'] = False
                    try:
                        cv2.imwrite(f'{out_path}/{file_name}_transparent_h.png', transp_h)
                    except Exception:
                        pass

                # gets the tailgate surrounded by transparency
                transp_tg, full_tailgate_process = tailgate_detect_and_mask(im_t)
                info_to_csv['tg_process'] = " >>> ".join(full_tailgate_process)
                try:
                    cv2.imwrite(f'{out_path}/{file_name}_transparent_tg.png', transp_tg)
                except Exception:
                    pass

                final_image, info_to_csv = final_truck(img_crops, transp_tg, transp_h, crop_coords['tg'],
                                                       crop_coords['h'], crop_coords['diff_adjust'], info_to_csv)
                cv2.imwrite(f'{out_path}/{file_name}_full_transparency.png', final_image)
            else:
                info_to_csv['objects_detected'] = False

            # write or append info_to_csv
            if os.path.isfile(csv_filepath):
                append_dict_as_row(csv_filepath, info_to_csv, field_names)
            else:
                create_csv_headers_from_dict(csv_filepath, info_to_csv, field_names)
                append_dict_as_row(csv_filepath, info_to_csv, field_names)

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer

                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to %s' % Path(out))
        if platform.system() == 'Darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
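# append_dict_as_row() and create_csv_headers_from_dict() are called above but
# not defined in this excerpt; a minimal sketch of both, assuming they are thin
# wrappers around csv.DictWriter:
import csv

def create_csv_headers_from_dict(csv_filepath, row_dict, field_names):
    # row_dict is accepted to match the call site; only the header row is written here
    with open(csv_filepath, 'w', newline='') as f:
        csv.DictWriter(f, fieldnames=field_names).writeheader()

def append_dict_as_row(csv_filepath, row_dict, field_names):
    # appends one detection record as a CSV row, keyed by the shared field names
    with open(csv_filepath, 'a', newline='') as f:
        csv.DictWriter(f, fieldnames=field_names).writerow(row_dict)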