def recog2(det, im0, device, img_lp, imgsz_recog, half, model_recog, all_t2_t1,
           classify, modelc, names_recog, save_txt, gn, txt_path, save_img,
           view_img, colors):
    # Write results
    for *xyxy, conf, cls in reversed(det):
        # But first, recognition: crop the license plate region from the full image
        img_lp, img_lp0 = extract_img_lp(im0, xyxy, img_lp, device, imgsz_recog, half)

        t1 = time_synchronized()

        # Inference
        pred_lp = model_recog(img_lp, augment=opt.augment)[0]

        # Apply NMS
        pred_lp = non_max_suppression(pred_lp, opt.conf_thres_recog, opt.iou_thres_recog,
                                      classes=opt.classes_recog, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        all_t2_t1 = all_t2_t1 + t2 - t1

        # Apply Classifier
        if classify:
            pred_lp = apply_classifier(pred_lp, modelc, img_lp, img_lp0)

        # check_lp_lines_type: determine whether the plate has one or two lines
        cls = check_lp_lines_type(pred_lp[0], cls, img_lp, img_lp0)

        # Sort characters based on pred_lp
        license_str = sort_characters(pred_lp[0], cls, img_lp, img_lp0, names_recog)
        if len(license_str) == 0:
            continue

        if save_txt:  # Write to file
            xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
            line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
            with open(txt_path + '.txt', 'a') as f:
                f.write(license_str + ' ' + ('%g ' * len(line)).rstrip() % line + '\n')

        if save_img or view_img:  # Add bbox to image
            # label = '%s %.2f' % (names[int(cls)], conf)
            label = '%s %.2f' % (license_str, conf)
            line_thickness = 3 if im0.shape[0] < 500 else 4
            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)],
                         line_thickness=line_thickness)

    return all_t2_t1
def detect(self, img):
    with torch.no_grad():
        # Load image
        im0 = img.copy()
        img = letterbox(img, new_shape=self.imgsz)[0]  # Padded resize

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = torch.from_numpy(np.ascontiguousarray(img)).to(self.device)
        img = img.float() / 255.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        pred = self.model(img, augment=False)[0]

        # Apply NMS
        pred = non_max_suppression(pred, self.conf_thres, self.iou_thres,
                                   classes=None, agnostic=self.agnostic_nms)

        # Apply Classifier
        if self.classify:
            pred = apply_classifier(pred, modelc, img, im0)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            # Rescale boxes from img_size to im0 size
            if det is not None and len(det):
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
                det = det.cpu().numpy()

        return det
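# A minimal, self-contained sketch of consuming the array returned by the
# detect() method above: one row per detection with columns x1, y1, x2, y2,
# conf, cls. The sample values and class list below are made up for illustration.
import numpy as np

det = np.array([[ 50.,  40., 150., 240., 0.91, 0.],
                [320., 120., 400., 300., 0.47, 2.]])

names = ['person', 'bicycle', 'car']  # hypothetical class list
if det is not None and len(det):
    for x1, y1, x2, y2, conf, cls in det:
        print('%s %.2f at (%d, %d, %d, %d)' % (names[int(cls)], conf, x1, y1, x2, y2))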
def multithreading(cap, device, half, names, colors, model, q):
    while cap.isOpened():
        ret, frame = cap.read()
        t0 = time.time()
        if not ret:
            print_div('No Frame')
            break

        fps_t1 = time.time()
        img, img0 = img_preprocess(frame)  # img: resized, img0: original
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS: get the values of every prediction
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier: get the label for those values (second stage disabled here)
        if False:
            pred = apply_classifier(pred, modelc, img, img0)

        # Draw boxes
        for i, det in enumerate(pred):
            s = '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

        # Print results (inference + NMS)
        # print_div('%sDone. (%.3fs)' % (s, t2 - t1))

        # Draw overlay with weights name, inference time and FPS
        x, y, w, h = (img0.shape[1] // 4), 25, (img0.shape[1] // 2), 30
        cv2.rectangle(img0, (x, 10), (x + w, y + h), (0, 0, 0), -1)
        cv2.putText(img0,
                    '{} | inference: {:.4f}s | fps: {:.4f}'.format(
                        opt.weights[0], t2 - t1, 1 / (time.time() - t0)),
                    (x + 20, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        q.put(img0)
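# A minimal sketch of the consumer side of the producer queue used by
# multithreading() above. The thread/queue wiring is an assumption for
# illustration; cap, device, half, names, colors and model are assumed to be
# prepared exactly as in the other detect() scripts in this file.
import queue
import threading

import cv2

q = queue.Queue(maxsize=8)  # bounded, so a slow display cannot grow memory without limit
worker = threading.Thread(target=multithreading,
                          args=(cap, device, half, names, colors, model, q),
                          daemon=True)
worker.start()

while worker.is_alive() or not q.empty():
    try:
        frame = q.get(timeout=1.0)  # annotated frame produced by the worker
    except queue.Empty:
        continue
    cv2.imshow('stream', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()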
def detect( model="mobilenet_thin", # A model option for being cool weights='yolov5s.pt', # model.pt path(s) source='data/images', # file/dir/URL/glob, 0 for webcam imgsz=640, # inference size (pixels) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold max_det=1000, # maximum detections per image device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu view_img=False, # show results save_txt=False, # save results to *.txt save_conf=False, # save confidences in --save-txt labels save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos classes=None, # filter by class: --class 0, or --class 0 2 3 agnostic_nms=False, # class-agnostic NMS augment=False, # augmented inference update=False, # update all models project='runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference ): w, h = 432, 368 e = TfPoseEstimator(get_graph_path(model), target_size=(w, h)) save_img = not nosave and not source.endswith( '.txt') # save inference images webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories save_dir = Path(project) #save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(device) half &= device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check image size names = model.module.names if hasattr( model, 'module') else model.names # get class names if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: dataset = LoadImages(source, img_size=imgsz, stride=stride) # Run inference breakCond = False if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Openpose getting keypoints and individual crops print("\n") myImg = im0s.copy() keypoints, humans = getKeyPoints(myImg, e, w, h) crops = [ getCrop(point[0], myImg, 10, device, point[1] / 2) for point in keypoints ] # Inference t1 = time_synchronized() pred = model(img, augment=augment)[0] # Apply NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) t2 = time_synchronized() # Need to adjust bboxes to full image if len(pred) > 0: breakCond = True # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process 
detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], f'{i}: ', im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Check if any overlap between keypoint and det (handheld weapon) for detection in det: for crop in crops: if bbox_iou(detection, crop) > 0: cv2.putText(im0, "Spider-Sense Tingling!", (30, 90), cv2.FONT_HERSHEY_SIMPLEX, 3, (255, 0, 0), 5) break # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else ( names[c] if hide_conf else f'{names[c]} {conf:.2f}') plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=line_thickness) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # write keypoint boxes for *xyxy, conf, cls in reversed(crops): plot_one_box(xyxy, imc, label="keyP", color=colors(c, True), line_thickness=line_thickness) # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) im0 = TfPoseEstimator.draw_humans(im0, humans, imgcopy=False) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") if update: strip_optimizer(weights) # update model (to fix SourceChangeWarning) print(f'Done. ({time.time() - t0:.3f}s)')
def run( weights=ROOT / 'yolov5s.pt', # model.pt path(s) source=ROOT / 'data/images', # file/dir/URL/glob, 0 for webcam imgsz=640, # inference size (pixels) conf_thres=0.25, # confidence threshold iou_thres=0.45, # NMS IOU threshold max_det=1000, # maximum detections per image device='', # cuda device, i.e. 0 or 0,1,2,3 or cpu view_img=False, # show results save_txt=False, # save results to *.txt save_conf=False, # save confidences in --save-txt labels save_crop=False, # save cropped prediction boxes nosave=False, # do not save images/videos classes=None, # filter by class: --class 0, or --class 0 2 3 agnostic_nms=False, # class-agnostic NMS augment=False, # augmented inference visualize=False, # visualize features update=False, # update all models project=ROOT / 'runs/detect', # save results to project/name name='exp', # save results to project/name exist_ok=False, # existing project/name ok, do not increment line_thickness=3, # bounding box thickness (pixels) hide_labels=False, # hide labels hide_conf=False, # hide confidences half=False, # use FP16 half-precision inference dnn=False, # use OpenCV DNN for ONNX inference ): source = str(source) save_img = not nosave and not source.endswith( '.txt') # save inference images webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories save_dir = increment_path(Path(project) / name, exist_ok=exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(device) half &= device.type != 'cpu' # half precision only supported on CUDA # Load model w = str(weights[0] if isinstance(weights, list) else weights) classify, suffix, suffixes = False, Path(w).suffix.lower(), [ '.pt', '.onnx', '.tflite', '.pb', '' ] check_suffix(w, suffixes) # check weights have acceptable suffix pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes) # backend booleans stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults if pt: model = torch.jit.load(w) if 'torchscript' in w else attempt_load( weights, map_location=device, fuse=False) stride = int(model.stride.max()) # model stride names = model.module.names if hasattr( model, 'module') else model.names # get class names """ for _, param in enumerate(model.named_parameters()): print("====>", param[0], param[1].shape) torch.save(model.state_dict(), 'new_params.pt') for k, v in model.state_dict().items(): print(k, v.shape) exit() """ if half: model.half() # to FP16 if classify: # second-stage classifier modelc = load_classifier(name='resnet50', n=2) # initialize modelc.load_state_dict( torch.load('resnet50.pt', map_location=device)['model']).to(device).eval() elif onnx: if dnn: # check_requirements(('opencv-python>=4.5.4',)) net = cv2.dnn.readNetFromONNX(w) else: check_requirements(('onnx', 'onnxruntime')) import onnxruntime session = onnxruntime.InferenceSession(w, None) else: # TensorFlow models check_requirements(('tensorflow>=2.4.1', )) import tensorflow as tf if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt def wrap_frozen_graph(gd, inputs, outputs): x = tf.compat.v1.wrap_function( lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped import return x.prune( tf.nest.map_structure(x.graph.as_graph_element, inputs), tf.nest.map_structure(x.graph.as_graph_element, outputs)) graph_def = tf.Graph().as_graph_def() graph_def.ParseFromString(open(w, 'rb').read()) 
frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: model = tf.keras.models.load_model(w) elif tflite: interpreter = tf.lite.Interpreter( model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs int8 = input_details[0][ 'dtype'] == np.uint8 # is TFLite quantized uint8 model imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt) bs = len(dataset) # batch_size else: dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt) bs = 1 # batch_size vid_path, vid_writer = [None] * bs, [None] * bs # Run inference if pt and device.type != 'cpu': model( torch.zeros(1, 3, *imgsz).to(device).type_as( next(model.parameters()))) # run once dt, seen = [0.0, 0.0, 0.0], 0 for path, img, im0s, vid_cap in dataset: t1 = time_sync() if onnx: img = img.astype('float32') else: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img = img / 255.0 # 0 - 255 to 0.0 - 1.0 if len(img.shape) == 3: img = img[None] # expand for batch dim t2 = time_sync() dt[0] += t2 - t1 # Inference if pt: visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False pred = model(img, augment=augment, visualize=visualize)[0] anchor_grid = model.model[-1].anchors * model.model[-1].stride[ ..., None, None] delattr(model.model[-1], 'anchor_grid') # model.model[-1] is detect layer model.model[-1].register_buffer("anchor_grid", anchor_grid) model.to(device).eval() wts_file = "generated.wts" with open(wts_file, 'w') as f: f.write('{}\n'.format(len(model.state_dict().keys()))) for k, v in model.state_dict().items(): if len(v.shape) == 0: continue print(k, v.shape) vr = v.reshape(-1).cpu().numpy() f.write('{} {} {} {}'.format( k, len(vr), v.shape[0], v.shape[1] if len(v.shape) > 1 else 0)) for vv in vr: f.write(' ') f.write(struct.pack('>f', float(vv)).hex()) f.write('\n') exit() elif onnx: if dnn: net.setInput(img) pred = torch.tensor(net.forward()) else: pred = torch.tensor( session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img})) else: # tensorflow model (tflite, pb, saved_model) imn = img.permute(0, 2, 3, 1).cpu().numpy() # image in numpy if pb: pred = frozen_func(x=tf.constant(imn)).numpy() elif saved_model: pred = model(imn, training=False).numpy() elif tflite: if int8: scale, zero_point = input_details[0]['quantization'] imn = (imn / scale + zero_point).astype( np.uint8) # de-scale interpreter.set_tensor(input_details[0]['index'], imn) interpreter.invoke() pred = interpreter.get_tensor(output_details[0]['index']) if int8: scale, zero_point = output_details[0]['quantization'] pred = (pred.astype(np.float32) - zero_point) * scale # re-scale pred[..., 0] *= imgsz[1] # x pred[..., 1] *= imgsz[0] # y pred[..., 2] *= imgsz[1] # w pred[..., 3] *= imgsz[0] # h pred = torch.tensor(pred) t3 = time_sync() dt[1] += t3 - t2 # NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det) dt[2] += time_sync() - t3 # Second-stage classifier (optional) if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process predictions for i, det in enumerate(pred): # per image seen += 1 if webcam: # batch_size >= 1 p, s, im0, 
frame = path[i], f'{i}: ', im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s.copy(), getattr( dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if hide_labels else ( names[c] if hide_conf else f'{names[c]} {conf:.2f}') annotator.box_label(xyxy, label, color=colors(c, True)) if save_crop: save_one_box(xyxy, imc, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Print time (inference-only) print(f'{s}Done. ({t3 - t2:.3f}s)') # Stream results im0 = annotator.result() if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path[i] != save_path: # new video vid_path[i] = save_path if isinstance(vid_writer[i], cv2.VideoWriter): vid_writer[i].release( ) # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' vid_writer[i] = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer[i].write(im0) # Print results t = tuple(x / seen * 1E3 for x in dt) # speeds per image print( f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {colorstr('bold', save_dir)}{s}") if update: strip_optimizer(weights) # update model (to fix SourceChangeWarning)
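# Several of these scripts repeat the same label-file conversion: divide an
# xyxy box by the whwh gain tensor (gn) to get normalized center-x, center-y,
# width, height. A small, self-contained helper mirroring that idiom; the
# function name and the example numbers are illustrative, not from the original.
def to_normalized_xywh(xyxy, im0_shape):
    """Convert one xyxy box (pixels) to normalized xywh for a YOLO label file."""
    x1, y1, x2, y2 = [float(v) for v in xyxy]
    h, w = im0_shape[:2]  # im0_shape is (height, width, channels)
    return [((x1 + x2) / 2) / w, ((y1 + y2) / 2) / h, (x2 - x1) / w, (y2 - y1) / h]


# Example: a 100x200 box with top-left corner at (50, 40) in a 640x480 image
print(to_normalized_xywh([50, 40, 150, 240], (480, 640, 3)))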
def detect(self, save_img=False):
    # Get names and colors
    names = self.model.module.names if hasattr(self.model, 'module') else self.model.names
    # colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    # t0 = time.time()
    img = torch.zeros((1, 3, self.imgsz, self.imgsz), device=self.device)  # init img
    _ = self.model(img.half() if self.half else img) if self.device.type != 'cpu' else None  # run once

    for path, img, im0s, vid_cap in self.dataset:
        img = torch.from_numpy(img).to(self.device)
        img = img.half() if self.half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = self.model(img, augment=self.opt.augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, self.opt.conf_thres, self.opt.iou_thres,
                                   classes=self.opt.classes, agnostic=self.opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier
        if self.classify:
            pred = apply_classifier(pred, self.modelc, img, im0s)
        # print("pred", pred)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            '''
            if self.webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
            '''
            p, s, im0 = path, '', im0s
            save_path = str(Path(self.out) / Path(p).name)
            txt_path = str(Path(self.out) / Path(p).stem) + (
                '_%g' % self.dataset.frame if self.dataset.mode == 'video' else '')
            # s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            # If something was detected
            if det is not None and len(det):
                total = 0.0
                # Rescale boxes from img_size to im0 size
                # print("type : ", type(det))
                # print("det : ", det)
                goods_type = None
                percent = None
                more_than_90 = 0
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %s, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    if self.save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format
                    # print(cls)
                    if self.save_img or self.view_img:  # Add bbox to image
                        goods_type = names[int(cls)]
                        percent = '%.2f' % conf
                        # print(type(percent))
                        percent = float(percent)
                        # print("percent", type(percent))
                        label = '%s %.2f' % (names[int(cls)], conf)
                        print(percent)
                        if percent > 0.85:
                            # plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
                            plot_one_box(xyxy, im0, label=label, color=(0, 0, 255), line_thickness=3)
                            total = total + percent
                            more_than_90 += 1

                # avg = total / len(det)
                if more_than_90 != 0:
                    avg = total / more_than_90
                    avg = round(avg, 2)
                    # print(total)
                    # print(more_than_90)
                    # print(avg)
                    print("names : ", names[int(cls)])
                    # print("probability : %.2f", percent)
                    cv_img, name, color = self.d.load_image(goods_type)
                    if cv_img is not None:
                        qt_img = self.convert_cv_qt(cv_img)
                        self.updateFeatureLable(qt_img)
                        # self.infomsg_append("[DETECT] item: %s, code: %s, count: %d" % (name, goods_type, len(det)))
                        # img_time = datetime.datetime.now().strftime("%Y-%m-%d,%H:%M:%S")
                        img_time = datetime.datetime.now().strftime("%H:%M:%S")
                        img_date = datetime.datetime.now().strftime("%Y_%m_%d")
                        self.infomsg_append(img_time + ",%s,%s,%d" % (name, goods_type, more_than_90))
                        # log_string = img_time + "," + name + "," + goods_type + "," + str(len(det)) + "," + avg
                        log_string = (img_time + "," + name + "," + goods_type + "," +
                                      str(more_than_90) + "," + str(avg))
                        try:
                            if not os.path.exists("log"):
                                os.makedirs("log")
                        except OSError:
                            print('Error: Creating directory. log')
                        f = open("./log/" + img_date + '_log.csv', mode='at', encoding='utf-8')
                        f.writelines(log_string + '\n')
                        f.close()
                        print(log_string)
                        # print("db image uploaded successfully")
                    else:
                        print("Item not found in the db")
                        self.iv.clear()
                        self.f_label.clear()
            # When nothing is detected
            else:
                print("No detections")
                # self.infomsg_append("[DETECT] This item requires new training.")
                # self.infomsg_append("detection training required")

            print(s)
            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))
            self.iv.setImage(self.convert_cv_qt(im0))

            # Stream results
            if self.view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if self.save_img:
                if self.dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)
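# The per-detection logging above builds a comma-separated line by hand and
# appends it to a dated file under ./log. A short, self-contained sketch of the
# same idea using the standard csv module; the helper name and sample values
# are illustrative, not from the original code.
import csv
import datetime
import os


def append_log(name, goods_type, count, avg, log_dir="log"):
    """Append one detection record to today's CSV log file."""
    os.makedirs(log_dir, exist_ok=True)
    img_date = datetime.datetime.now().strftime("%Y_%m_%d")
    img_time = datetime.datetime.now().strftime("%H:%M:%S")
    with open(os.path.join(log_dir, img_date + "_log.csv"), "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow([img_time, name, goods_type, count, avg])


append_log("apple", "A001", 3, 0.92)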
def detect(self, detetConfig=None):
    detectResult = []
    print("Detection parameters: " + str(detetConfig))
    model = self.model
    save_img = False
    vw = None

    # Initialize a few parameters
    source, view_img, save_txt = detetConfig["source"], detetConfig["view_img"], detetConfig["save_txt"]
    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
        ('rtsp://', 'rtmp://', 'http://'))

    # Directories
    save_dir = Path(detetConfig['saveDir'])  # increment run
    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

    half = self.half
    device = self.device

    # Check the image size
    imgsz = check_img_size(detetConfig["imgsz"], s=model.stride.max())  # check img_size

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        self.dataset = LoadStreams(source, img_size=imgsz)
        self.cap = self.dataset.getCap()
        self.isStream = True
        vw = videoRecordUtils.createVideoWriter(self.cap)
    else:
        save_img = True
        self.isStream = False
        self.dataset = LoadImages(source, img_size=imgsz)

    # Get names and colors
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    for path, img, im0s, vid_cap in self.dataset:
        if self.isDetect == False:
            break
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=detetConfig["augment"])[0]

        # Apply NMS
        pred = non_max_suppression(pred, detetConfig["conf_thres"], detetConfig["iou_thres"],
                                   classes=detetConfig["classes"], agnostic=detetConfig["agnostic_nms"])
        t2 = time_synchronized()

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = Path(path), '', im0s

            save_path = str(save_dir / p.name)
            txt_path = str(save_dir / 'labels' / p.stem) + (
                '_%g' % self.dataset.frame if self.dataset.mode == 'video' else '')
            s += '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh

            if len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                detectObjectItems = []
                for *xyxy, conf, cls in reversed(det):
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        line = (cls, *xywh, conf) if detetConfig["save_conf"] else (cls, *xywh)  # label format
                        with open(txt_path + '.txt', 'a') as f:
                            f.write(('%g ' * len(line)).rstrip() % line + '\n')

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)

                    detectObjectItems.append({
                        "x": int(xyxy[0]),
                        "y": int(xyxy[1]),
                        "w": int(xyxy[2]),  # note: xyxy[2]/xyxy[3] are the bottom-right corner, not width/height
                        "h": int(xyxy[3]),
                        "label": label,
                        "class": int(cls.int()),
                        "conf": float(conf.float()),
                        "color": colors[int(cls)]
                    })

                detectResult.append({"file": p.name, "detectObject": detectObjectItems})

            # Print time (inference + NMS)
            print('%sDone. (%.3fs)' % (s, t2 - t1))

            # Stream results
            if view_img:
                ret, buffer = cv2.imencode('.jpg', im0)
                frame = buffer.tobytes()
                # record video
                print("record video....")
                vw.write(im0)
                if self.Broardcast:
                    # To save memory: if the queue holds more than 30 frames, clear it
                    if self.q.qsize() > 30:
                        self.q.queue.clear()
                    self.q.put(frame)

            # Save results (image with detections)
            if save_img:
                if self.dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    vid_writer.write(im0)

    if vw is not None:
        videoRecordUtils.closeVideoWrite(vw)

    print("detect finished.......")
    return detectResult
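# A short, self-contained illustration of the structure returned by the
# detect() method above (detectResult): one entry per processed file, each with
# a list of detected objects. The values are made up for illustration.
import json

detectResult = [{
    "file": "bus.jpg",
    "detectObject": [{
        "x": 52, "y": 40, "w": 150, "h": 240,   # box corners as stored above
        "label": "person 0.91", "class": 0, "conf": 0.91,
        "color": [255, 0, 0],
    }],
}]

print(json.dumps(detectResult, indent=2, ensure_ascii=False))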
def detect(weights='mdp/weights/weights.pt', source='mdp/videos', output='mdp/output', img_size=416, conf_thres=0.01, iou_thres=0.5, device='', classes=None, agnostic_nms=False, augment=False, update=False, scale_percent=50): save_img = True predicted_label = None out, imgsz = output, img_size webcam = source.isnumeric() or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize set_logging() device = select_device(device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: # save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once row_num = 0 for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=augment)[0] # Apply NMS pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].detach().unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in det: predicted_label = names[int(cls)] if predicted_label: label_id = label_id_mapping.get(predicted_label) xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh print(('%s ' * 5 + '\n') % (label_id, *xywh)) # label format # r = requests.post(source, json={'label': label_id}) # send result to rpi # print(r.text) if False and conf < confidence_threshold(label_id): # fine tune for up arrow (white) # cv2.imshow('ImageWindow', im0) break # if not check_bounding_box(xywh): # # cv2.imshow('ImageWindow', im0) # break 
label = '%s %.2f' % (label_id, conf) good, text = check_bounding_box(xywh, im0.shape[0], im0.shape[1]) if not good: label = text plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # cv2.imshow('ImageWindow', im0) break # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration
def detect(save_img=False):
    print_div('INTIL')

    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size

    # Initialize
    print_div('GET DEVICE')
    set_logging()
    device = select_device(opt.device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    print_div('LOAD MODEL')
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    print_div('LOAD MODEL_CLASSIFIER')
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Get names and colors
    print_div('SET LABEL COLOR')
    names = model.module.names if hasattr(model, 'module') else model.names
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # Run inference
    ###############################################################################
    print_div("RUN INFERENCE")

    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once

    video_path = source
    cap = cv2.VideoCapture(video_path)

    print_div('Start Play VIDEO')
    while cap.isOpened():
        ret, frame = cap.read()
        t0 = time.time()
        if not ret:
            print_div('No Frame')
            break

        fps_t1 = time.time()
        img, img0 = img_preprocess(frame)  # img: resized, img0: original
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        t1 = time_synchronized()
        pred = model(img, augment=opt.augment)[0]

        # Apply NMS: get the values of every prediction
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()

        # Apply Classifier: get the label for those values
        if classify:
            pred = apply_classifier(pred, modelc, img, img0)

        # Draw boxes
        for i, det in enumerate(pred):
            s = '%gx%g ' % img.shape[2:]  # print string
            gn = torch.tensor(img0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string

                # Write results
                for *xyxy, conf, cls in reversed(det):
                    label = '%s %.2f' % (names[int(cls)], conf)
                    plot_one_box(xyxy, img0, label=label, color=colors[int(cls)], line_thickness=3)

        # Print results (inference + NMS)
        print_div('%sDone. (%.3fs)' % (s, t2 - t1))

        # Draw overlay and show the frame
        x, y, w, h = (img0.shape[1] // 4), 25, (img0.shape[1] // 2), 30
        cv2.rectangle(img0, (x, 10), (x + w, y + h), (0, 0, 0), -1)

        rescale = 0.5
        re_img0 = (int(img0.shape[1] * rescale), int(img0.shape[0] * rescale))

        cv2.putText(img0,
                    '{} | inference: {:.4f}s | fps: {:.4f}'.format(
                        opt.weights[0], t2 - t1, 1 / (time.time() - t0)),
                    (x + 20, y + 20), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.imshow('Stream_Detected', cv2.resize(img0, re_img0))
        key = cv2.waitKey(1)
        if key == ord('q'):
            break

    # After break
    cap.release()
    cv2.destroyAllWindows()
def detect(self, save_img=True): global run_sign, shot_sign, frames source, weights, view_img, save_txt, imgsz = self.opt.source, self.opt.weights, self.opt.view_img, self.opt.save_txt, self.opt.img_size webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path( increment_path(Path(self.opt.project) / self.opt.name, exist_ok=self.opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(self.opt.device) # device=torch.device('cuda:0') half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = '', None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: save_img = True dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() vid_w = Video() for path, img, im0s, vid_cap in dataset: if not run_sign: return img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=self.opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, self.opt.conf_thres, self.opt.iou_thres, classes=self.opt.classes, agnostic=self.opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if self.opt.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if 
save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) # print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: # print('append') frames.append(im0) # print(len(frames)) if shot_sign: cv2.imwrite(r'C:\Users\26782\Desktop\test.jpg', im0) shot_sign = False if video_sign: fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # vid_writer = cv2.VideoWriter(save_dir+'.mp4', cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer = Video.video_writer(vid_w, str(save_dir), fourcc, fps, w, h) vid_writer.write(im0) # print(1) # cv2.namedWindow('frame', cv2.WINDOW_NORMAL) # # # `cv2.setWindowProperty('frame', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN) # cv2.imshow('frame', im0) # if cv2.waitKey(1) & 0xFF == ord('q'): # break # cv2.imshow(str(p), im0) # cv2.waitKey(1) # 1 millisecond # Save results (image with detections) # if save_img: # if dataset.mode == 'image': # else: # 'video' # # if vid_path != save_path: # new video # # vid_path = save_path # # if isinstance(vid_writer, cv2.VideoWriter): # # vid_writer.release() # release previous video writer if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)')
def streamRecognition(sourceImage): # Initialize source = self._liveview set_logging() weights = 'yolov3-tiny.pt' device = 'cpu' imgsz = 640 webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size # if half: # model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond
def detect(save_img=True): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size image_id = { '0': '10', '1': '6', '2': '7', '3': '8', '4': '9', '5': '5', '6': '2', '7': '4', '8': '3', '9': '1', '10': '11', '11': '12', '12': '13', '13': '14', '14': '15' } #image_id maps the Roboflow ids to the actual ids robo_id_list = list(map(str, range( 1, 16))) #used with Roboflow ids, not the actual ids bimodal_img_loc_dict = {str(i): [] for i in range(1, 16) } #create dict for storing locations for each id webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run save_dir.mkdir(parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) elif source.startswith("tcp://"): view_img = True dataset = LoadImagesZMQ(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[np.random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once num = 0 loc_list = [] for robot_info_str, img, im0s, vid_cap in dataset: if source.startswith("tcp://"): robot_info = json.loads(robot_info_str) else: robot_info = {"x": 2, "y": 2, "orientation": "NORTH"} #dummy img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 s, im0 = '%g: ' % i, im0s[i].copy() else: s, im0 = '', im0s #save_path = str(save_dir) save_path = os.path.join(os.getcwd(), 'runs\\exp\\') txt_path = str(save_dir) + ('_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %s, ' % (n, names[int(c)]) # add to string # Write results for 
*xyxy, conf, cls in reversed(det): distance = 2 if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format bbox_label = ( (('%g ' * len(line)).rstrip() % line).split() ) # 0:ID, 1:x, 2:y, 3:w, 4:h img_id, img_x, img_y, img_w, img_h = bbox_label[ 0], float(bbox_label[1]), float( bbox_label[2]), float(bbox_label[3]), float( bbox_label[4]) bimodial_img_loc_list = bimodal_img_loc_dict[ image_id[img_id]] with open(txt_path + '.txt', 'a') as f: f.write( 'id:%s | x:%s | y:%s | w:%s | h:%s | locx:%d | locy:%d' % (image_id[img_id], img_x, img_y, img_w, img_h, robot_info['x'], robot_info['y']) + '\n') if save_img or view_img: # Add bbox to image if img_w < 0.10: #and img_h < 0.17: #finetune estimation!! distance = 3 # partitioning image if (img_x <= 0.4): print("%d, %d : image %s ahead by %d distance" % (robot_info['x'], robot_info['y'], image_id[img_id], distance)) loc = image_ahead(robot_info, distance) elif (0.4 < img_x < 0.6): print("%d, %d : image %s beside by %d distance" % (robot_info['x'], robot_info['y'], image_id[img_id], distance)) loc = image_beside(robot_info, distance) else: print("%d, %d : image %s behind by %d distance" % (robot_info['x'], robot_info['y'], image_id[img_id], distance)) loc = image_behind(robot_info, distance) label = '%s %f %.3f %.3f' % (image_id[img_id], conf, img_x, img_y) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) if (len(bimodial_img_loc_list) == 0 ): #take only 1 photo per id cv2.imwrite(save_path + str(num) + ".jpg", im0) num += 1 bimodial_img_loc_list.append((loc['x'], loc['y'])) #print(bimodial_img_loc_list) bimodial_tuple = tuple( bimodial_img_loc_list ) #convert list to tuple, Counter needs hashable bimod_x, bimod_y = Counter(bimodial_tuple).most_common( )[0][0][0], Counter(bimodial_tuple).most_common( )[0][0][1] #find most occuring (x, y) print(bimod_x, bimod_y) #send image location data = { 'id': image_id[img_id], 'x': bimod_x, 'y': bimod_y } send_time = str(time.time()) #if(len(bimodial_img_loc_list)%2!=0): tx.send_json({ "type": "detection", "data": data, "id": send_time }) with open(txt_path + '.txt', 'a') as f: f.write('%s locx:%d locy:%d' % (send_time, bimod_x, bimod_y) + '\n\n') # Print time (inference + NMS) #print('%sNothing detected (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(str('image'), im0) if cv2.waitKey(1) == ord('q'): # q to quit return
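# In the function above, each recognized image id accumulates candidate (x, y)
# locations and the most frequent one is reported. A small, self-contained
# sketch of that voting step with collections.Counter; the sample coordinates
# are made up for illustration.
from collections import Counter

locations = [(3, 7), (3, 7), (4, 7), (3, 7), (3, 8)]  # hypothetical guesses for one image id

# Counter requires hashable items, so tuples work directly;
# most_common(1)[0] gives ((x, y), count) for the most frequent location.
(bimod_x, bimod_y), votes = Counter(locations).most_common(1)[0]
print(bimod_x, bimod_y, votes)  # -> 3 7 3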
def detect(opt, dp, save_img=False):
    out, source, weights, view_img, save_txt, imgsz = \
        opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = select_device(opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    # check img_size: if not a multiple of 32, round it up to the next multiple of 32 and print a warning
    imgsz = check_img_size(imgsz, s=model.stride.max())
    if half:
        model.half()  # to FP16

    # Second-stage classifier
    classify = False
    if classify:
        modelc = load_classifier(name='resnet101', n=2)  # initialize
        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if opt.use_roi:
        print(dp.cl)
        # print(dp.cl[0], dp.cl[1])
        # cl = opt.control_line
        cl = dp.cl
        roi_in_pixels = np.array([0, cl[0], 1280, cl[1]])  # two corner points: x1, y1, x2, y2
    else:
        roi_in_pixels = None

    if webcam:
        view_img = True
        cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=imgsz)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=imgsz, roi=roi_in_pixels)

    # Get names and colors
    # handle the extra 'module' attribute of models saved with DataParallel on GPU
    names = model.module.names if hasattr(model, 'module') else model.names
    # random colors, one per entry in names (the class list)
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]

    # prune
    # torch_utils.prune(model, 0.7)
    # model.eval()

    # Run inference
    t0 = time.time()
    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once as a warm-up pass

    detected_img_id = 0
    time_list = [None] * len(dataset)
    for iii, (path, img, im0s, vid_cap, recover) in enumerate(dataset):
        # print(img.shape, im0s.shape, vid_cap)
        # exit()
        # img.shape [3, 384, 640]  im0s.shape [720, 1280, 3]  None
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)  # convert from [3, h, w] to [batch_size, 3, h, w]

        # Inference
        t1 = time_synchronized()
        # print('aug', opt.augment)  # False
        pred = model(img, augment=opt.augment)[0]
        # print(pred.shape)  # [1, 15120, 25]

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                   classes=opt.classes, agnostic=opt.agnostic_nms)
        t2 = time_synchronized()
        print(f'infer time:{t2-t1:.4f}s ', end='')
        time_list[iii] = t2 - t1
        # print('\n', len(pred), pred, recover)
        # pred is a list of length batch_size, one tensor per image;
        # each tensor holds the detected objects with shape [n, 6]: xyxy, conf, cls

        # Apply Classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if opt.use_roi and det is not None:
                small_img_shape = torch.from_numpy(np.array([recover[1], recover[0]]).astype(np.float))
                det[:, 0], det[:, 2] = det[:, 0] + recover[2], det[:, 2] + recover[2]
                det[:, 1], det[:, 3] = det[:, 1] + recover[3], det[:, 3] + recover[3]
            else:
                small_img_shape = img.shape[2::]

            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '%g: ' % i, im0s[i].copy()
            else:
                p, s, im0 = path, '', im0s  # im0s is the original image

            save_path = str(Path(out) / Path(p).name)  # output/filenamexxxx.jpg
            txt_path = str(Path(out) / Path(p).stem) + (
                '_%g' % dataset.frame if dataset.mode == 'video' else '')  # output/filenamexxxx.txt
            s += '%gx%g ' % img.shape[2:]  # print string, e.g. 640x640
            # normalization gain whwh: im0.shape is [720, 1280, 3], repeated into [1280, 720, 1280, 720]
            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]

            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                # converts to x1 y1 x2 y2 in original-image pixel coordinates
                det[:, :4] = scale_coords(small_img_shape, det[:, :4], im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '%g %ss, ' % (n, names[int(c)])  # add to string, e.g. "1 crosswalk"
                    # s += f'{det[:, 4].item():.4f} '
                    # print(n)

                # Write results
                for *xyxy, conf, cls in det:
                    if save_txt:  # Write to file
                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                        with open(txt_path + '.txt', 'a') as f:
                            x, y, w, h = xywh
                            string = f"{int(cls)} {conf.item():.4f} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n"
                            f.write(string)  # label format

                    if save_img or view_img:  # Add bbox to image
                        label = '%s %.2f' % (names[int(cls)], conf)
                        # print(type(im0), im0.shape)  # array, 720, 1280, 3
                        if names[int(cls)] in opt.plot_classes:
                            # color = colors[int(cls)]
                            color = (255, 85, 33)
                            plot_one_box(xyxy, im0, label=label, color=color, line_thickness=5)

            # Print time (inference + NMS)
            prt_str = '%sDone. (%.5fs)' % (s, t2 - t1)
            print(prt_str)
            os.system(f'echo "{prt_str}" >> {opt.output}/detect.log')

            # Stream results
            if view_img:
                cv2.imshow(p, im0)
                if cv2.waitKey(1) == ord('q'):  # q to quit
                    raise StopIteration

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    im0 = dp.dmpost(im0, det, det_id=detected_img_id, filename=Path(p).name, names=names)
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release()  # release previous video writer
                        fourcc = 'mp4v'  # output video codec
                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
                    # print(detected_img_id, p, txt_path)
                    tmp_filename = Path(txt_path).stem
                    im0 = dp.dmpost(im0, det, det_id=detected_img_id, filename=tmp_filename, names=names)
                    vid_writer.write(im0)

            detected_img_id += 1

    if save_txt or save_img:
        print('Results saved to %s' % os.getcwd() + os.sep + out)
        if platform == 'darwin' and not opt.update:  # MacOS
            os.system('open ' + save_path)

    print('Done. (%.3fs)' % (time.time() - t0))
    time_arr = np.array(time_list)
    prnt = f'Done. Network mean inference time: {np.mean(time_arr):.5f}s, Mean FPS: {1/np.mean(time_arr):.4f}.'
    print(f'\nModel size {opt.img_size} inference {prnt}')
    os.system(f'echo "{prnt}" >> {opt.output}/detect.log')
    os.system(f'echo "useroi {opt.img_size} {prnt}" >> detect2.log')
def continued_detect(save_img=False): #---------------------- Same with detect() ------------------------------------------ source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once #-------------------------------- LoadLibrary ----------------------------------------- chatter = cdll.LoadLibrary('./cpp/chat.so') if chatter.connect_camera() == 0: raise Exception("cannot connect to camera!!") chatter.waitSource.restype = c_char_p chatter.sendResult.argtypes = [c_char_p] while chatter.checkFinished(): source = chatter.waitSource() source = str(source, encoding='utf-8') if len(source) == 0: continue webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://')) if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: save_img = True dataset = LoadImages(source, img_size=imgsz, stride=stride) # Run inference for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr( dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): xywh = 
(xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh object_type = cls.item() chatter.sendResult( bytes(str(send_message), encoding='utf-8')) if save_txt: # Write to file line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) chatter.disconnect_camera()
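# A minimal sketch of the ctypes string-marshalling pattern used with
# ./cpp/chat.so above: declaring restype/argtypes so that a C char* interface
# and Python bytes round-trip correctly. Only waitSource/sendResult and the
# library path come from the code above; the wrapper function names are
# illustrative assumptions.
from ctypes import cdll, c_char_p

def open_bridge(lib_path='./cpp/chat.so'):
    lib = cdll.LoadLibrary(lib_path)
    lib.waitSource.restype = c_char_p     # C returns char* -> Python receives bytes
    lib.sendResult.argtypes = [c_char_p]  # Python must pass bytes, not str
    return lib

def read_source(lib) -> str:
    raw = lib.waitSource()                # bytes, possibly empty
    return raw.decode('utf-8') if raw else ''

def send_text(lib, text: str) -> None:
    lib.sendResult(text.encode('utf-8'))  # always encode before crossing the boundary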
def detect(save_img=False): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once ######################################################################################################## # 에피소드 초기 설정 # 초기 설정(환경, 에이전트, 상태 생성, 에피소드 시종제어 등등) 및 변수(Skip 변수 = 최초 상태 인식 위함) 선언 Environment = RL.Environment() Agent = RL.Agent() Skip = True State = torch.tensor([0., 0., 0., 0., 0.], device='cuda') Action = None Done = False # 게임 활성화 클릭 에피소드 시작 준비 # 활성화 pyautogui.moveTo(x=960, y=640) pyautogui.doubleClick() # 에피소드 시작 준비(완전탐지 딜레이) Agent.Start() ######################################################################################################## # 탐지 모듈(상태 생성기) 루프 for path, img, im0s, vid_cap in dataset: ######################################################################################################## # 에피소드 시작 # 탐지 버퍼 초기화 center_array = [] # 행동 선택 및 시행 if not Skip: print('###########################') print('State : ', State) Action = Agent.Action(State) print('Action ', RL.ACTION_OPTION[Action]) # 현재상태 x 위험행동 => 에피소드 종료 #if str(State)[8:10] == '61' and Action == 'left arrow' or str(State)[8:10] == '62' and 'right arrow': # break ######################################################################################################## img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path # [수정 :: 주석 변환] # save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / 
p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f'{n} {names[int(c)]}s, ' # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image # [수정 :: func plot_one_box() -> 중심점 플로팅] # 좌상 x = xyxy[0], 좌상 y = xyxy[1], 우하 x = xyxy[2], 우하 y = xyxy[3] label = f'{names[int(cls)]} {conf:.2f}' center = plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # 객체 클래스 및 중심점 저장[클래스 이름, [중심좌표 x, 중심좌표 y]] center_array.append([label[:-5], center]) # 실시간 모니터링 화면 출력 # Stream results if view_img: name = 'Detector' cv2.namedWindow(name) cv2.moveWindow(name, 1920, -550) cv2.resizeWindow(name, 1080, 720) im0 = cv2.resize(im0, (1080, 720)) cv2.imshow(name, im0) ######################################################################################################## # 다음상태 추출(단, 종점이면 다음 상태 = [0, 0, 0, 0, 0]) # 탐지(raw status) -> 상태(converted status) = 격자상태 변환 Next_state = Environment.Step(center_array) # 추출 다음상태 적합성 판단 # 행동 존재 X, 변화 감지 X, 최초상태 할당조건 if Action is None: if torch.equal(State, Next_state) \ and State.tolist() not in [[0, 0, 0, 0, 0], [0, 0, 0, 0, 1]] \ and Next_state.tolist() not in [[0, 0, 0, 0, 0], [0, 0, 0, 0, 1]] \ and State[1] != 0 and Next_state[1] != 0: print('행동 X, 변화 X') Skip = False # 행동 존재 X, 변화 감지 O else: print('행동 X, 변화 O') Skip = True # 행동 존재 O elif Action is not None: # 나뭇가지 개수 인식 및 y좌표 검사 구문 및 조건분기문 # Branch_num = 나뭇가지 개수, Confirm_flag = 나뭇가지 상태변화 확인플래그 Branch_num = 8 Confirm_flag = True # 나뭇가지 개수 파악 while str(State)[Branch_num] != '.': Branch_num += 1 Branch_num = (Branch_num - 8) // 2 # 나뭇가지 상태변화 확인 for i in range(Branch_num): if str(State)[i * 2 + 8] == str(Next_state)[i * 2 + 8]: Confirm_flag = False # 변화감지 확인 O, if Confirm_flag: print('행동 O, 변화 O') print('Next_state : ', Next_state) print('###########################') Skip = False # 변화감지 확인 X, else: print('행동 O, 변화 X') Skip = True continue ######################################################################################################## # ↑ 코드 정리할 것!(간결화 및 정리) # # 다음 프로세스 진행(보상 수여, 스택쌓기 => 신경망 업데이트) 조건 # 배치 제어 if not Skip and Action is not None: # 종료 확인 Done = True if torch.equal(Next_state[1], torch.tensor(0).cuda( device='cuda')) else False # 보상 수여 Reward = torch.tensor(-1.).cuda( device='cuda') if Done else torch.tensor(1.).cuda( device='cuda') # 현재 상태가치 및 다음 상태가치 계산 V_value = Agent.Value(State) Next_V_value = Agent.Value(Next_state) # 어드밴티지 및 갱신 현재 상태가지 계산 Advantage, Q_value = Agent.Advantage_and_Q_value( V_value, Reward, Next_V_value, Done) # 배치 쌓기 Agent.Save_batch(State, Action, Q_value, Advantage) print('Batch_len : ', len(Agent.Batch)) print('AGB : ', Agent.Batch) # 배치에 의한 업데이트 if len(Agent.Batch) == RL.BATCH_SIZE: print('update') exit() Agent.Update_all_network() # 에피소드 종료 및 마지막 배치 업데이트 if Done: RL.BATCH_SIZE = len(Agent.Batch) # 마지막 업데이트 및 프로세스 탈출 break # 상태 전달 및 버퍼 비우기 else: State = Next_state Action 
= None ######################################################################################################## ######################################################################################################## # End of training # TODO: tidy up the wrap-up and saving sequence! print('epi exit') exit()
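# A generic one-step advantage/target sketch in the spirit of the
# Agent.Advantage_and_Q_value call above. The discount factor and the exact
# bootstrapping rule inside RL.Agent are unknown here; this is plain TD(0)
# written for illustration, not necessarily what the project implements.
import torch

def one_step_advantage(v_s: torch.Tensor, reward: torch.Tensor,
                       v_next: torch.Tensor, done: bool, gamma: float = 0.99):
    """Return (advantage, q_target) for a single (s, a, r, s') transition."""
    q_target = reward if done else reward + gamma * v_next  # no bootstrap at terminal states
    advantage = q_target - v_s
    return advantage, q_target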
def detect(self): self.coordinates = [] view_img = False save_txt = False imgsz = 640 webcam = False # Directories save_dir = Path(r"C:\detect") # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device('') half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(self.weights, map_location=device) stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None save_img = True dataset = LoadImages(self.source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=False)[0] # Apply NMS pred = non_max_suppression(pred, 0.4, 0.45, classes=None, agnostic=False) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr( dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' c1, c2 = (int(xyxy[0]), int(xyxy[1])), (int(xyxy[2]), int(xyxy[3])) self.coordinates.append({ 'raw_coordinates': xyxy, 'label': f'{conf:.2f}', 'top_left': c1, 'bottom_right': c2 }) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) print(f'{s}Done. 
({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': img = cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)') return self.coordinates
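# A small usage sketch for the coordinate records returned above: each entry
# carries integer corner points, so regions can be cropped straight from the
# original frame (a NumPy BGR array). The helper name is an illustrative
# assumption.
def crop_detections(frame, coordinates):
    """Crop each detected region from `frame` using the returned corner points."""
    crops = []
    for rec in coordinates:
        (x1, y1), (x2, y2) = rec['top_left'], rec['bottom_right']
        crops.append(frame[y1:y2, x1:x2].copy())
    return crops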
def detect(save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize set_logging() device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model # add by zlf at 20201027 # load FP16 model # model=torch.load(weights)['model'] # for n,p in model.named_parameters(): # print(p.dtype) model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size # 20201019 load model by zlf # new method of loading weight;only for 'torch.save(model,state_dict())' # net = Model('./models/yolov5s.yaml').cuda() # state_dict = torch.jit.load('QuantCRNN_1_14000.pt', map_location=torch.device('cpu')) # model = state_dict # model.half().cuda() # model_dict = net.state_dict() # # for k, v in state_dict.items(): # name = k[7:] # remove `module.` # model_dict[name] = v # net.load_state_dict(model_dict, strict=True) # model = net # imgsz = 320 # add by zlf at 20201009 if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True #TODO:cudnn.benchmark = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors # names = model.module.names if hasattr(model, 'module') else model.names names = [ 'CAR', 'CARPLATE', 'BICYCLE', 'TRICYCLE', 'PEOPLE', 'MOTORCYCLE', 'LOGO_AUDI', 'LOGO_BENZE', 'LOGO_BENZC', 'LOGO_BMW', 'LOGO_BUICK', 'LOGO_CHEVROLET', 'LOGO_CITROEN', 'LOGO_FORD', 'LOGO_HONDA', 'LOGO_HYUNDAI', 'LOGO_KIA', 'LOGO_MAZDA', 'LOGO_NISSAN', 'LOGO_PEUGEOT', 'LOGO_SKODA', 'LOGO_SUZUKI', 'LOGO_TOYOTA', 'LOGO_VOLVO', 'LOGO_VW', 'LOGO_ZHONGHUA', 'LOGO_SUBARU', 'LOGO_LEXUS', 'LOGO_CADILLAC', 'LOGO_LANDROVER', 'LOGO_JEEP', 'LOGO_BYD', 'LOGO_BYDYUAN', 'LOGO_BYDTANG', 'LOGO_CHERY', 'LOGO_CARRY', 'LOGO_HAVAL', 'LOGO_GREATWALL', 'LOGO_GREATWALLOLD', 'LOGO_ROEWE', 'LOGO_JAC', 'LOGO_HAFEI', 'LOGO_SGMW', 'LOGO_CASY', 'LOGO_CHANAJNX', 'LOGO_CHANGAN', 'LOGO_CHANA', 'LOGO_CHANGANCS', 'LOGO_XIALI', 'LOGO_FAW', 'LOGO_YQBT', 'LOGO_REDFLAG', 'LOGO_GEELY', 'LOGO_EMGRAND', 'LOGO_GLEAGLE', 'LOGO_ENGLON', 'LOGO_BAOJUN', 'LOGO_DF', 'LOGO_JINBEI', 'LOGO_BAIC', 'LOGO_WEIWANG', 'LOGO_HUANSU', 'LOGO_FOTON', 'LOGO_HAIMA', 'LOGO_ZOTYEAUTO', 'LOGO_MITSUBISHI', 'LOGO_RENAULT', 'LOGO_MG', 'LOGO_DODGE', 'LOGO_FIAT', 'LOGO_INFINITI', 'LOGO_MINI', 'LOGO_TESLA', 'LOGO_SMART', 'LOGO_BORGWARD', 'LOGO_JAGUAR', 'LOGO_HUMMER', 'LOGO_PORSCHE', 'LOGO_LAMBORGHINI', 'LOGO_DS', 'LOGO_CROWN', 'LOGO_LUXGEN', 'LOGO_ACURA', 'LOGO_LINCOLN', 'LOGO_SOUEAST', 'LOGO_VENUCIA', 'LOGO_TRUMPCHI', 'LOGO_LEOPAARD', 'LOGO_ZXAUTO', 'LOGO_LIFAN', 'LOGO_HUANGHAI', 'LOGO_HAWTAI', 'LOGO_REIZ', 'LOGO_CHANGHE', 'LOGO_GOLDENDRAGON', 'LOGO_YUTONG', 'LOGO_HUIZHONG', 'LOGO_JMC', 'LOGO_JMCYUSHENG', 'LOGO_LANDWIND', 'LOGO_NAVECO', 'LOGO_QOROS', 'LOGO_OPEL', 'LOGO_YUEJING' ] colors = [[random.randint(0, 255) for 
_ in range(3)] for _ in range(len(names))] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img # _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: f1.write('%s:' % (path.split('/')[-1])) img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # pred = model(img.cuda()) # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls, obj_conf, cls_conf in reversed( det): # add by zlf at 20201026 # add by zlf at 20201019 x1 = int(xyxy[0].item()) y1 = int(xyxy[1].item()) x2 = int(xyxy[2].item()) y2 = int(xyxy[3].item()) f1.write( "[%s,%.2f,%d,%d,%d,%d]" % (names[int(cls.item())], round( (conf.item() * 100), 2), x1, y1, x2, y2)) # add by zlf at 20201019 if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format if save_img or view_img: # Add bbox to image label = '%s|%.2f|%.2f|%.2f' % (names[int(cls)], conf, obj_conf, cls_conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) f1.write('\n') # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % Path(out)) if platform.system() == 'Darwin' and not opt.update: # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
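# A standalone sketch of the bracketed record format written to f1 above:
# one "[NAME,confidence,x1,y1,x2,y2]" chunk per detection, concatenated on a
# single line per image. The helper name is an illustrative assumption.
def format_record(name: str, conf: float, xyxy) -> str:
    x1, y1, x2, y2 = (int(v) for v in xyxy)
    return '[%s,%.2f,%d,%d,%d,%d]' % (name, conf * 100, x1, y1, x2, y2)

# format_record('CAR', 0.874, (10.2, 5.9, 200.1, 180.0))
# -> '[CAR,87.40,10,5,200,180]'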
def detect(save_img=False): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size save_img = not opt.nosave and not source.endswith('.txt') # save inference images webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) ###### set relation initial settings ######### prev_pred = [] related_pic = 5 related_factor = 1.5 factor_choise = torch.tensor([related_factor**(i) for i in range(1,related_pic+1)]) min_wh, max_wh = 2, 4096 ####### set relation initial setting ########## factor_choise = torch.tensor([related_factor**(i) for i in range(1,related_pic+1)]) # Directories save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != 'cpu': model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, multi_label=False, classes=opt.classes, agnostic=opt.agnostic_nms) #### add relation ###### if len(prev_pred)==0: prev_pred.insert(0,pred[0]) pass else: if len(prev_pred)>related_pic: prev_pred.pop() if pred[0] == None: pred[0] = torch.zeros(1,6).to(device) factor_scalar = factor_choise[:len(prev_pred)] # if confidence = [x[:,4] for x in prev_pred] confidence = [confidence[i]*factor_scalar[i] for i in range(len(prev_pred))] for i in range(len(prev_pred)): prev_pred[i] = prev_pred[i].transpose(-1,0) prev_pred[i][4] = confidence[i] prev_pred[i] = prev_pred[i].transpose(-1,0) pred_origin = pred[0] for i in prev_pred: pred[0] = torch.cat((pred[0],i),0) class_nub = pred[0][:,5].tolist() repeat = [] for i in class_nub: a = find_the_same_class(class_nub,i) if len(a)==1: continue else: if a in repeat: continue repeat.append(a) delete = [] for i in repeat: for j in range(len(i)-1): for k in range(j+1,len(i)): if abs(pred[0][i[j]][0]-pred[0][i[k]][0])>10: #define if the items are the same pass else: delete.append(i[k]) pred[0][i[j]][4] = pred[0][i[j]][4] + pred[0][i[k]][4] if 
pred[0][i[j]][4]>1: pred[0][i[j]][4] = 1 delete= set(delete) origin = set([x for x in range(len(pred[0]))]) remain = list(delete.symmetric_difference(origin)) a = torch.randn(1,6).to(device) for x in remain: b = pred[0][torch.arange(pred[0].size(0))== x] a = torch.cat((a,b),0) a = a[torch.arange(a.size(0))!=0] pred[0] = a pred[0] = pred[0][pred[0][:, 4] > opt.conf_thres-0.1] pred[0] = pred[0][((pred[0][:, 2:4] > min_wh) & (pred[0][:, 2:4] < max_wh)).all(1)] prev_pred.insert(0,pred_origin) # #### add relation ###### t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy(), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)')
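# A compact sketch of the cross-frame merging rule above: detections of the
# same class whose x1 coordinates lie within 10 px are treated as the same
# object, their confidences are summed and clamped to 1.0, and the duplicate
# row is dropped. The tensor layout [x1, y1, x2, y2, conf, cls] matches the
# NMS output; the helper name is an illustrative assumption.
import torch

def merge_close_same_class(det: torch.Tensor, x_tol: float = 10.0) -> torch.Tensor:
    keep = torch.ones(len(det), dtype=torch.bool)
    for i in range(len(det)):
        if not keep[i]:
            continue
        for j in range(i + 1, len(det)):
            if keep[j] and det[i, 5] == det[j, 5] and abs(det[i, 0] - det[j, 0]) <= x_tol:
                det[i, 4] = torch.clamp(det[i, 4] + det[j, 4], max=1.0)  # merge confidence
                keep[j] = False                                          # drop the duplicate row
    return det[keep]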
def detect(image, weights): # Final Declaration final_det = None final_class = [] final_text = [] source, weights, save_txt, imgsz, ocr = image, weights, True, 1280, True webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories # save_dir = Path(increment_path(Path('runs/detect') / 'exp', exist_ok=False)) # increment run # (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device('') half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: save_img = True dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=False)[0] # Apply NMS pred = non_max_suppression(pred, 0.6, 0.45, classes=None, agnostic=False) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path # save_path = str(save_dir / p.name) # img.jpg # txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size print() print(img.shape[2:], " ---------------------- ", im0.shape) det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() final_det = det[:, :4].tolist() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # python detect.py --source test\ --weights weights\model.pt --save-txt --save-conf final_class_reverse = [] for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (conf, cls, *xywh) if True else (cls, *xywh) # label format # print(names[int(cls)], str(line[0].squeeze().tolist()), 'box(xywh) = ' + str(line[2:])) final_class_reverse.append( 
names[int(cls)] + '(' + str(round((line[0].squeeze().tolist()), 2)) + ')') for i in final_class_reverse: final_class.append(i) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') if ocr: # Opens an image in RGB mode im = Image.open(path) newlist = [] for I in final_det: # [27.0, 14.0, 177.0, 91.0] left, top, right, bottom = I[0], I[1], I[2], I[3] im1 = im.crop((left, top, right, bottom)) # imimimim = np.asarray(im1) # text = reader.readtext(imimimim, detail=0) # print(text) text = pytesseract.image_to_string(im1) # print(text) # print(text.split('\n')) newlist = text.split('\n') newlist.pop(-1) final_text.append(' '.join(map(str, newlist))) # im1.show() print('LENGTHS', len(final_class), len(final_text)) final_class.reverse() res = dict(zip(final_class, final_text)) print("Resultant dictionary is : " + str(res)) return res
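# A minimal sketch of the crop-then-OCR step above: PIL crops take a
# (left, upper, right, lower) box, and pytesseract returns raw text whose
# trailing whitespace is stripped here. The helper name is an illustrative
# assumption.
from PIL import Image
import pytesseract

def ocr_box(image_path: str, xyxy) -> str:
    left, top, right, bottom = xyxy
    crop = Image.open(image_path).crop((left, top, right, bottom))
    return pytesseract.image_to_string(crop).strip()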
def detect(save_img=False): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith('.txt') or source.lower( ).startswith( ('rtsp://', 'rtmp://', 'http://')) #source = data/images' by default # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) ) # increment run. save_dir = WindowsPath('runs/detect/exp3') by default. (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() #Do basic configuration for the logging system. device = select_device( device=opt.device) #device = 'cpu' or '0' or '0,1,2,3' half = device.type != 'cpu' # half precision only supported on CUDA # Load model #attempt_load Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a. device = device(type='cpu') by default model = attempt_load(weights, map_location=device) # load FP32 model. # Verify img_size is a multiple of stride s imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() #Searching cicly for images print("Looking for images...") while True: time.sleep(3) # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True file_names = [ f for f in os.listdir(source) if f.endswith(('.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng')) ] #print(len(file_names)) if len(file_names) == 0: continue dataset = LoadImages( source, img_size=imgsz) #Doesnt run when there is no images # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: #print("----------------") #print(path) #print(vid_cap) #print("----------------") #Deleting photos if dataset.mode == 'images': os.remove(path) img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy() else: p, s, im0 = Path(path), '', im0s save_path = str(save_dir / p.name) txt_path = str(save_dir / 'labels' / p.stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 
# Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format #with open(txt_path + '.txt', 'a') as f: # f.write(('%g ' * len(line)).rstrip() % line + '\n') with open( newRoute + "etiquetas/" + txt_path.split("/")[-1] + '.txt', 'a') as f: f.write( str(names[int(cls)]) + " " + (str(conf).split("tensor(")[-1] ).replace(")", "") + '\n') resp = requests.post( 'https://deepgaze.xyz/api/images/store-data', headers={ 'Content-Type': 'application/json', 'Accept': 'application/json' }, data='{"name": "' + str(names[int(cls)]) + '", "porcent":"' + (str(conf).split("tensor(")[-1]).replace( ")", "") + '", "nameImg": "' + save_path.split("/")[-1] + '"}') print(resp.status_code) print(resp.text) if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(str(p), im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': if originalRoute == True: #print("---------------------") #print(save_path) #print(save_path.split("\\")[-1]) #print("---------------------") cv2.imwrite(save_path, im0) else: cv2.imwrite( newRoute + "yolo_" + save_path.split("/")[-1], im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print('Done. (%.3fs)' % (time.time() - t0)) if dataset.mode != 'images': break else: print("Looking for images...")
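# A sketch of the result-upload call above using requests' json= parameter,
# which handles quoting and escaping instead of building the JSON body by
# string concatenation. The endpoint and field names are taken from the code
# above; error handling is minimal and illustrative.
import requests

def post_detection(name: str, confidence: float, image_name: str) -> int:
    resp = requests.post(
        'https://deepgaze.xyz/api/images/store-data',
        headers={'Accept': 'application/json'},
        json={'name': name, 'porcent': f'{confidence:.2f}', 'nameImg': image_name},
        timeout=10,
    )
    return resp.status_code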
def detect(save_img=False): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: save_img = True dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] ct = CentroidTracker() # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() memory = {} people_counter = 0 detect_frame_num = 0 before = [] for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() #img_center_y = int(im0.shape[0]//2) #line = [(0,int(img_center_y*1.3)),(int(im0.shape[1]*0.55),int(img_center_y*1.3))] #cv2.line(im0,line[0],line[1],(0,0,255),5) # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh index_id = [] previous = memory.copy() memory = {} boxes = [] if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): xyxy_list = torch.tensor(xyxy).view(1, 4).view(-1).tolist() # center_x = int(np.mean([xyxy_list[0],xyxy_list[2]])) # center_y = int(np.mean([xyxy_list[1],xyxy_list[3]])) # 
cv.circle(im,(center_x)) xywh_list = xyxy2xywh(torch.tensor(xyxy).view( 1, 4)).view(-1).tolist() boxes.append(xywh_list) for box in boxes: (x, y) = (int(box[0]), int(box[1])) (w, h) = (int(box[2]), int(box[3])) if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]}' plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) #cv2.putText(im0,'Person : {}'.format(final_person_cnt),(130,100),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3) #cv2.putText(im0,'Car : {}'.format(final_car_cnt),(130,150),cv2.FONT_HERSHEY_COMPLEX,1.0,(0,0,255),3) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)')
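# A tiny helper in the spirit of the xyxy2xywh conversions above: corner boxes
# become (center_x, center_y, width, height), which is what the centroid
# tracker consumes. This is a pure-Python stand-in for illustration; the real
# code uses utils.general.xyxy2xywh on tensors.
def xyxy_to_xywh(x1: float, y1: float, x2: float, y2: float):
    return ((x1 + x2) / 2.0, (y1 + y2) / 2.0, x2 - x1, y2 - y1)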
def detect(save_img=False): # cv2.namedWindow("0", cv2.WND_PROP_FULLSCREEN) # cv2.setWindowProperty("0", cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN) source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: face_box = True temp, tempstr = read_sensor() img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s)[0] # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count # im0 = cv2.flip(im0, 1) ht, wd, _ = im0.shape temp_img = im0.copy() font = cv2.FONT_HERSHEY_SIMPLEX text_position = wd // 7, int(ht * 0.08) fontScale = 0.8 fontColor = (255, 255, 255) lineType = 2 label = 'Please fill the ellipse to start detection!' 
cv2.putText(im0, label, text_position, 0, fontScale, [0, 0, 0], thickness=2, lineType=cv2.LINE_AA) cv2.ellipse(im0, (wd // 2, ht // 2), (wd // 4, ht // 2 - 10), 0, 0, 360, (120, 150, 50), 2, cv2.LINE_AA) try: bbox = list(map(lambda x: int(x), det[0])) # print(bbox) # print(wd//5, wd//2.5) # if bbox[0] > wd//5 and bbox[2] < wd//2: cm_scale = 100 area = ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1])) // 100 print(f"AREA {area} cm") print(f"xmin: {bbox[0]}, xmax: {bbox[2]}") print("------------------------") if area < 320 and (bbox[0] > 80 or bbox[2] < 290): # print("SMALLER") face_box = False if names[0] == "nomask" and area > 240: face_box = True except: pass else: p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det) and face_box: # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f'{n} {names[int(c)]}s, ' # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]} {conf:.2f}' im0 = temp_img plot_one_box(xyxy, tempstr, im0, label=label, color=colors[int(cls)], line_thickness=3) if names[int(cls)] == 'mask' and temp > 37: submitpage1() mainloop() print("status: ", Status.status) # try: # bbox = list(map(lambda x : int(x), det[0])) # print("detection", bbox[:4]) # except: # pass # Print time (inference + NMS) # print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)')
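# A distilled, standalone sketch of the face-gating rule used above: a box is
# rejected when its area (in the same 1/100-pixel units computed above) is
# small and it does not span the expected horizontal band of the on-screen
# ellipse. Thresholds are copied from the code; the "nomask" override is
# omitted and the helper name is an assumption.
def face_fills_guide(bbox, min_area=320, x_min=80, x_max=290) -> bool:
    x1, y1, x2, y2 = bbox
    area = ((x2 - x1) * (y2 - y1)) // 100
    too_small = area < min_area and (x1 > x_min or x2 < x_max)
    return not too_small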
def detect(file_name): #print('-->start anju_detect') #source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size agnostic_nms = False augment = False classes = None conf_thres = 0.25 device = '' exist_ok = True imgsz = 640 iou_thres = 0.45 name = 'result_img' nosave = False project = 'static' save_conf = False save_txt = True source = 'uploads/' + file_name update = False view_img = True weights = 'best.pt' save_img = True # Directories #print('-->detect') save_dir = Path(increment_path(Path(project) / name, exist_ok=True)) # increment run # Initialize #print('-->Initialize') set_logging() device = select_device(device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model #print('-->Load model') model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size if half: model.half() # to FP16 # Second-stage classifier print('-->Second-stage classifier') classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader print('-->Set Dataloader') vid_path, vid_writer = None, None #print('-->3 source:',source) #print('-->3 file_name:',file_name) #print('-->4 source:',source) dataset = LoadImages(source, img_size=imgsz, stride=stride) # Get names and colors print('-->Get names and colors') names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference print('-->Run inference') if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() labels = [] for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference print('-->Inference') t1 = time_synchronized() pred = model(img, augment=augment)[0] # Apply NMS print('-->Apply NMS') pred = non_max_suppression(pred, conf_thres, iou_thres, classes=classes, agnostic=agnostic_nms) t2 = time_synchronized() # Apply Classifier print('-->Apply Classifier') if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections print('-->Process detections') for i, det in enumerate(pred): # detections per image p, s, im0, frame = path, '', im0s, getattr(dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh # ysc 20210420 delete old file , add labe list if os.path.isfile(txt_path + '.txt'): os.remove(txt_path + '.txt') if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results print('-->Print results') for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results print('-->Write results') for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file #20210420 ysc save label name #xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # 
normalized xywh #line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh) # label format label = f'{names[int(cls)]} {conf:.2f}' # detected label name with open(txt_path + '.txt', 'a') as f: #f.write(('%g ' * len(line)).rstrip() % line + '\n') # print labe map f.write(label + '\n') if save_img or view_img: # Add bbox to image label = f'{names[int(cls)]}-{conf:.2f}' plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) print('-->label :', label) labels.append(label) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') # Save results (image with detections) print('-->Save results (image with detections)') if save_img: cv2.imwrite(save_path, im0) print('labels=', labels) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") #print(f'Done. ({time.time() - t0:.3f}s)') return labels
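# A small consumer sketch for the label list returned above: each appended
# entry is formatted as "<class name>-<confidence>", so splitting once from
# the right recovers the class and a float score even if the class name
# contains spaces or hyphens. The helper name and example values are
# illustrative assumptions.
def parse_labels(labels):
    parsed = []
    for entry in labels:
        name, conf = entry.rsplit('-', 1)
        parsed.append((name, float(conf)))
    return parsed

# parse_labels(['kimchi-0.92', 'bulgogi-0.71']) -> [('kimchi', 0.92), ('bulgogi', 0.71)]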
def detect(opt, save_img=False): out, source, weights, imgsz, namelist = \ opt.output, opt.source, opt.weights, opt.img_size, opt.namelist set_logging() device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) #if device.type != 'cpu' else None # run once idx = 0 ckname = [] for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 idx += 1 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) cnt = 0 # Process detections for i, det in enumerate(pred): # detections per image p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() cntname = 0 # Write results img2 = im0.copy() nperson = [] nname = [] for *xyxy, conf, cls in reversed(det): if save_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) ######################################################################################################## ##classes 변수 생성 (이름) classes = names[int(cls)] ##classes 변수 함수에 추가 plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3, classes=classes) ##사람이라고 판단한 물체의 각 좌표 리스트에 저장 if classes == 'person': nperson.append([ int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3]) ]) if classes == 'name': nname.append([ int(xyxy[0]), int(xyxy[1]), int(xyxy[2]), int(xyxy[3]) ]) ##이름 리스트의 크기가 0보다 클 때 미리 복사해둔 프레임의 구역으로 이미지 덮기 #print(len(nperson)) if len(nname) > 0: key = 45 for pi in range(len(nperson)): check = False for ii in range(len(nname)): if nname[ii][1]>=nperson[pi][1] and nname[ii][3]<=nperson[pi][3] and nname[ii][0]>=nperson[pi][0] \ and nname[ii][2]<=nperson[pi][2] and check==False: check = True proi = img2[nname[ii][1]:nname[ii][3], nname[ii][0]:nname[ii][2]] temp_img = "{0}_{1}_{2}_{3}.jpg".format( nname[ii][1], nname[ii][3], nname[ii][0], nname[ii][2]) image_path = "./temp/{0}".format(temp_img) img_shape = proi.shape # 
print(proi) #image_path2 = "./temp/tt_{0}".format(temp_img) ####################################### encrypt_function(proi, image_path, key) # os.remove(image_path) text_ = decrypt_function( image_path, key, img_shape) #cv2.imwrite(image_path2, text_) ######################################### #print("coord:",nname[ii][1],nname[ii][3],nname[ii][0],nname[ii][2]) # OCR (check whether the name matches) => return True / False result, check_name = ocr.check_name( text_, namelist) if result == True: cntname += 1 if check_name not in ckname: ckname.append(check_name) roi = img2[nperson[pi][1]:nperson[pi][3], nperson[pi][0]:nperson[pi][2]] im0[nperson[pi][1]:nperson[pi][3], nperson[pi][0]:nperson[pi][2]] = roi #cv2.imwrite('.\check\{}.jpg'.format(idx),im0) ######################################################################################################## # Print time (inference + NMS) #print('%sDone. (%.3fs)' % (s, t2 - t1)) removeAllFile('./temp') # Save results (image with detections) if save_img: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( './output.mp4', cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0)
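# A minimal sketch of the masking idea used above: regions of the annotated frame are
# restored from an untouched copy of the original frame for every person whose name
# matched. The frame, boxes and match flags below are made-up placeholders; the real
# loop takes its boxes from the detector and its flags from the OCR name check.
import numpy as np

frame = np.random.randint(0, 256, (480, 640, 3), dtype=np.uint8)  # original frame (kept as a copy)
annotated = np.zeros_like(frame)                                  # frame after boxes/blur were drawn

person_boxes = [(100, 50, 200, 300), (400, 60, 500, 310)]         # (x1, y1, x2, y2) person detections
matched = [True, False]                                           # result of the name check per person

for (x1, y1, x2, y2), ok in zip(person_boxes, matched):
    if ok:
        annotated[y1:y2, x1:x2] = frame[y1:y2, x1:x2]             # put the original pixels back

assert (annotated[50:300, 100:200] == frame[50:300, 100:200]).all()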
def detect(image, save_img=False): out, source, weights, view_img, save_txt, imgsz = \ 'inference/output', image, ['yolov5s.pt'], False, False, 416 webcam = source == '0' or source.startswith('rtsp') or source.startswith( 'http') or source.endswith('.txt') # Initialize set_logging() device = select_device('') if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] random.seed(4) # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=False)[0] # Apply NMS pred = non_max_suppression(pred, 0.4, 0.5, classes=None, agnostic=False) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=2) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1))
# Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % Path(out)) if platform.system() == 'Darwin': # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0)) return im0
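# Hedged sketch of the per-video saving branch above: one cv2.VideoWriter is opened per
# output path, sized and timed from the source capture, and every processed frame is
# appended to it. 'source.mp4' and 'output.mp4' are placeholder paths.
import cv2

cap = cv2.VideoCapture('source.mp4')
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0               # fall back if the container reports no FPS
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
writer = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h))

ok, frame = cap.read()
while ok:
    writer.write(frame)                               # frames must already be (w, h) BGR
    ok, frame = cap.read()

writer.release()
cap.release()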
def detect(save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize set_logging() device = select_device(opt.device) folder_main = out.split('/')[0] if os.path.exists(out): shutil.rmtree(out) # delete output folder folder_features = folder_main + '/features' if os.path.exists(folder_features): shutil.rmtree(folder_features) # delete features output folder folder_crops = folder_main + '/image_crops' if os.path.exists(folder_crops): shutil.rmtree(folder_crops) # delete output folder with object crops os.makedirs(out) # make new output folder os.makedirs(folder_features) # make new output folder os.makedirs(folder_crops) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # frames per second fps = dataset.cap.get(cv2.CAP_PROP_FPS) critical_time_frames = opt.time * fps # COUNTER: initialization counter = VoteCounter(critical_time_frames, fps) print('CRITICAL TIME IS ', opt.time, 'sec, or ', counter.critical_time, ' frames') # Find index corresponding to a person idx_person = names.index("person") # Deep SORT: initialize the tracker cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # AlphaPose: initialization args_p = update_config(opt.config_alphapose) cfg_p = update_config(args_p.ALPHAPOSE.cfg) args_p.ALPHAPOSE.tracking = args_p.ALPHAPOSE.pose_track or args_p.ALPHAPOSE.pose_flow demo = SingleImageAlphaPose(args_p.ALPHAPOSE, cfg_p, device) output_pose = opt.output.split('/')[0] + '/pose' if not os.path.exists(output_pose): os.mkdir(output_pose) # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = 
time_synchronized() # COUNTER: compute urn centoid (1st frame only) and plot a bounding box around it if dataset.frame == 1: counter.read_urn_coordinates(opt.urn, im0s, opt.radius) counter.plot_urn_bbox(im0s) # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Deep SORT: person class only idxs_ppl = ( det[:, -1] == idx_person ).nonzero(as_tuple=False).squeeze( dim=1) # 1. List of indices with 'person' class detections dets_ppl = det[idxs_ppl, : -1] # 2. Torch.tensor with 'person' detections print('\n {} people were detected!'.format(len(idxs_ppl))) # Deep SORT: convert data into a proper format xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu") confs = dets_ppl[:, 4].to("cpu") # Deep SORT: feed detections to the tracker if len(dets_ppl) != 0: trackers, features = deepsort.update(xywhs, confs, im0) # tracks inside a critical sphere trackers_inside = [] for i, d in enumerate(trackers): plot_one_box(d[:-1], im0, label='ID' + str(int(d[-1])), color=colors[1], line_thickness=1) # COUNTER d_include = counter.centroid_distance( d, im0, colors[1], dataset.frame) if d_include: trackers_inside.append(d) # ALPHAPOSE: show skeletons for bounding boxes inside the critical sphere if len(trackers_inside) > 0: pose = demo.process('frame_' + str(dataset.frame), im0, trackers_inside) im0 = demo.vis(im0, pose) demo.writeJson([pose], output_pose, form=args_p.ALPHAPOSE.format, for_eval=args_p.ALPHAPOSE.eval) counter.save_features_and_crops( im0, dataset.frame, trackers_inside, features, folder_main) cv2.putText(im0, 'Voted ' + str(len(counter.voters_count)), (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 255), 2) print('NUM VOTERS', len(counter.voters)) print(list(counter.voters.keys())) # COUNTER if len(counter.voters) > 0: counter.save_voter_trajectory(dataset.frame, folder_main) # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % Path(out)) if platform.system() == 'Darwin' and not opt.update: # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
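# Sketch of the Deep SORT hand-off above: keep only the 'person' rows of the (N, 6)
# [x1, y1, x2, y2, conf, cls] detection tensor and convert the boxes to center-based
# xywh, the layout this code feeds to deepsort.update(). xyxy2xywh is a local stand-in
# for the YOLOv5 utility, and the detections are made-up values.
import torch

def xyxy2xywh(x):
    y = x.clone()
    y[:, 0] = (x[:, 0] + x[:, 2]) / 2
    y[:, 1] = (x[:, 1] + x[:, 3]) / 2
    y[:, 2] = x[:, 2] - x[:, 0]
    y[:, 3] = x[:, 3] - x[:, 1]
    return y

det = torch.tensor([[10., 20., 110., 220., 0.9, 0.],    # a 'person' detection (class 0)
                    [30., 40., 80., 90., 0.8, 2.]])     # some other class
idx_person = 0
idxs_ppl = (det[:, -1] == idx_person).nonzero(as_tuple=False).squeeze(dim=1)
dets_ppl = det[idxs_ppl, :-1]                            # drop the class column -> (M, 5)
xywhs = xyxy2xywh(dets_ppl[:, :4]).cpu()                 # boxes for the tracker
confs = dets_ppl[:, 4].cpu()                             # confidences for the tracker
print(xywhs, confs)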
def detect(save_img=False): # 获取输出文件夹,输入源,权重,参数等参数 source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) # 获取设备 # 如果设备为gpu,使用Float16 half = device.type != 'cpu' # half precision only supported on CUDA # Load model # 加载Float32模型,确保用户设定的输入图片分辨率能整除32(如不能则调整为能整除并返回) model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: # 设置Float16 model.half() # to FP16 # Second-stage classifier # 设置第二次分类,默认不使用 classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader # 通过不同的输入源来设置不同的数据加载方式 vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True # 如果检测视频的时候想显示出来,可以在这里加一行view_img = True # view_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors # 获取类别名字 names = model.module.names if hasattr(model, 'module') else model.names # 设置画框的颜色 colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() # 进行一次前向推理,测试程序是否正常 img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once """ path 图片/视频路径 img 进行resize+pad之后的图片 img0 原size图片 cap 当读取图片时为None,读取视频时为视频源 """ for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) # 图片也设置为Float16 img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 # 没有batch_size的话则在最前面添加一个轴 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() """ 前向传播 返回pred的shape是(1, num_boxes, 5+num_class) h,w为传入网络图片的长和宽,注意dataset在检测时使用了矩形推理,所以这里h不一定等于w num_boxes = h/32 * w/32 + h/16 * w/16 + h/8 * w/8 pred[..., 0:4]为预测框坐标 预测框坐标为xywh(中心点+宽长)格式 pred[..., 4]为objectness置信度 pred[..., 5:-1]为分类结果 """ pred = model(img, augment=opt.augment)[0] # Apply NMS """ pred:前向传播的输出 conf_thres:置信度阈值 iou_thres:iou阈值 classes:是否只保留特定的类别 agnostic:进行nms是否也去除不同类别之间的框 经过nms之后,预测框格式:xywh-->xyxy(左上角右下角) pred是一个列表list[torch.tensor],长度为batch_size 每一个torch.tensor的shape为(num_boxes, 6),内容为box+conf+cls """ pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier # 添加二次分类,默认不使用 if classify: pred = apply_classifier(pred, modelc, img, im0s) # 裁剪区域的标签,自加 roi_num = 0 # Process detections # 对每一张图片作处理 for i, det in enumerate(pred): # detections per image # 如果输入源是webcam,则batch_size不为1,取出dataset中的一张图片 if webcam: # batch_size >= 1 p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy() else: p, s, im0 = Path(path), '', im0s # 设置保存图片/视频的路径 save_path = str(save_dir / p.name) # 设置保存框坐标txt文件的路径 txt_path = str(save_dir / 'labels' / p.stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') # 设置打印信息(图片长宽) s += '%gx%g ' % img.shape[2:] # print string gn = 
torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size # map box coordinates from the resized+padded image back to the original image size # coordinates are in xyxy format at this point det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results # print the number of detections per class for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): # ROI index counter (added by the author) roi_num += 1 if save_txt: # Write to file # convert xyxy (top-left + bottom-right) to xywh (center + width/height), normalize by image w,h, convert to a list and save xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') # draw the box on the original image if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) # crop out the detected region (added by the author) roi_l, roi_u, roi_r, roi_d = int(xyxy[0]), int( xyxy[1]), int(xyxy[2]), int(xyxy[3]) roi = im0[roi_u:roi_d, roi_l:roi_r] cv2.imwrite( save_path[:-4] + '_' + str(roi_num) + '.jpg', roi) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3) # Print time (inference + NMS) # print forward-pass + NMS time print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results # if display is enabled, show the image/video if view_img: cv2.imshow(str(p), im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) # save the image/video if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) print(save_path) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release() # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % save_dir) # open the folder with the saved images and txt files (appears to be macOS-only) # print total time print('Done. (%.3fs)' % (time.time() - t0))
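# Small sketch of the ROI-saving step added above: each detection box is cut out of the
# original-resolution frame and written next to the annotated image, numbered by roi_num.
# The frame, box and output path are illustrative placeholders.
import os
import cv2
import numpy as np

os.makedirs('runs/detect/exp', exist_ok=True)
im0 = np.zeros((480, 640, 3), dtype=np.uint8)            # stand-in for the original frame
save_path = 'runs/detect/exp/frame.jpg'                  # hypothetical annotated-image path
xyxy = (200, 120, 300, 170)                              # one detection: x1, y1, x2, y2
roi_num = 1

roi_l, roi_u, roi_r, roi_d = map(int, xyxy)
roi = im0[roi_u:roi_d, roi_l:roi_r]
cv2.imwrite(save_path[:-4] + '_' + str(roi_num) + '.jpg', roi)   # e.g. .../frame_1.jpg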
def detect(opt): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size save_img = not opt.nosave and not source.endswith( '.txt') # save inference images webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith( ('rtsp://', 'rtmp://', 'http://', 'https://')) # Directories save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model stride = int(model.stride.max()) # model stride imgsz = check_img_size(imgsz, s=stride) # check img_size names = model.module.names if hasattr( model, 'module') else model.names # get class names if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = check_imshow() cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz, stride=stride) else: dataset = LoadImages(source, img_size=imgsz, stride=stride) # Run inference if device.type != 'cpu': model( torch.zeros(1, 3, imgsz, imgsz).to(device).type_as( next(model.parameters()))) # run once t0 = time.time() for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0, frame = path[i], '%g: ' % i, im0s[i].copy( ), dataset.count else: p, s, im0, frame = path, '', im0s.copy(), getattr( dataset, 'frame', 0) p = Path(p) # to Path save_path = str(save_dir / p.name) # img.jpg txt_path = str(save_dir / 'labels' / p.stem) + ( '' if dataset.mode == 'image' else f'_{frame}') # img.txt s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or opt.save_crop or view_img: # Add bbox to image c = int(cls) # integer class label = None if opt.hide_labels else ( names[c] if opt.hide_conf else f'{names[c]} {conf:.2f}') 
plot_one_box(xyxy, im0, label=label, color=colors(c, True), line_thickness=opt.line_thickness) if opt.save_crop: save_one_box(xyxy, im0s, file=save_dir / 'crops' / names[c] / f'{p.stem}.jpg', BGR=True) # Print time (inference + NMS) print(f'{s}Done. ({t2 - t1:.3f}s)') # Stream results if view_img: cv2.imshow(str(p), im0) cv2.waitKey(1) # 1 millisecond # Save results (image with detections) if save_img: if dataset.mode == 'image': cv2.imwrite(save_path, im0) else: # 'video' or 'stream' if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer if vid_cap: # video fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) else: # stream fps, w, h = 30, im0.shape[1], im0.shape[0] save_path += '.mp4' vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print(f'Done. ({time.time() - t0:.3f}s)')
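# Hedged sketch of the incrementing run directory used above (runs/detect/exp, exp2,
# exp3, ...). increment_dir below is a simplified local stand-in, not YOLOv5's
# increment_path utility.
from pathlib import Path

def increment_dir(base: Path) -> Path:
    # return base if unused, otherwise the first free base2, base3, ...
    if not base.exists():
        return base
    n = 2
    while (base.parent / f'{base.name}{n}').exists():
        n += 1
    return base.parent / f'{base.name}{n}'

save_dir = increment_dir(Path('runs/detect') / 'exp')
(save_dir / 'labels').mkdir(parents=True, exist_ok=True)   # mirrors the labels sub-folder
print(save_dir)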
def detect(save_img=False): source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.endswith( '.txt') or source.lower().startswith(('rtsp://', 'rtmp://', 'http://')) # Directories save_dir = Path( increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir( parents=True, exist_ok=True) # make dir # Initialize set_logging() device = select_device(opt.device) half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in names] # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy() else: p, s, im0 = Path(path), '', im0s save_path = str(save_dir / p.name) txt_path = str(save_dir / 'labels' / p.stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if len(det): # Rescale boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Write results for *xyxy, conf, cls in reversed(det): if save_txt: # Write to file xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if opt.save_conf else ( cls, *xywh) # label format with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * len(line)).rstrip() % line + '\n') if save_img or view_img: # Add bbox to image label = '%s %.2f' % (names[int(cls)], conf) plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=1) # Print time (inference + NMS) print('%sDone. 
(%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(str(p), im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else '' print(f"Results saved to {save_dir}{s}") print('Done. (%.3fs)' % (time.time() - t0))
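# Tiny sketch of the closing summary above: count the label files written under
# save_dir/labels and report where the results went. The run directory and label
# line are placeholders.
from pathlib import Path

save_dir = Path('runs/detect/exp')
(save_dir / 'labels').mkdir(parents=True, exist_ok=True)
(save_dir / 'labels' / 'frame_1.txt').write_text('0 0.5 0.5 0.2 0.1\n')

n = len(list(save_dir.glob('labels/*.txt')))
print(f"Results saved to {save_dir}\n{n} labels saved to {save_dir / 'labels'}")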
def detect(save_img=False): out, source, weights, view_img, save_txt, imgsz = \ opt.output, opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size webcam = source.isnumeric() or source.startswith( 'rtsp') or source.startswith('http') or source.endswith('.txt') # Initialize set_logging() device = select_device(opt.device) if os.path.exists(out): shutil.rmtree(out) # delete output folder if os.path.exists(opt.features): shutil.rmtree(opt.features) # delete features output folder if os.path.exists(opt.crops): shutil.rmtree(opt.crops) # delete output folder with object crops os.makedirs(out) # make new output folder os.makedirs(opt.features) # make new output folder os.makedirs(opt.crops) # make new output folder half = device.type != 'cpu' # half precision only supported on CUDA # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size if half: model.half() # to FP16 # Second-stage classifier classify = False if classify: modelc = load_classifier(name='resnet101', n=2) # initialize modelc.load_state_dict( torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights modelc.to(device).eval() # Set Dataloader vid_path, vid_writer = None, None if webcam: view_img = True cudnn.benchmark = True # set True to speed up constant image size inference dataset = LoadStreams(source, img_size=imgsz) else: save_img = True dataset = LoadImages(source, img_size=imgsz) # log file dictionary: save frames when track_id object is detected log_frames = {"FPS": dataset.cap.get(cv2.CAP_PROP_FPS)} print("FRAMES PER SECOND ", dataset.cap.get(cv2.CAP_PROP_FPS)) # Get names and colors names = model.module.names if hasattr(model, 'module') else model.names colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] # Find index corresponding to a person idx_person = names.index("person") # Deep SORT: initialize the tracker cfg = get_config() cfg.merge_from_file(opt.config_deepsort) deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT, max_dist=cfg.DEEPSORT.MAX_DIST, min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE, nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP, max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE, max_age=cfg.DEEPSORT.MAX_AGE, n_init=cfg.DEEPSORT.N_INIT, nn_budget=cfg.DEEPSORT.NN_BUDGET, use_cuda=True) # Run inference t0 = time.time() img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once for path, img, im0s, vid_cap in dataset: img = torch.from_numpy(img).to(device) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 if img.ndimension() == 3: img = img.unsqueeze(0) # Inference t1 = time_synchronized() pred = model(img, augment=opt.augment)[0] # Apply NMS pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms) t2 = time_synchronized() # Apply Classifier if classify: pred = apply_classifier(pred, modelc, img, im0s) # Process detections for i, det in enumerate(pred): # detections per image if webcam: # batch_size >= 1 p, s, im0 = path[i], '%g: ' % i, im0s[i].copy() else: p, s, im0 = path, '', im0s save_path = str(Path(out) / Path(p).name) txt_path = str(Path(out) / Path(p).stem) + ( '_%g' % dataset.frame if dataset.mode == 'video' else '') s += '%gx%g ' % img.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh if det is not None and len(det): # Rescale 
boxes from img_size to im0 size det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): n = (det[:, -1] == c).sum() # detections per class s += '%g %ss, ' % (n, names[int(c)]) # add to string # Deep SORT: person class only idxs_ppl = ( det[:, -1] == idx_person ).nonzero(as_tuple=False).squeeze( dim=1) # 1. List of indices with 'person' class detections dets_ppl = det[idxs_ppl, : -1] # 2. Torch.tensor with 'person' detections print('\n {} people were detected!'.format(len(idxs_ppl))) # Deep SORT: convert data into a proper format xywhs = xyxy2xywh(dets_ppl[:, :-1]).to("cpu") confs = dets_ppl[:, 4].to("cpu") # Deep SORT: feed detections to the tracker if len(dets_ppl) != 0: trackers, features = deepsort.update(xywhs, confs, im0) for d in trackers: ##### DEEP SORT feature object saver #### track_id = d[4] fname_features = opt.features + '/ID_{}'.format( track_id) fname_crops = opt.crops + '/ID_{}'.format(track_id) if not os.path.exists(fname_features): os.mkdir(fname_features) os.mkdir(fname_crops) log_frames['ID_' + str(track_id)] = [] # choose format to save feature arrays on your machine: # https://machinelearningmastery.com/how-to-save-a-numpy-array-to-file-for-machine-learning/ save_format = 'csv' filename = fname_features + "/feature_frame_" + str( dataset.frame) if save_format == 'csv': savetxt(filename + '.csv', features[track_id], delimiter=',') #data = numpy.loadtxt('data.csv', delimiter=',') elif save_format == 'npy': save(filename + '.npy', features[track_id]) #data = numpy.load('data.npy') elif save_format == 'npz': savez_compressed(filename + '.npz', features[track_id]) # dict_data = load('data.npz'); data = dict_data['arr_0'] # update log file with track_id detection history log_frames['ID_' + str(track_id)].append(dataset.frame) # save croped image im_crop = im0[d[1]:d[3], d[0]:d[2], :] cv2.imwrite(filename=fname_crops + "/image_crop_" + str(dataset.frame) + '.jpg', img=im_crop) plot_one_box(d[:4], im0, label='ID' + str(int(d[4])), color=colors[1], line_thickness=1) # DEEP SORT: save updated log file log_format = 'txt' if log_format == 'txt': f_log = open(opt.features + "/log_detection.txt", "w") f_log.write(str(log_frames)) elif log_format == 'pkl': f_log = open(opt.features + "/log_detection.pkl", "wb") pickle.dump(log_frames, f_log) f_log.close() ################################### # Print time (inference + NMS) print('%sDone. (%.3fs)' % (s, t2 - t1)) # Stream results if view_img: cv2.imshow(p, im0) if cv2.waitKey(1) == ord('q'): # q to quit raise StopIteration # Save results (image with detections) if save_img: if dataset.mode == 'images': cv2.imwrite(save_path, im0) else: if vid_path != save_path: # new video vid_path = save_path if isinstance(vid_writer, cv2.VideoWriter): vid_writer.release( ) # release previous video writer fourcc = 'mp4v' # output video codec fps = vid_cap.get(cv2.CAP_PROP_FPS) w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) vid_writer = cv2.VideoWriter( save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h)) vid_writer.write(im0) if save_txt or save_img: print('Results saved to %s' % Path(out)) if platform.system() == 'Darwin' and not opt.update: # MacOS os.system('open ' + save_path) print('Done. (%.3fs)' % (time.time() - t0))
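# Sketch of the feature-saving switch above: the same appearance embedding written as
# CSV, .npy or compressed .npz depending on save_format, with the matching way to read
# it back shown as comments. The array and filename are placeholders.
import numpy as np
from numpy import savetxt, save, savez_compressed

features = np.random.rand(512).astype(np.float32)        # one Deep SORT appearance embedding
save_format = 'csv'
filename = 'feature_frame_1'

if save_format == 'csv':
    savetxt(filename + '.csv', features, delimiter=',')
    # features = np.loadtxt(filename + '.csv', delimiter=',')
elif save_format == 'npy':
    save(filename + '.npy', features)
    # features = np.load(filename + '.npy')
elif save_format == 'npz':
    savez_compressed(filename + '.npz', features)
    # features = np.load(filename + '.npz')['arr_0']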