def predict(self, obj, mode: str = "image"):
    """Run inference on a single image or an opened cv2.VideoCapture."""
    if mode == "image":
        # Detectron2 visualizers expect RGB; OpenCV delivers BGR.
        image = obj[:, :, ::-1]
        image_visualizer = Visualizer(image,
                                      metadata=self.metadata,
                                      instance_mode=self.instance_mode,
                                      scale=1.2)
        outputs = self.predictor(obj)
        instances = outputs["instances"].to("cpu")
        instances.remove("pred_classes")  # hide class labels in the drawing
        vis_output = image_visualizer.draw_instance_predictions(instances)
    elif mode == "video":
        video_visualizer = VideoVisualizer(metadata=self.metadata,
                                           instance_mode=self.instance_mode)
        outputs, vis_output = [], []
        while obj.isOpened():
            success, frame = obj.read()
            if not success:
                break
            output = self.predictor(frame)
            outputs.append(output)
            instances = output["instances"].to("cpu")
            # Convert BGR to RGB for drawing, then back to BGR for OpenCV.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            vis_frame = video_visualizer.draw_instance_predictions(frame, instances)
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            vis_output.append(vis_frame)
    return outputs, vis_output
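# A standalone sketch of the "video" branch above, assuming a detectron2
# model-zoo config (the config name and video path are examples, not taken
# from the original snippet):
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultPredictor
from detectron2.utils.video_visualizer import VideoVisualizer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
predictor = DefaultPredictor(cfg)
visualizer = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]))

cap = cv2.VideoCapture("input.mp4")
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    instances = predictor(frame)["instances"].to("cpu")
    vis = visualizer.draw_instance_predictions(frame[:, :, ::-1], instances)
    cv2.imshow("prediction", vis.get_image()[:, :, ::-1])
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()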
class AnnotateVideo(Pipeline):
    """Pipeline task for video annotation."""

    def __init__(self, dst, metadata_name, instance_mode=ColorMode.IMAGE):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
        super().__init__()

    def map(self, data):
        dst_image = data["image"].copy()
        data[self.dst] = dst_image
        self.annotate_frame_num(data)
        self.annotate_predictions(data)
        return data

    def annotate_frame_num(self, data):
        dst_image = data[self.dst]
        frame_idx = data["frame_num"]
        put_text(dst_image, f"{frame_idx:04d}", (0, 0),
                 color=colors.get("white").to_bgr(),
                 bg_color=colors.get("black").to_bgr(),
                 org_pos="tl")

    def annotate_predictions(self, data):
        if "predictions" not in data:
            return

        dst_image = data[self.dst]
        dst_image = dst_image[:, :, ::-1]  # Convert OpenCV BGR to RGB format
        predictions = data["predictions"]
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_image = self.video_visualizer.draw_panoptic_seg_predictions(
                dst_image, panoptic_seg.to(self.cpu_device), segments_info)
        elif "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0)
            vis_image = self.video_visualizer.draw_sem_seg(
                dst_image, sem_seg.to(self.cpu_device))
        elif "instances" in predictions:
            instances = predictions["instances"]
            vis_image = self.video_visualizer.draw_instance_predictions(
                dst_image, instances.to(self.cpu_device))

        # Convert RGB back to OpenCV BGR format
        vis_image = cv2.cvtColor(vis_image.get_image(), cv2.COLOR_RGB2BGR)
        data[self.dst] = vis_image
def prediction_on_video(video):
    model = "modelsfiles/model_final.pth"
    config = "modelsfiles/config.yml"
    threshold = 0.5
    save_path = "output"
    predictor, cfg = get_model(model, config, threshold)

    parser = argparse.ArgumentParser(
        description='Detect objects from webcam images')
    parser.add_argument('-s', '--show', default=True, action="store_false",
                        help='Disable display (output is shown by default)')
    parser.add_argument('-sp', '--save_path', type=str, default='',
                        help="Path to save the output. If empty, output won't be saved")
    args = parser.parse_args()
    print("Started")

    video_file = video  # e.g. "/home/oem/Downloads/video.mp4"
    cap = cv2.VideoCapture(video_file)
    if not cap.isOpened():
        print("Error opening video stream or file")
        return

    MetadataCatalog.get("customtrain").thing_classes = [
        'ear plugs', 'welding shield'
    ]
    metadata = MetadataCatalog.get("customtrain")

    # Create the visualizer once so instance colors stay consistent across frames.
    video_visualizer = VideoVisualizer(metadata, ColorMode.IMAGE)

    while cap.isOpened():
        ret, image = cap.read()
        if not ret:
            break
        outputs = predictor(image)
        # VideoVisualizer expects an RGB image; flip back to BGR for display.
        v = video_visualizer.draw_instance_predictions(
            image[:, :, ::-1], outputs["instances"].to("cpu"))
        if args.show:
            cv2.imshow('object_detection', v.get_image()[:, :, ::-1])
        if cv2.waitKey(25) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
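# get_model() is called above but not defined in this snippet. A minimal
# sketch of what it plausibly does, assuming a standard detectron2 setup (only
# the signature is taken from the call site; the body is an assumption):
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor

def get_model(model_path, config_path, threshold):
    cfg = get_cfg()
    cfg.merge_from_file(config_path)
    cfg.MODEL.WEIGHTS = model_path
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold  # drop low-confidence detections
    return DefaultPredictor(cfg), cfg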
def main():
    args = parse_args()

    with open(args.config, "r") as f:
        config = yaml.safe_load(f)
    if "classes" not in config:
        raise Exception("Could not find class names")
    classes = config["classes"]
    n_classes = len(classes)

    cfg = get_cfg()
    cfg.merge_from_file(args.model_config)
    cfg.DATASETS.TRAIN = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 50000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = n_classes
    if args.model_weights is None:
        cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
    else:
        cfg.MODEL.WEIGHTS = args.model_weights
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set the testing threshold for this model
    cfg.DATASETS.TEST = ("custom_test",)
    predictor = DefaultPredictor(cfg)

    # Dummy registration so the metadata catalog has an entry to attach class names to.
    DatasetCatalog.register("custom_test", lambda d="test": None)
    MetadataCatalog.get("custom_test").set(thing_classes=classes)
    custom_metadata = MetadataCatalog.get("custom_test")

    os.makedirs(args.output, exist_ok=True)

    cap = cv2.VideoCapture(args.video)
    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    vis = VideoVisualizer(metadata=custom_metadata)
    for i in tqdm.tqdm(range(0, n_frames, args.skip_frames)):
        assert cap.isOpened()
        cap.set(cv2.CAP_PROP_POS_FRAMES, i)
        success, image = cap.read()
        assert success
        outputs = predictor(image)
        v = vis.draw_instance_predictions(
            image[:, :, ::-1], outputs["instances"].to("cpu"))
        filename = os.path.join(args.output, "prediction_%09d.jpg" % i)
        cv2.imwrite(filename, v.get_image()[:, :, ::-1])
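# parse_args() is not shown above. A minimal sketch, assuming argparse and
# covering only the attributes main() actually reads (flag names are guesses):
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="Run inference on a video")
    parser.add_argument("--config", required=True, help="YAML file listing class names")
    parser.add_argument("--model-config", required=True, help="detectron2 model config file")
    parser.add_argument("--model-weights", default=None, help="defaults to OUTPUT_DIR/model_final.pth")
    parser.add_argument("--video", required=True, help="input video file")
    parser.add_argument("--output", default="output", help="directory for prediction images")
    parser.add_argument("--skip-frames", type=int, default=1, help="process every Nth frame")
    return parser.parse_args()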
class AnnotateVideo(Pipeline):
    """Pipeline task for video annotation."""

    def __init__(self, dst, metadata_name, instance_mode=ColorMode.IMAGE,
                 frame_num=True, predictions=True, pose_flows=True):
        self.dst = dst
        self.metadata_name = metadata_name
        self.metadata = MetadataCatalog.get(self.metadata_name)
        self.instance_mode = instance_mode
        self.frame_num = frame_num
        self.predictions = predictions
        self.pose_flows = pose_flows
        self.cpu_device = torch.device("cpu")
        self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)
        super().__init__()

    def map(self, data):
        dst_image = data["image"].copy()
        data[self.dst] = dst_image
        if self.frame_num:
            self.annotate_frame_num(data)
        if self.predictions:
            self.annotate_predictions(data)
        if self.pose_flows:
            self.annotate_pose_flows(data)
        return data

    def annotate_frame_num(self, data):
        dst_image = data[self.dst]
        frame_idx = data["frame_num"]
        put_text(dst_image, f"{frame_idx:04d}", (0, 0),
                 color=colors.get("white").to_bgr(),
                 bg_color=colors.get("black").to_bgr(),
                 org_pos="tl")

    def annotate_predictions(self, data):
        if "predictions" not in data:
            return

        dst_image = data[self.dst]
        dst_image = dst_image[:, :, ::-1]  # Convert OpenCV BGR to RGB format
        predictions = data["predictions"]
        if "panoptic_seg" in predictions:
            panoptic_seg, segments_info = predictions["panoptic_seg"]
            vis_image = self.video_visualizer.draw_panoptic_seg_predictions(
                dst_image, panoptic_seg.to(self.cpu_device), segments_info)
        elif "sem_seg" in predictions:
            sem_seg = predictions["sem_seg"].argmax(dim=0)
            vis_image = self.video_visualizer.draw_sem_seg(
                dst_image, sem_seg.to(self.cpu_device))
        elif "instances" in predictions:
            instances = predictions["instances"]
            vis_image = self.video_visualizer.draw_instance_predictions(
                dst_image, instances.to(self.cpu_device))

        # Convert RGB back to OpenCV BGR format
        vis_image = cv2.cvtColor(vis_image.get_image(), cv2.COLOR_RGB2BGR)
        data[self.dst] = vis_image

    def annotate_pose_flows(self, data):
        if "pose_flows" not in data:
            return

        predictions = data["predictions"]
        instances = predictions["instances"]
        keypoints = instances.pred_keypoints.cpu().numpy()

        # COCO keypoint skeleton: pairs of keypoint indices joined by a limb.
        l_pairs = [
            (0, 1), (0, 2), (1, 3), (2, 4),  # Head
            (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
            (6, 12), (5, 11), (11, 12),      # Body
            (11, 13), (12, 14), (13, 15), (14, 16)
        ]

        dst_image = data[self.dst]
        height, width = dst_image.shape[:2]

        pose_flows = data["pose_flows"]
        pose_colors = list(colors.items())
        pose_colors_len = len(pose_colors)
        for idx, pose_flow in enumerate(pose_flows):
            pid = pose_flow["pid"]
            # Map the track id onto the palette; the double modulo keeps the
            # index non-negative for negative pids.
            pose_color_idx = ((pid * 10) % pose_colors_len + pose_colors_len) % pose_colors_len
            pose_color_bgr = pose_colors[pose_color_idx][1].to_bgr()
            (start_x, start_y, end_x, end_y) = pose_flow["box"].astype("int")
            cv2.rectangle(dst_image, (start_x, start_y), (end_x, end_y),
                          pose_color_bgr, 2, cv2.LINE_AA)
            put_text(dst_image, f"{pid:d}", (start_x, start_y),
                     color=pose_color_bgr,
                     bg_color=colors.get("black").to_bgr(),
                     org_pos="tl")

            instance_keypoints = keypoints[idx]
            l_points = {}
            p_scores = {}
            # Draw keypoints
            for n in range(instance_keypoints.shape[0]):
                score = instance_keypoints[n, 2]
                if score <= 0.05:
                    continue
                cor_x = int(np.clip(instance_keypoints[n, 0], 0, width))
                cor_y = int(np.clip(instance_keypoints[n, 1], 0, height))
                l_points[n] = (cor_x, cor_y)
                p_scores[n] = score
                cv2.circle(dst_image, (cor_x, cor_y), 2, pose_color_bgr, -1)

            # Draw limbs, thicker where both endpoint scores are high
            for i, (start_p, end_p) in enumerate(l_pairs):
                if start_p in l_points and end_p in l_points:
                    start_xy = l_points[start_p]
                    end_xy = l_points[end_p]
                    start_score = p_scores[start_p]
                    end_score = p_scores[end_p]
                    cv2.line(dst_image, start_xy, end_xy, pose_color_bgr,
                             int(2 * (start_score + end_score) + 1))
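# A minimal sketch of running AnnotateVideo on a single frame outside a full
# pipeline, assuming a registered metadata name and a detectron2
# DefaultPredictor (both names here are examples, not from the original):
annotate = AnnotateVideo("vis_image", "coco_2017_train")
frame = cv2.imread("frame.jpg")
data = {
    "image": frame,                   # BGR ndarray from OpenCV
    "frame_num": 0,
    "predictions": predictor(frame),  # {"instances": Instances}
    # no "pose_flows" key, so annotate_pose_flows() returns early
}
data = annotate.map(data)
cv2.imshow("annotated", data["vis_image"])
cv2.waitKey(0)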
        frame = cv2.imread(jpg)
        visualised_jpg_path = os.path.join(args.output, 'detection', image_basename)
        assert not os.path.isfile(visualised_jpg_path), visualised_jpg_path

        predictions = predictor(frame)["instances"].to("cpu")
        output_dict = {
            'num_detections': len(predictions),
            'detection_boxes': predictions.pred_boxes.tensor.numpy(),
            'detection_classes': predictions.pred_classes.numpy(),
            'detection_score': predictions.scores.numpy()
        }
        all_detection_outputs[frame_num] = output_dict

        # Draw on RGB, then flip back to BGR for cv2.imwrite.
        vis_frame = video_visualiser.draw_instance_predictions(
            frame[:, :, ::-1], predictions)
        cv2.imwrite(visualised_jpg_path, vis_frame.get_image()[:, :, ::-1])

    with open(predictions_save_path, 'wb') as handle:
        pickle.dump(all_detection_outputs, handle,
                    protocol=pickle.HIGHEST_PROTOCOL)

elif args.video_input:
    demo = VisualizationDemo(cfg)
    video = cv2.VideoCapture(args.video_input)
    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frames_per_second = video.get(cv2.CAP_PROP_FPS)
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
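    # The properties read above are typically fed straight into the output
    # writer; a minimal sketch (file name and codec are assumptions, not from
    # the original):
    output_writer = cv2.VideoWriter(
        "visualized.mp4",
        fourcc=cv2.VideoWriter_fourcc(*"mp4v"),
        fps=frames_per_second,
        frameSize=(width, height),
        isColor=True,
    )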
isColor=True, )""" while (cap.isOpened()): ret, frame = cap.read(0) frame = cv2.resize(frame, (224, 224)) print(fps) print(num_frames) try: outputs = predictor(frame) #v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0])) v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), instance_mode=ColorMode.IMAGE_BW) v = v.draw_instance_predictions(frame, outputs["instances"].to('cpu')) print(outputs["instances"].pred_classes) omt = str(outputs["instances"].pred_classes) outpclass = omt[8:9] print(outpclass) """while (cap.isOpened()): #outpclass is printing ang giving 0 if 0 comes then action this loop if outpclass == '0': #unlock(8) make ur own function to test time.sleep(10) #Lock will remains open for 10 seconds. make this run in loop #lock(8) #GPIO.cleanup(8)""" #out.write(v.get_image()) #cv2_imshow("Moda", v.get_image()) except: break
output = predictor(frame)
try:
    # Get the first detected person (COCO class id 0)
    print(output["instances"].pred_boxes)
    classes = output["instances"].pred_classes.cpu().numpy()
    pos = np.where(classes == 0)[0][0]  # raises IndexError if no person found
    v = VideoVisualizer(
        metadata=MetadataCatalog.get(cfg.DATASETS.TRAIN[0]),
        instance_mode=ColorMode.IMAGE,
    )
    v = v.draw_instance_predictions(
        frame, output["instances"][int(pos)].to("cpu"))
    box = output["instances"][int(pos)].pred_boxes
    startX, startY, endX, endY = box.tensor.cpu().numpy().astype(
        "int").tolist()[0]
    detected_person = frame[startY:endY, startX:endX]
    cv2.imshow("images", v.get_image()[:, :, ::-1])
except IndexError:
    # No person in this frame; show the raw frame instead of a stale
    # visualization (the original bare except reused a possibly unbound v).
    cv2.imshow("images", frame)
# curr_inference_time = toc - tic
# inference_time_cma = (n * inference_time_cma + curr_inference_time) / (n + 1)
# print('cma inference time: {:0.3} sec'.format(inference_time_cma))

# tic2 = time.time()
drawned_frame = frame.copy()  # make a copy of the original frame

# Draw the predictions on the frame copy.
# v = Visualizer(drawned_frame[:, :, ::-1],
#                metadata=plastic_metadata,
#                scale=0.8,
#                instance_mode=ColorMode.IMAGE_BW  # remove the colors of unsegmented pixels
#                )
v_out = viz.draw_instance_predictions(drawned_frame, res["instances"].to("cpu"))
# v_out = viz.draw_instance_predictions(drawned_frame[:, :, ::-1], res["instances"].to("cpu"))
drawned_frame = v_out.get_image()

cv2.imshow(win_name, drawned_frame)
# toc2 = time.time()
vw.write(drawned_frame)

# curr_drawing_time = toc2 - tic2
# drawing_time_cma = (n * drawing_time_cma + curr_drawing_time) / (n + 1)
# print('cma draw time: {:0.3} sec'.format(drawing_time_cma))

if cv2.waitKey(1) & 0xff == ord('q'):
    break