class Visualization(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = METADATA self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( predictions=instances) return predictions, vis_output def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. """ video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))
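# Hedged usage sketch (added, not part of the original code): one way to drive the
# Visualization class above on a single image. The config name, weights, and file
# paths are placeholders, and the module-level METADATA the class reads is assumed
# to be defined elsewhere in the module.
def _example_visualization_on_image(image_path="input.jpg"):
    import cv2
    from detectron2 import model_zoo
    from detectron2.config import get_cfg

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

    demo = Visualization(cfg)
    image = cv2.imread(image_path)  # BGR, the layout run_on_image expects
    predictions, vis = demo.run_on_image(image)
    # VisImage.get_image() returns RGB; flip the channels before writing with OpenCV.
    cv2.imwrite("output.jpg", vis.get_image()[:, :, ::-1])
    return predictions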
class VideoPrediction(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    # Class-level default path; not read by the methods below, which expect an
    # already-opened cv2.VideoCapture to be passed in.
    video = "test_videos/video.mp4"

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                vis_frame = video_visualizer.draw_panoptic_seg_predictions(
                    frame, panoptic_seg.to(self.cpu_device), segments_info)
            elif "instances" in predictions:
                predictions = predictions["instances"].to(self.cpu_device)
                vis_frame = video_visualizer.draw_instance_predictions(
                    frame, predictions)
            elif "sem_seg" in predictions:
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
            # Converts Matplotlib RGB format to OpenCV BGR format
            vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size
            frame_data = deque()
            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)
                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)
            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
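# Hedged usage sketch (added, not original code): feeds a video file through the
# VideoPrediction.run_on_video generator above and writes the visualized frames back
# out. The paths, codec, and fallback frame rate are placeholders.
def _example_video_prediction(cfg, in_path="test_videos/video.mp4", out_path="out.mp4"):
    import cv2
    demo = VideoPrediction(cfg)
    capture = cv2.VideoCapture(in_path)
    fps = capture.get(cv2.CAP_PROP_FPS) or 25.0
    writer = None
    for vis_frame in demo.run_on_video(capture):
        if writer is None:
            height, width = vis_frame.shape[:2]
            writer = cv2.VideoWriter(out_path, cv2.VideoWriter_fourcc(*"mp4v"),
                                     fps, (width, height))
        writer.write(vis_frame)  # frames are yielded in OpenCV's BGR order
    capture.release()
    if writer is not None:
        writer.release()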
class VisualizationDemo(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def run_on_image(self, image, path): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( predictions=instances) extract_img = vis_output.img #画关键点 keypoint_box = instances._fields["pred_keypoints"].numpy( ).tolist() img_name = path.split("/")[-1] if (len(keypoint_box) > 0): for idx, keypoint_list in enumerate(keypoint_box): for idxx, keypoint in enumerate(keypoint_list): pass _ = self.write(extract_img, 11, 15, 13, keypoint_list) text_img = self.write(_, 12, 16, 14, keypoint_list) rgb = text_img[..., ::-1] cv2.imwrite( "/home/dooncloud/GitHub/detectron2/output/self_" + img_name, rgb) # vis_output = visualizer.draw_instance_predictions(predictions=instances) return predictions, vis_output def calculate_angle(self, point_1, point_2, point_base): vector_a = [point_1[0] - point_base[0], point_1[1] - point_base[1]] vector_b = [point_2[0] - point_base[0], point_2[1] - point_base[1]] up = np.dot(vector_a, vector_b) a = np.linalg.norm(np.array(vector_a)) b = np.linalg.norm(np.array(vector_b)) down = a * b if down == 0: cos = 0.0 else: cos = up / down if (abs(cos) > 1): cos = 1 return math.degrees(math.acos(cos)) def calculate_distance(self, point_1, point_2): vector = [point_1[0] - point_2[0], point_1[1] - point_2[1]] distance = np.linalg.norm(np.array(vector)) return distance def where_point_write(self, n_list, keypoint_list): point_1 = keypoint_list[n_list[0]] point_2 = keypoint_list[n_list[1]] point_base = keypoint_list[n_list[2]] result = self.calculate_angle(point_1, point_2, point_base) x, y = point_base[0], point_base[1] return result, x, y def write(self, img, need_list, keypoint_list): if len(need_list) > 0: for i in need_list: result, x, y = self.where_point_write(i, keypoint_list) img = cv2.putText(img, "%.2f" % result, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) else: img = img return img def where_distance_write(self, n_list, keypoint_list): point_1 = keypoint_list[n_list[0]] point_2 = keypoint_list[n_list[1]] result = self.calculate_distance(point_1, point_2) x, y = point_2[0], point_2[1] 
return result, x, y def write_distance(self, img, need_list, keypoint_list): if len(need_list) > 0: for i in need_list: result, x, y = self.where_distance_write(i, keypoint_list) img = cv2.putText(img, "%.2f" % result, (int(x), int(y)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) else: img = img return img def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video, dictionary): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. """ video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions, dictionary): resulte = 0 frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) #判断框 max_inform_keypoint = self.search_max_box_information( predictions) if (max_inform_keypoint != None): #画框 bbox = max_inform_keypoint[0] frame = cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (0, 255, 0), 2) # 画关键点 keypoint_list = max_inform_keypoint[1] for i, keypoint in enumerate(keypoint_list): circle_coord = (int(keypoint[0]), int(keypoint[1])) frame = cv2.putText(frame, str(i), circle_coord, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2) #画角度 frame = self.write(frame, dictionary["angle"], keypoint_list) #画距离 frame = self.write_distance(frame, dictionary["distance"], keypoint_list) #判断仰卧起坐 resulte = self.poll_situp(keypoint_list, dictionary) #存结果 # save_json = self.save_resulte(keypoint_list,dictionary) vis_frame = frame[..., ::-1] else: vis_frame = frame[..., ::-1] # vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # Converts Matplotlib RGB format to OpenCV BGR format # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return { "vis_frame": vis_frame, "resulte": resulte, "max_inform_keypoint": max_inform_keypoint } frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame), dictionary) def poll_situp(self, keypoint_list, dictionary): ankle_angle_poll = self.angle_poll(dictionary["judge_ankle_angle"], keypoint_list, dictionary["require_ankle"]) up_butt_angle_poll = self.angle_poll(dictionary["judge_butt_angle"], keypoint_list, dictionary["require_butt_up"]) down_butt_angle_poll = self.angle_poll(dictionary["judge_butt_angle"], keypoint_list, dictionary["require_butt_down"]) distance_ratio_poll = self.distance_ratio_poll( dictionary["judge_distance_ratio"], keypoint_list, 
dictionary["require_distance_ratio"]) up_array = ankle_angle_poll + distance_ratio_poll + up_butt_angle_poll down_array = down_butt_angle_poll return [up_array, down_array] def save_resulte(self, keypoint_list, dictionary): ankle_num_list = self.calculate_save_angle( dictionary["judge_ankle_angle"], keypoint_list) log_f = open("digital", "a+") print(ankle_num_list, file=log_f) log_f.close() butt_num_list = self.calculate_save_angle( dictionary["judge_butt_angle"], keypoint_list) distance_num_list = self.distance_poll( dictionary["judge_distance"], keypoint_list, dictionary["require_distance_ratio"]) def calculate_save_angle(self, angle_list, keypoint_list): resulte = [] for i in (angle_list): point_1 = keypoint_list[i[0]] point_2 = keypoint_list[i[1]] point_base = keypoint_list[i[2]] angle_result = self.calculate_angle(point_1, point_2, point_base) resulte.append(angle_result) return resulte def angle_poll(self, angle_list, keypoint_list, requirement): poll = [] resulte = self.calculate_save_angle(angle_list, keypoint_list) for idx, per_resulte in enumerate(resulte): if "<" is requirement["need"]: if per_resulte < requirement["angle"][idx]: poll.append(1) else: poll.append(0) elif ">" is requirement["need"]: if per_resulte > requirement["angle"][idx]: poll.append(1) else: poll.append(0) elif "=" is requirement["need"]: if per_resulte == requirement["angle"][idx]: poll.append(1) else: poll.append(0) else: raise Exception("calculate_dictionary 请输入正确判断符号") return poll def calculate_save_distance_ratio(self, distance_list, keypoint_list): resulte = [] for i in (distance_list): point_1_1 = keypoint_list[i[0]] point_1_2 = keypoint_list[i[1]] point_2_1 = keypoint_list[i[2]] point_2_2 = keypoint_list[i[3]] up_result = self.calculate_distance(point_1_1, point_1_2) down_resulte = self.calculate_distance(point_2_1, point_2_2) ratio = up_result / down_resulte resulte.append(ratio) return resulte def distance_ratio_poll(self, distance_list, keypoint_list, requirement): poll = [] resulte = self.calculate_save_distance_ratio(distance_list, keypoint_list) print(resulte) for idx, per_resulte in enumerate(resulte): if "<" is requirement["need"]: if per_resulte < requirement["distance"][idx]: poll.append(1) else: poll.append(0) elif ">" is requirement["need"]: if per_resulte > requirement["distance"][idx]: poll.append(1) else: poll.append(0) elif "=" is requirement["need"]: if per_resulte == requirement["distance"][idx]: poll.append(1) else: poll.append(0) else: raise Exception("calculate_dictionary 请输入正确判断符号") print(poll) return poll def search_max_box_information(self, predictions): keypoint_box_area = predictions._fields["pred_boxes"].area().numpy( ).tolist() keypoint_box_coordinate = predictions._fields[ "pred_boxes"].tensor.numpy().tolist() keypoint_box = predictions._fields["pred_keypoints"].numpy().tolist() assert (len(keypoint_box_area) == len(keypoint_box_coordinate) and len(keypoint_box_coordinate) == len(keypoint_box)) is True, "search max box --error" if (len(keypoint_box_area) != 0): if len(keypoint_box_area) > 1: index = keypoint_box_area.index(max(keypoint_box_area)) return [keypoint_box_coordinate[index], keypoint_box[index]] else: return [keypoint_box_coordinate[0], keypoint_box[0]] else: pass
class VisualizationDemo(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) # if "panoptic_seg" in predictions: # panoptic_seg, segments_info = predictions["panoptic_seg"] # vis_output = visualizer.draw_panoptic_seg_predictions( # panoptic_seg.to(self.cpu_device), segments_info # ) # else: # if "sem_seg" in predictions: # vis_output = visualizer.draw_sem_seg( # predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) # ) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( predictions=instances) return predictions, vis_output def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: preductions, ndarray: BGR visualizations of each video frame. 
""" video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): # See https://detectron2.readthedocs.io/tutorials/models.html#model-output-format # note tensor ==> pytorch.tensor frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # if "panoptic_seg" in predictions: # panoptic_seg, segments_info = predictions["panoptic_seg"] # retval = panoptic_seg # TODO # vis_frame = video_visualizer.draw_panoptic_seg_predictions( # frame, panoptic_seg.to(self.cpu_device), segments_info # ) # elif "instances" in predictions: if "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) # TODO: grab all these # classes = predictions.to(self.cpu_device).pred_classes.numpy() # scores = predictions.scores # retval = predictions.to(self.cpu_device).pred_boxes.tensor.numpy() retval = predictions # elif "sem_seg" in predictions: # vis_frame = video_visualizer.draw_sem_seg( # frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) # ) # retval = predictions["sem_seg"].argmax(dim=0) # TODO # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return retval, vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))
class VisualizationDemo(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) self.nextObjectID = 0 self.objects = OrderedDict() self.frame_count = 0 self.maximum_wait = OrderedDict() self.all_track_id = [] self.count = 0 self.time_count = 0 def create_track(self, id): self.objects[id] = 1 def disappear(self, id): if id in self.maximum_wait: self.maximum_wait[id] += 1 else: self.maximum_wait[id] = 1 def detrack(self, id, index): del self.maximum_wait[id] del self.objects[id] del self.all_track_id[index] def update(self, id): self.objects[id] += 1 def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # print('=====================>',predictions['instances'].pred_classes) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( predictions=instances) return predictions, vis_output def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: frame = cv2.resize(frame, (960, 540), interpolation=cv2.INTER_CUBIC) yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. 
""" video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) k = 0 try: vis_frame, colors = video_visualizer.draw_instance_predictions( frame, predictions) k = 1 except: vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) if k == 1: boxes = predictions.pred_boxes.tensor.numpy( ) if predictions.has("pred_boxes") else None classes = predictions.pred_classes.numpy( ) if predictions.has("pred_classes") else None person_list = [] person_track = [] for box, class_label, color in zip(boxes, classes, colors): if int(class_label) == 0: pixel_width = box[2] - box[0] # print(box,'=========================>') # print(pixel_width,'============================>') box = np.asarray([[box[0], box[1]], [box[2], box[3]]]) # pixel_per_metric = 15.45 # original_width = pixel_width * pixel_per_metric # distance_z = (original_width*3)/pixel_width #D’ = (W x F) / P distance_z = pixel_width cX = np.average(box[:, 0]) cY = np.average(box[:, 1]) # cY = cY + distance_z person_list.append([cX, cY, distance_z]) person_track.append(color) # print('<=============================>',person_list,'<=============================>') #find the center of the box by top-left x and bottom-right x / 2 and same for y elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) # D = dist.cdist(person_list,person_list,'euclidean') # print(person_list,D) # def midpoint(ptA, ptB): # return ((ptA[0] + ptB[0]) * 0.5, (ptA[1] + ptB[1]) * 0.5) self.time_count += 1 vis_frame = frame if k == 1: person = sorted(zip(person_list, person_track)) hh, ww, c = (540, 960, 3) # hh,ww,c = vis_frame.shape # aspect_ratio = 960/540 # width_scale = (530/960) # height_scale = (600/540) # result_width = int(vis_frame.shape[1]*width_scale) # result_height= int(vis_frame.shape[0]*height_scale) # result = np.zeros((result_width,result_height, 3)) result = np.zeros((530, 600, 3)) # x_scale = (result_width/vis_frame.shape[1]) # y_scale = (result_height/vis_frame.shape[0]) x_scale = (530 / vis_frame.shape[1]) y_scale = (600 / vis_frame.shape[0]) ht, wd, cc = result.shape # print(ww,wd) xx = (ww - wd) // 2 yy = (hh - ht) // 2 # print(xx, yy,'.................') color = (245, 245, 245) layer1 = np.full((hh, ww, cc), color, dtype=np.uint8) green_list = [] yellow_list = [] red_list = [] for box_i, track_i in person: for box_j, track_j in person: objectid = str(track_i) + str(track_j) objectid = objectid.replace('[', '').replace( ']', '').replace('.', '').replace(' ', '') if self.time_count % 10: self.time_count = 0 for indexs, l in enumerate(self.all_track_id): if l != objectid: self.disappear(l) if self.maximum_wait[l] >= 10000: self.detrack(l, indexs) if box_i != box_j: xA, yA, zA = box_i xB, yB, zB = box_j z_check = abs(zA - zB) D = dist.euclidean((xA, yA), (xB, yB)) division_index_A = yA / y_division division_index_B = yB / y_division A_div = division[int(division_index_A)] B_div = division[int(division_index_B)] yA = abs(yA + A_div) yB = abs(yB + B_div) xA = abs(xA + A_div) xB = abs(xB + B_div) if 
abs(division_index_A - division_index_B) < 1.0: Main_threshold = min(A_div, B_div) else: Main_threshold = 0.4 # cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)), # (255,0,0), 2) # def midpoint(ptA, ptB): # return ((ptA[0] + ptB[0]) * 0.5, (ptA[1] + ptB[1]) * 0.5) # (mX, mY) = midpoint((xA, yA), (xB, yB)) # cv2.putText(vis_frame, "{:.1f}in".format(D), (int(mX), int(mY - 10)),cv2.FONT_HERSHEY_SIMPLEX, 0.55, (255,0,0), 2) # # print('......... ...') if D < Main_threshold: if objectid in self.objects: self.update(id=objectid) else: self.all_track_id.append(objectid) self.create_track(id=objectid) if self.objects[objectid] <= 90: xA, yA, zA = box_i xB, yB, ZB = box_j # cv2.circle(vis_frame, (int(xA), int(yA)), 5, (255,0,0), -1) # cv2.circle(vis_frame, (int(xB), int(yB)), 5, (255,0,0), -1) # overlay = vis_frame.copy() cv2.circle(vis_frame, (int(xA), int(yA)), 3, (0, 255, 255), -1) cv2.circle(vis_frame, (int(xB), int(yB)), 3, (0, 255, 255), -1) cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)), (255, 255, 0), 2) if box_i not in red_list and box_i not in yellow_list: yellow_list.append(box_i) new_box_i_x = int( round((box_i[0]) * x_scale)) new_box_i_y = int( round((box_i[1]) * y_scale)) new_box_j_x = int( round((box_j[0]) * x_scale)) new_box_j_y = int( round((box_j[1]) * y_scale)) cv2.line(result, (int(new_box_i_x), int(new_box_i_y)), (int(new_box_j_x), int(new_box_j_y)), (255, 255, 0), 2) # cv2.addWeighted(overlay, 0.1, vis_frame, 1 - 0.,0, vis_frame) else: xA, yA, zA = box_i xB, yB, zB = box_j # overlay = vis_frame.copy() cv2.circle(vis_frame, (int(xA), int(yA)), 3, (0, 0, 255), -1) cv2.circle(vis_frame, (int(xB), int(yB)), 3, (0, 0, 255), -1) cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)), (255, 0, 0), 2) if box_i not in red_list: red_list.append(box_i) new_box_i_x = int( round((box_i[0]) * x_scale)) new_box_i_y = int( round((box_i[1]) * y_scale)) new_box_j_x = int( round((box_j[0]) * x_scale)) new_box_j_y = int( round((box_j[1]) * y_scale)) cv2.line(result, (int(new_box_i_x), int(new_box_i_y)), (int(new_box_j_x), int(new_box_j_y)), (0, 0, 255), 2) else: if box_i not in red_list and box_i not in yellow_list and box_i not in green_list: green_list.append(box_i) if box_j not in red_list and box_j not in yellow_list and box_j not in green_list: green_list.append(box_j) for box_check, track_check in person: if box_check in red_list: new_box_i_x = int(round((box_check[0]) * x_scale)) new_box_i_y = int(round((box_check[1]) * y_scale)) # track_i = track_i * 255.0 cv2.circle(result, (new_box_i_x, new_box_i_y), 5, (0, 0, 255), 5) elif box_check in yellow_list: new_box_i_x = int(round((box_check[0]) * x_scale)) new_box_i_y = int(round((box_check[1]) * y_scale)) # track_i = track_i * 255.0 cv2.circle(result, (new_box_i_x, new_box_i_y), 5, (0, 255, 255), 5) elif box_check in green_list: new_box_i_x = int(round((box_check[0]) * x_scale)) new_box_i_y = int(round((box_check[1]) * y_scale)) # track_i = track_i * 255.0 cv2.circle(result, (new_box_i_x, new_box_i_y), 5, (0, 128, 0), 5) cv2.putText(result, "{:.1f}".format(len(red_list)), (int(20), int(40)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 5) cv2.putText(result, "{:.1f}".format(len(yellow_list)), (int(20), int(70)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 5) cv2.putText(result, "{:.1f}".format(len(green_list)), (int(20), int(100)), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 5) # for i in range(1,16): # xA = 1 # yA = y_division * i # xB = 700 # yB = yA # cv2.line(vis_frame, (int(xA), int(yA)), (int(xB), int(yB)),(255,0,0), 
2) # print(vis_frame.shape,layer1.shape) # cv2.imwrite('imagetest.jpg',layer1) vis_frame = cv2.cvtColor(vis_frame, cv2.COLOR_RGB2BGR) layer1[yy:yy + ht, xx:xx + wd] = result # vis_frame = cv2.resize(vis_frame,(960,540),interpolation = cv2.INTER_CUBIC) vis_frame = np.concatenate((vis_frame, layer1), axis=1) else: vis_frame = cv2.resize(vis_frame, (960, 540), interpolation=cv2.INTER_CUBIC) hh, ww, c = vis_frame.shape result = np.zeros((530, 600, 3)) # x_scale = (result_width/vis_frame.shape[1]) # y_scale = (result_height/vis_frame.shape[0]) x_scale = (530 / vis_frame.shape[1]) y_scale = (600 / vis_frame.shape[0]) ht, wd, cc = result.shape # print(ww,wd) xx = (ww - wd) // 2 yy = (hh - ht) // 2 # print(xx, yy,'.................') color = (245, 245, 245) layer1 = np.full((hh, ww, cc), color, dtype=np.uint8) layer1[yy:yy + ht, xx:xx + wd] = result vis_frame = cv2.resize(vis_frame, (960, 540), interpolation=cv2.INTER_CUBIC) # print(layer1.shape,vis_frame.shape) vis_frame = np.concatenate((vis_frame, layer1), axis=1) # cv2.addWeighted(overlay, 0.1, vis_frame, 1 - 0.1,0, vis_frame) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))
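# The social-distancing class above reads several module-level names (`dist`,
# `division`, `y_division`) that are not defined in this snippet. A hedged sketch of
# what they might look like; the band count and every numeric value are assumptions
# chosen only so the indexing in process_predictions stays in range for 960x540 frames.
from scipy.spatial import distance as dist  # assumed: the code calls dist.euclidean / dist.cdist

y_division = 540 / 15                            # assumed height of one horizontal band, in pixels
division = [0.4 + 0.05 * i for i in range(16)]   # assumed per-band offset/threshold values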
class VisualizationDemo(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" ) self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "inst" in predictions: visualizer.vis_inst(predictions["inst"]) if "bases" in predictions: self.vis_bases(predictions["bases"]) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info ) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions(predictions=instances) return predictions, vis_output def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def vis_bases(self, bases): basis_colors = [[2, 200, 255], [107, 220, 255], [30, 200, 255], [60, 220, 255]] bases = bases[0].squeeze() bases = (bases / 8).tanh().cpu().numpy() num_bases = len(bases) fig, axes = plt.subplots(nrows=num_bases // 2, ncols=2) for i, basis in enumerate(bases): basis = (basis + 1) / 2 basis = basis / basis.max() basis_viz = np.zeros((basis.shape[0], basis.shape[1], 3), dtype=np.uint8) basis_viz[:, :, 0] = basis_colors[i][0] basis_viz[:, :, 1] = basis_colors[i][1] basis_viz[:, :, 2] = np.uint8(basis * 255) basis_viz = cv2.cvtColor(basis_viz, cv2.COLOR_HSV2RGB) axes[i // 2][i % 2].imshow(basis_viz) plt.show() def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. 
""" video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info ) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) ) # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))
class VisualizationDemo(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def run_on_image(self, image, save_name): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] mask = predictions['instances'].raw_masks.squeeze(1).data.cpu().numpy( ) if predictions['instances'].has("raw_masks") else None visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) pred_classes = torch.ones(instances.pred_classes.shape) # uncomment to open nms between different classes ''' res = batched_nms(instances.pred_boxes.tensor, instances.scores, pred_classes, 0.5) print('res:', res) print('res:', res.size()[0]) #instances.num_instances = res.size()[0] instances.pred_boxes.tensor = instances.pred_boxes.tensor[res] instances.pred_classes = instances.pred_classes[res] instances.scores = instances.scores[res] instances.pred_keypoints = instances.pred_keypoints[res] instances.predict_trans = instances.predict_trans[res] instances.predict_rotation = instances.predict_rotation[res] instances.predict_vertices = instances.predict_vertices[res] print('pred trans shape:', instances.predict_trans.shape) ''' vis_output = visualizer.draw_instance_predictions( predictions=instances) output_trans_dir = './inference_val_translation/' output_rotation_dir = './inference_val_rotation/' output_mesh_dir = './inference_val_mesh/' output_cls_dir = './inference_val_cls/' output_score_dir = './inference_val_score/' save_name = save_name.split('/')[1] template_path = './merge_mean_car_shape/' faces = sr.Mesh.from_obj(template_path + 'merge_mean_car_model_0.obj').faces for directory in [ output_trans_dir, output_rotation_dir, output_mesh_dir, output_cls_dir, output_score_dir ]: if not os.path.exists(directory): os.makedirs(directory) for index in range(instances.predict_trans.shape[0]): with open( os.path.join( output_trans_dir, save_name + '_' + str(index) + '.json'), 'w') as f: data = {} data['translation'] = list( instances.predict_trans[index].cpu().detach( ).numpy().astype(float)) json.dump(data, f) for index in range(instances.predict_rotation.shape[0]): with open( os.path.join( output_rotation_dir, save_name + '_' + str(index) + '.json'), 'w') as f: data = {} data['rotation'] = list( instances.predict_rotation[index].cpu().detach( 
).numpy().astype(float)) json.dump(data, f) for index in range(instances.pred_classes.shape[0]): with open( os.path.join( output_cls_dir, save_name + '_' + str(index) + '.json'), 'w') as f: data = {} data['car_id'] = int(instances.pred_classes[index].cpu( ).detach().numpy().astype(float)) json.dump(data, f) for index in range(instances.scores.shape[0]): with open( os.path.join( output_score_dir, save_name + '_' + str(index) + '.json'), 'w') as f: data = {} data['score'] = float(instances.scores[index].cpu(). detach().numpy().astype(float)) json.dump(data, f) for index in range(instances.predict_vertices.shape[0]): vertices = instances.predict_vertices[index].unsqueeze(0) sr.Mesh(vertices, faces).save_obj(os.path.join( output_mesh_dir, save_name + '_' + str(index) + '.obj'), save_texture=False) return predictions, vis_output def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. """ video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))
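# Hedged usage sketch (added, not original code): drives run_on_image above over a
# directory of images. run_on_image indexes save_name.split('/')[1], so the name passed
# in must contain a '/'; the directory layout here is an assumption, while the
# ./inference_val_* and ./merge_mean_car_shape/ paths are created/read by the method itself.
def _example_export_car_meshes(cfg, image_dir="val_images"):
    import os
    import cv2
    demo = VisualizationDemo(cfg)
    for fname in sorted(os.listdir(image_dir)):
        image = cv2.imread(os.path.join(image_dir, fname))
        if image is None:
            continue
        # e.g. "val_images/xxx.jpg" -> save_name.split('/')[1] == "xxx.jpg"
        demo.run_on_image(image, save_name=f"{image_dir}/{fname}")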
class PredictionDemo(object):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        self.metadata = MetadataCatalog.get(
            cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.parallel = parallel
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(cnt, frame, predictions):
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            if "panoptic_seg" in predictions:
                panoptic_seg, segments_info = predictions["panoptic_seg"]
                return cnt, panoptic_seg, segments_info
            elif "instances" in predictions:
                print("instances")
                predictions = predictions["instances"].to(self.cpu_device)
                return cnt, predictions
            elif "sem_seg" in predictions:
                print("sem_seg")
                vis_frame = video_visualizer.draw_sem_seg(
                    frame,
                    predictions["sem_seg"].argmax(dim=0).to(self.cpu_device))
                return cnt, vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size
            frame_data = deque()
            out_cnt = 0  # index of the buffered frame whose prediction is yielded next
            for cnt, frame in enumerate(frame_gen):
                # print(cnt, 1)
                frame_data.append(frame)
                self.predictor.put(frame)
                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(out_cnt, frame, predictions)
                    out_cnt += 1
            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(out_cnt, frame, predictions)
                out_cnt += 1
        else:
            for cnt, frame in enumerate(frame_gen):
                # print("non-parallel prediction", cnt)
                yield process_predictions(cnt, frame, self.predictor(frame))
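# Hedged usage sketch (added, not original code): consumes the (frame_index, Instances)
# pairs that run_on_video above yields for instance-detection models; other model types
# return differently shaped tuples. The video path is a placeholder.
def _example_raw_predictions(cfg, in_path="input.mp4"):
    import cv2
    demo = PredictionDemo(cfg)
    capture = cv2.VideoCapture(in_path)
    for cnt, instances in demo.run_on_video(capture):
        boxes = instances.pred_boxes.tensor.numpy() if instances.has("pred_boxes") else []
        print("frame {}: {} detections".format(cnt, len(boxes)))
    capture.release()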
class VisualizationDemo(object): def __init__(self, cfg, parallel, instance_mode=ColorMode.IMAGE): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if self.parallel == 1: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) self.video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. """ def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) instances = self.video_visualizer.draw_instance_bbox( predictions) return instances frame_gen = self._frame_from_video(video) if self.parallel == 1: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))
class VisualizationDemo(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( predictions=instances) return predictions, vis_output def run_on_image_detection(self, image): vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata) highest = self.highest_only(predictions) vis_output = visualizer.draw_instance_predictions(predictions=highest) return highest, vis_output def highest_only(self, predict): instance = predict["instances"].to(self.cpu_device) image_size = instance.image_size get_scores = instance.get("scores") pred_classes_index = [] if len(get_scores.tolist()) != 0: _, highest_index = torch.max(get_scores, 0) pred_classes_index.append(highest_index) pred_classes = self.tensor_transform(instance.get("pred_classes"), pred_classes_index) scores = self.tensor_transform(instance.get("scores"), pred_classes_index) pred_boxes = Boxes( self.tensor_transform( instance.get("pred_boxes").tensor, pred_classes_index)) return Instances(image_size=image_size, pred_boxes=pred_boxes, scores=scores, pred_classes=pred_classes) def run_on_image_flaw_only(self, image): vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. 
image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) vis_output = visualizer.draw_instance_predictions( predictions=self.flaw_only(predictions)) return predictions, vis_output def flaw_only(self, predict): instance = predict["instances"].to(self.cpu_device) image_size = instance.image_size get_pred_classes = instance.get("pred_classes").numpy() pred_classes_index = [] pred_classes = [] for c in range(len(get_pred_classes)): if get_pred_classes[c] != 0 and get_pred_classes[c] != 1: pred_classes_index.append(c) pred_classes.append(get_pred_classes[c]) pred_classes = torch.from_numpy(np.asarray(pred_classes)) scores = self.tensor_transform(instance.get("scores"), pred_classes_index) pred_masks = self.tensor_transform(instance.get("pred_masks"), pred_classes_index) pred_boxes = Boxes( self.tensor_transform( instance.get("pred_boxes").tensor, pred_classes_index)) return Instances(image_size=image_size, pred_boxes=pred_boxes, scores=scores, pred_classes=pred_classes, pred_masks=pred_masks) def tensor_transform(self, t, indexes): tensor2array = t.numpy() new_array = [] for index in indexes: new_array.append(tensor2array[index]) new_array = torch.from_numpy(np.asarray(new_array)) return new_array def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. """ video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))
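# Hedged usage sketch (added, not original code): exercises the two filtered entry
# points above. The image path is a placeholder, and treating class ids 0 and 1 as
# the "non-flaw" classes is simply what flaw_only's filtering implies.
def _example_flaw_inspection(cfg, image_path="part.jpg"):
    import cv2
    demo = VisualizationDemo(cfg)
    image = cv2.imread(image_path)
    highest, vis_top1 = demo.run_on_image_detection(image)  # keeps only the top-scoring box
    _, vis_flaws = demo.run_on_image_flaw_only(image)       # drops detections of classes 0 and 1
    cv2.imwrite("top1.jpg", vis_top1.get_image()[:, :, ::-1])
    cv2.imwrite("flaws.jpg", vis_flaws.get_image()[:, :, ::-1])
    return highest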
class VisualizationDemo(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ # self.metadata = MetadataCatalog.get( # cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" # ) # a = get_object_dicts("box/train") d = "train" # DatasetCatalog.register("box_" + d, lambda d=d: get_object_dicts("box/" + d)) DatasetCatalog.register("box", self.fake_func) MetadataCatalog.get("box_" + d).thing_classes = ['box'] self.metadata = MetadataCatalog.get("box") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def fake_func(self): return {} def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( predictions=instances) return predictions, vis_output def _frame_from_video(self, zed, image, runtime_parameters): while True: if zed.grab(runtime_parameters) == sl.ERROR_CODE.SUCCESS: zed.retrieve_image(image, sl.VIEW.LEFT) frame_gen = image.get_data() frame_gen = cv2.cvtColor(frame_gen, cv2.COLOR_RGB2BGR) yield frame_gen else: break def _depth_from_video(self, zed, depth, runtime_parameters): while True: if zed.grab(runtime_parameters) == sl.ERROR_CODE.SUCCESS: zed.retrieve_measure(depth, sl.MEASURE.DEPTH) depth_gen = depth.get_data() yield depth_gen else: break def _frame_depth_from_video(self, pipeline, pc): while True: frames = pipeline.wait_for_frames() depth_frame = frames.get_depth_frame() color_frame = frames.get_color_frame() if not depth_frame or not color_frame: continue # Convert images to numpy arrays depth_map = np.asanyarray(depth_frame.get_data()) color_image = np.asanyarray(color_frame.get_data()) points = pc.calculate(depth_frame) v = points.get_vertices() point_cloud = np.asanyarray(v).view(np.float32).reshape( 480, 640, 3) depth_colormap = cv2.applyColorMap( cv2.convertScaleAbs(depth_map, alpha=0.03), cv2.COLORMAP_JET) depth_colormap_dim = depth_colormap.shape color_colormap_dim = color_image.shape # If depth and color resolutions are different, resize color image to match depth image for display if depth_colormap_dim != color_colormap_dim: color_image = cv2.resize(color_image, dsize=(depth_colormap_dim[1], depth_colormap_dim[0]), interpolation=cv2.INTER_AREA) yield [color_image, depth_map, point_cloud] def run_on_video(self, pipeline, pc): """ Visualizes predictions on frames of the input video. 
Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. """ video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, point_cloud, depth, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) # cv2.imwrite('image_raw.jpg', frame) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) if predictions.has("pred_masks"): masks = predictions.pred_masks frame_visualizer = Visualizer(frame, self.metadata) mask_layer = frame_visualizer.get_mask_layer(masks=masks) # vis_frame = video_visualizer.draw_instance_predictions(frame, predictions) # depth_layer = [] # start_time = time.time() # for i in range(len(mask_layer)): # mask = mask_layer[i].mask # for y in range(len(mask)): # concate_depth = mask[y]*depth[y] # # concate_depth = np.setdiff1d(concate_depth,np.array([float('nan')])) # # concate_depth = np.nan_to_num(concate_depth) # depth_layer.append(concate_depth) # # f = open('dummy_data/depth_map_{}.npy'.format(datetime.now().second), 'wb') # f = open('dummy_data/depth_map.npy', 'wb') # np.save(f, depth_layer) # np.save(f, point_cloud) # np.save(f, mask) # f.close() # end_time = time.time() # print('elapse time = ', end_time - start_time) # print('depth_layer ready ') elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # import numpy as np # import matplotlib.pyplot as plt # ax = plt.axes(projection='3d') # ax.scatter3D(np.array(point_cloud_layer)[:,0], np.array(point_cloud_layer)[:,1], np.array(point_cloud_layer)[:,2], cmap='Greens', s=0.5) # plt.show() # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = vis_frame.get_image() # vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) cv2.imwrite('dummy_data/image_seg.jpg', vis_frame) return vis_frame # frame_gen = self._frame_from_video(zed, image, runtime_parameters) # depth_gen = self._depth_from_video(zed, depth, runtime_parameters) data_gen = self._frame_depth_from_video(pipeline, pc) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame, depth, point_cloud in data_gen: # if cam_data.dtype == 'uint8': # frame = cam_data # else: # depth = cam_data yield process_predictions(frame, point_cloud, depth, self.predictor(frame))
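# Hedged usage sketch (added, not original code): one way to build the RealSense
# pipeline and pointcloud objects that run_on_video above expects. Stream resolutions,
# formats, and frame rates are placeholders (the class reshapes the point cloud to
# 480x640, so the depth stream should match); requires the pyrealsense2 package.
def _example_realsense_loop(cfg):
    import cv2
    import pyrealsense2 as rs

    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    pipeline.start(config)
    pc = rs.pointcloud()

    demo = VisualizationDemo(cfg)
    try:
        for vis_frame in demo.run_on_video(pipeline, pc):
            # run_on_video returns the visualizer's RGB image here; flip for OpenCV display.
            cv2.imshow("segmented", vis_frame[:, :, ::-1])
            if cv2.waitKey(1) == 27:  # Esc
                break
    finally:
        pipeline.stop()
        cv2.destroyAllWindows()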
class Predictor(DefaultPredictor):
    def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False):
        """
        Args:
            cfg (CfgNode):
            instance_mode (ColorMode):
            parallel (bool): whether to run the model in different processes from visualization.
                Useful since the visualization logic can be slow.
        """
        # Register a placeholder dataset purely to attach the two-class metadata
        # ("0" drawn in green, "1" drawn in red); the loader returns no records.
        DatasetCatalog.register("pfallcnt_pred", lambda: [])
        MetadataCatalog.get("pfallcnt_pred").set(
            thing_classes=["0", "1"],
            thing_colors=[(0, 255, 0), (255, 0, 0)])
        self.metadata = MetadataCatalog.get("pfallcnt_pred")

        self.cpu_device = torch.device("cpu")
        self.instance_mode = instance_mode
        self.parallel = parallel
        # Note: the parent DefaultPredictor initializer is not called; this class
        # only delegates to the wrapped predictor created below.
        if parallel:
            num_gpu = torch.cuda.device_count()
            self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
        else:
            self.predictor = DefaultPredictor(cfg)

    def run_on_image(self, image):
        """
        Args:
            image (np.ndarray): an image of shape (H, W, C) (in BGR order).
                This is the format used by OpenCV.

        Returns:
            predictions (dict): the output of the model.
            vis_output (VisImage): the visualized image output.
        """
        vis_output = None
        predictions = self.predictor(image)
        # Convert image from OpenCV BGR format to Matplotlib RGB format.
        image = image[:, :, ::-1]
        visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode)
        if "instances" in predictions:
            # Keep only detections of class 1.
            predictions['instances'] = predictions['instances'].to('cpu')
            indices = predictions['instances'].pred_classes == 1
            predictions['instances'] = predictions['instances'][indices]
            # if (len(predictions['instances']) == 0):
            #     vis_output = image
            # else:
            vis_output = visualizer.draw_instance_predictions(
                predictions=predictions['instances'])
        return predictions, vis_output

    def _frame_from_video(self, video):
        while video.isOpened():
            success, frame = video.read()
            if success:
                yield frame
            else:
                break

    def run_on_video(self, video):
        """
        Visualizes predictions on frames of the input video.

        Args:
            video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be
                either a webcam or a video file.

        Yields:
            ndarray: BGR visualizations of each video frame.
        """
        video_visualizer = VideoVisualizer(self.metadata, self.instance_mode)

        def process_predictions(frame, predictions):
            # Convert the OpenCV BGR frame to RGB for the visualizer.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            if "instances" in predictions:
                predictions['instances'] = predictions['instances'].to('cpu')
                indices = predictions['instances'].pred_classes == 1
                predictions['instances'] = predictions['instances'][indices]
                if (len(predictions['instances']) == 0):
                    # Nothing of class 1 detected: return the original BGR frame.
                    vis_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                else:
                    vis_frame = video_visualizer.draw_instance_predictions(
                        frame, predictions['instances'])
                    # Converts Matplotlib RGB format to OpenCV BGR format
                    vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR)
            return vis_frame

        frame_gen = self._frame_from_video(video)
        if self.parallel:
            buffer_size = self.predictor.default_buffer_size
            frame_data = deque()
            for cnt, frame in enumerate(frame_gen):
                frame_data.append(frame)
                self.predictor.put(frame)
                if cnt >= buffer_size:
                    frame = frame_data.popleft()
                    predictions = self.predictor.get()
                    yield process_predictions(frame, predictions)
            while len(frame_data):
                frame = frame_data.popleft()
                predictions = self.predictor.get()
                yield process_predictions(frame, predictions)
        else:
            for frame in frame_gen:
                yield process_predictions(frame, self.predictor(frame))
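# A hypothetical single-image use of the class-filtering Predictor above: only
# detections whose pred_class is 1 survive and get drawn. The config path,
# weights, and image paths are placeholders, not taken from the original code.
import cv2
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file("configs/pfallcnt.yaml")           # hypothetical two-class config
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2                    # classes "0" and "1"
cfg.MODEL.WEIGHTS = "output/pfallcnt/model_final.pth"  # hypothetical weights
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

pred = Predictor(cfg)
image = cv2.imread("samples/frame_0001.jpg")           # BGR, as run_on_image expects
predictions, vis_output = pred.run_on_image(image)
print(len(predictions["instances"]), "class-1 detections kept")
if vis_output is not None:
    # VisImage.get_image() is RGB; flip back to BGR before writing with OpenCV.
    cv2.imwrite("samples/frame_0001_vis.jpg", vis_output.get_image()[:, :, ::-1])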
class VisualizationDemo(object): def __init__(self, cfg, args, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): args (argparse.Namespace): parsed command-line options; draw_proposals and confidence_threshold are read from it. instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.draw_proposals = args.draw_proposals self.thresh = args.confidence_threshold self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) self._init_visualization_metadata(cfg, args) def _init_visualization_metadata(self, cfg, args): """ Initialize the visualization metadata. Args: cfg (CfgNode) """ self.metadata = create_visualization_metadata(cfg) def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = InteractionVisualizer(image, self.metadata, instance_mode=self.instance_mode) if self.draw_proposals: instances = predictions["proposals"].to(self.cpu_device) vis_output = visualizer.draw_proposals(proposals=instances) elif "hoi_instances" in predictions: instances = predictions["hoi_instances"].to(self.cpu_device) instances = self._convert_hoi_instances(instances) vis_output = visualizer.draw_interaction_predictions(predictions=instances) elif "box_instances" in predictions: instances = predictions["box_instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions(predictions=instances) return predictions, vis_output def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame.
""" video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if self.draw_proposals: instances = predictions["proposals"].to(self.cpu_device) vis_frame = video_visualizer.draw_proposals(frame, instances, self.thresh) elif "hoi_instances" in predictions: instances = predictions["hoi_instances"].to(self.cpu_device) instances = self._convert_hoi_instances(instances) vis_frame = video_visualizer.draw_interaction_predictions(frame, instances) elif "box_instances" in predictions: instances = predictions["box_instances"].to(self.cpu_device) instances = self._convert_hoi_instances(instances) vis_frame = video_visualizer.draw_instance_predictions(frame, instances) # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame)) def _convert_hoi_instances(self, instances): """ Convert an "Instances" object to a HOI "Instances" by merging the predicted object class and action class to an interaction class. For example, object ("bench") + action ("sit on") -> interaction ("sit on bench") """ num_instance = len(instances) if num_instance == 0: return instances # Meta data interaction_to_contiguous_id = self.metadata.get("interaction_to_contiguous_id", None) if interaction_to_contiguous_id: action_classes = self.metadata.get("action_classes", None) thing_classes = self.metadata.get("thing_classes", None) known_classes = self.metadata.get("known_classes", None) novel_classes = np.setdiff1d(thing_classes, known_classes).tolist() pred_object_classes = instances.object_classes.tolist() pred_action_classes = instances.action_classes.tolist() interaction_classes = [] keep = [] for ix in range(num_instance): object_id = pred_object_classes[ix] action_id = pred_action_classes[ix] # append detection results pred_action_name = action_classes[action_id] pred_object_name = thing_classes[object_id] pred_interaction_name = pred_action_name + " " + pred_object_name if pred_interaction_name in interaction_to_contiguous_id: #interaction_id = interaction_to_contiguous_id[pred_interaction_name] interaction_classes.append(pred_interaction_name) keep.append(ix) elif pred_object_name in novel_classes: # TODO: mine valid interaction with novel objects using external source. # Interactions with novel object categories interaction_classes.append(pred_interaction_name) keep.append(ix) instances = instances[keep] instances.pred_classes = np.asarray(interaction_classes) return instances
class VisualizationDemo(object): def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def bbox_crop(self, frame, predictions, resx, resy, target_size, target_instance, padding): predictions2 = predictions["instances"].to(torch.device("cpu")) boxes = predictions2.pred_boxes scores = predictions2.scores classes = predictions2.pred_classes things = self.metadata.get("thing_classes", None) people_list = list( idx for idx, value in enumerate(classes) if things[value] == target_instance and scores[idx] >= 0.9) # def calc_spread(val, offset, ceiling): # if val + offset > ceiling: if len(people_list) > 0: max_score, max_index = scores.max(0) max_bbox = boxes[int(max_index)] bbox = max_bbox.tensor.tolist()[0] # size_x, size_y = (bbox[2] - bbox[0]), (bbox[3] - bbox[1]) minx, miny, maxx, maxy = max_bbox.tensor.tolist()[0] bbox_width, bbox_height = (maxx - minx), (maxy - miny) # Offset the image to make it square and add in padding if bbox_width > bbox_height: offset = (bbox_width - bbox_height) maxy += offset / 2 miny -= offset / 2 else: offset = (bbox_height - bbox_width) maxx += offset / 2 minx -= offset / 2 maxy += padding maxx += padding minx -= padding miny -= padding # Try to adjust image to make it valid if minx < 0: shift = abs(minx) minx += shift maxx += shift if maxx > resx: shift = maxx - resx minx -= shift maxx -= shift if miny < 0: shift = abs(miny) miny += shift maxy += shift if maxy > resy: shift = maxy - resy miny -= shift maxy -= shift if (minx < 0 or maxx > resx or miny < 0 or maxy > resy): return None else: bbox = (minx, miny, maxx, maxy) cropped_img = Image.fromarray( frame[:, :, ::-1]).crop(bbox).resize( (target_size, target_size), Image.ANTIALIAS) return cropped_img else: return None def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( predictions=instances) return predictions, vis_output def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video, width, height, target_size, padding): """ Visualizes predictions on frames of the input video. 
Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. """ video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield self.bbox_crop(frame, predictions, width, height, target_size, "person", padding) # yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield self.bbox_crop(frame, predictions, width, height, target_size, "person", padding) # yield process_predictions(frame, predictions) else: for frame in frame_gen: yield self.bbox_crop(frame, self.predictor(frame), width, height, target_size, "person", padding)
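# The cropping variant above yields square PIL crops of the highest-scoring
# "person" instead of visualized frames. A hypothetical driver follows; the
# video path, crop size, and padding are illustrative values only.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")

demo = VisualizationDemo(cfg)
video = cv2.VideoCapture("test_videos/video.mp4")
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

for i, crop in enumerate(demo.run_on_video(video, width, height, target_size=224, padding=16)):
    # bbox_crop returns None when no confident person is found or the padded
    # square would fall outside the frame.
    if crop is not None:
        crop.save("crops/person_{:05d}.png".format(i))
video.release()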
class VisualizationDemo(object): def __init__(self, cfg, arg_metadata=None, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): arg_metadata (Metadata): Metadata in Metadata format (not json format) instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ print("According to the config we have " + str(cfg.MODEL.ROI_HEADS.NUM_CLASSES) + " classes.") #i need to add this metadata stuff according to https://github.com/facebookresearch/detectron2/issues/326 and https://github.com/facebookresearch/detectron2/issues/101 if (arg_metadata is None): #default value for COCO self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") print("I use the the default metadata which is:") print(MetadataCatalog.get(cfg.DATASETS.TEST[0])) #cfg.DATASETS.TRAIN is ('coco_2017_train',) #cfg.DATASETS.TEST[0] is coco_2017_val #MetadataCatalog.get(cfg.DATASETS.TEST[0]) is Metadata(evaluator_type='coco', image_root='datasets/coco/val2017', json_file='datasets/coco/annotations/instances_val2017.json', name='coco_2017_val', thing_classes=['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'], thing_colors=[[220, 20, 60], [119, 11, 32], [0, 0, 142], [0, 0, 230], [106, 0, 228], [0, 60, 100], [0, 80, 100], [0, 0, 70], [0, 0, 192], [250, 170, 30], [100, 170, 30], [220, 220, 0], [175, 116, 175], [250, 0, 30], [165, 42, 42], [255, 77, 255], [0, 226, 252], [182, 182, 255], [0, 82, 0], [120, 166, 157], [110, 76, 0], [174, 57, 255], [199, 100, 0], [72, 0, 118], [255, 179, 240], [0, 125, 92], [209, 0, 151], [188, 208, 182], [0, 220, 176], [255, 99, 164], [92, 0, 73], [133, 129, 255], [78, 180, 255], [0, 228, 0], [174, 255, 243], [45, 89, 255], [134, 134, 103], [145, 148, 174], [255, 208, 186], [197, 226, 255], [171, 134, 1], [109, 63, 54], [207, 138, 255], [151, 0, 95], [9, 80, 61], [84, 105, 51], [74, 65, 105], [166, 196, 102], [208, 195, 210], [255, 109, 65], [0, 143, 149], [179, 0, 194], [209, 99, 106], [5, 121, 0], [227, 255, 205], [147, 186, 208], [153, 69, 1], [3, 95, 161], [163, 255, 0], [119, 0, 170], [0, 182, 199], [0, 165, 120], [183, 130, 88], [95, 32, 0], [130, 114, 135], [110, 129, 133], [166, 74, 118], [219, 142, 185], [79, 210, 114], [178, 90, 62], [65, 70, 15], [127, 167, 115], [59, 105, 106], [142, 108, 45], [196, 172, 0], [95, 54, 80], [128, 76, 255], [201, 57, 1], [246, 0, 122], [191, 162, 208]], thing_dataset_id_to_contiguous_id={1: 0, 2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 13: 11, 14: 12, 15: 13, 16: 14, 17: 15, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21, 24: 22, 25: 23, 27: 24, 28: 25, 31: 26, 32: 27, 33: 28, 34: 29, 35: 
30, 36: 31, 37: 32, 38: 33, 39: 34, 40: 35, 41: 36, 42: 37, 43: 38, 44: 39, 46: 40, 47: 41, 48: 42, 49: 43, 50: 44, 51: 45, 52: 46, 53: 47, 54: 48, 55: 49, 56: 50, 57: 51, 58: 52, 59: 53, 60: 54, 61: 55, 62: 56, 63: 57, 64: 58, 65: 59, 67: 60, 70: 61, 72: 62, 73: 63, 74: 64, 75: 65, 76: 66, 77: 67, 78: 68, 79: 69, 80: 70, 81: 71, 82: 72, 84: 73, 85: 74, 86: 75, 87: 76, 88: 77, 89: 78, 90: 79}) else: #custom metadata: this is my adaption so we can use our own classes for trained model on demo.py self.metadata = arg_metadata #this is a dict that already includes name, thing_classes etc. print("I use the given metadata which is:") print(self.metadata) #self.metadata is Metadata(name='Custom_Audi_A2D2_Dataset_Training', thing_classes=['Animal', 'Bicycle', 'Bus', 'Car', 'Cyclist', 'EmergencyVehicle', 'MotorBiker', 'Motorcycle', 'Pedestrian', 'Truck', 'UtilityVehicle', 'VanSUV', 'Misc']) self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: num_gpu = torch.cuda.device_count() self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: self.predictor = DefaultPredictor(cfg) def run_on_image(self, image): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. """ vis_output = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( predictions=instances) return predictions, vis_output def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame. 
""" video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))
class VisualizationDemo(object): def __init__(self, cfg, debug, instance_mode=ColorMode.IMAGE, parallel=False): """ Args: cfg (CfgNode): debug (bool): if True, print diagnostic messages while running. instance_mode (ColorMode): parallel (bool): whether to run the model in different processes from visualization. Useful since the visualization logic can be slow. """ self.metadata = MetadataCatalog.get( cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused") self.cpu_device = torch.device("cpu") self.instance_mode = instance_mode self.parallel = parallel if parallel: if debug: print('using parallel in function predictor.py') num_gpu = torch.cuda.device_count() if debug: print('num_gpu : ', num_gpu) self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) else: if debug: print('not using parallel in function predictor.py') self.predictor = DefaultPredictor(cfg) def run_on_image(self, image, debug): """ Args: image (np.ndarray): an image of shape (H, W, C) (in BGR order). This is the format used by OpenCV. Returns: predictions (dict): the output of the model. vis_output (VisImage): the visualized image output. obj (dict): when debug is False, a JSON-serializable summary of the detected instances. """ vis_output = None obj = None predictions = self.predictor(image) # Convert image from OpenCV BGR format to Matplotlib RGB format. image = image[:, :, ::-1] visualizer = Visualizer(image, self.metadata, instance_mode=self.instance_mode) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( panoptic_seg.to(self.cpu_device), segments_info) if debug: print('in panoptic_seg') else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) if debug: print("in sem_seg") if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) if debug: vis_output = visualizer.draw_instance_predictions( predictions=instances) print('in instances') # if the output is JSON, debug is False if not debug: boxes = instances.pred_boxes.tensor.numpy( ) if instances.has("pred_boxes") else None scores = instances.scores if instances.has( 'scores') else None classes = instances.pred_classes if instances.has( "pred_classes") else None labels = _create_text_labels( classes, scores, visualizer.metadata.get("thing_classes", None)) keypoints = instances.pred_keypoints if instances.has( "pred_keypoints") else None if instances.has("pred_masks"): masks = np.asarray(instances.pred_masks) masks = [ GenericMask(x, visualizer.output.height, visualizer.output.width) for x in masks ] else: masks = None obj = {} for i, _ in enumerate(labels): tmp = {} split = labels[i].split() tmp['class'] = split[0] tmp['score'] = scores[i].item() tmp['box'] = {} tmp['box']['left-up'] = [ boxes[i][0].item(), boxes[i][1].item() ] tmp['box']['right-down'] = [ boxes[i][2].item(), boxes[i][3].item() ] tmp['polygons'] = {} if masks is not None: for idx, segment in enumerate(masks[i].polygons): tmp['polygons'][idx] = segment.reshape( -1, 2).tolist() obj[i] = tmp return predictions, vis_output, obj def _frame_from_video(self, video): while video.isOpened(): success, frame = video.read() if success: yield frame else: break def run_on_video(self, video): """ Visualizes predictions on frames of the input video. Args: video (cv2.VideoCapture): a :class:`VideoCapture` object, whose source can be either a webcam or a video file. Yields: ndarray: BGR visualizations of each video frame.
""" video_visualizer = VideoVisualizer(self.metadata, self.instance_mode) def process_predictions(frame, predictions): frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_frame = video_visualizer.draw_panoptic_seg_predictions( frame, panoptic_seg.to(self.cpu_device), segments_info) elif "instances" in predictions: predictions = predictions["instances"].to(self.cpu_device) vis_frame = video_visualizer.draw_instance_predictions( frame, predictions) elif "sem_seg" in predictions: vis_frame = video_visualizer.draw_sem_seg( frame, predictions["sem_seg"].argmax(dim=0).to(self.cpu_device)) # Converts Matplotlib RGB format to OpenCV BGR format vis_frame = cv2.cvtColor(vis_frame.get_image(), cv2.COLOR_RGB2BGR) return vis_frame frame_gen = self._frame_from_video(video) if self.parallel: buffer_size = self.predictor.default_buffer_size frame_data = deque() for cnt, frame in enumerate(frame_gen): frame_data.append(frame) self.predictor.put(frame) if cnt >= buffer_size: frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) while len(frame_data): frame = frame_data.popleft() predictions = self.predictor.get() yield process_predictions(frame, predictions) else: for frame in frame_gen: yield process_predictions(frame, self.predictor(frame))