def calc_metadata(args):
    # This function is needed because the metadata is NOT stored inside cfg.
    if args.meta_data is None:
        metadata = None
    else:
        # Convert the JSON file passed on the command line into a Metadata object.
        with open(args.meta_data) as f:
            imported_metadata = json.load(f)  # parse the argument as JSON
        # Create a Metadata object initialised with the "name" attribute.
        metadata = Metadata(name=imported_metadata["name"])
        # Put the class information into the Metadata object.
        metadata.set(thing_classes=imported_metadata["thing_classes"])
    return metadata
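# A minimal usage sketch for calc_metadata, assuming the --meta_data argument points to a
# JSON file with "name" and "thing_classes" keys (the file name and class names below are
# illustrative only; the json/Metadata imports used by calc_metadata above are required).
import json
from argparse import Namespace

with open("metadata_example.json", "w") as f:
    json.dump({"name": "my_dataset", "thing_classes": ["station", "forklift"]}, f)

metadata = calc_metadata(Namespace(meta_data="metadata_example.json"))
print(metadata.thing_classes)  # -> ['station', 'forklift']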
def set_up_faster_rcnn(self):
    self.cfg = get_cfg()
    self.cfg.merge_from_file(
        model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
    self.cfg.MODEL.WEIGHTS = "weights/model_final.pth"
    self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
    self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.65
    self.predictor = DefaultPredictor(self.cfg)
    self.metadata = Metadata()
    self.metadata.set(thing_classes=['station', 'forklift'])
def postprocess(self, inference_output):
    """Return prediction results in batch."""
    print("Starting model inference output postprocessing...")
    results = []
    for model_inference in inference_output:
        image = model_inference[0]
        outputs = model_inference[1]
        predictions = outputs["instances"].to("cpu")

        severstal_metadata = Metadata()
        severstal_metadata.set(thing_classes=["Type 1", "Type 2", "Type 3", "Type 4"])

        visualizer_pred = Visualizer(image[:, :, ::-1],
                                     metadata=severstal_metadata,
                                     scale=0.5)
        image_pred = visualizer_pred.draw_instance_predictions(predictions)
        image_cv2 = cv2.cvtColor(image_pred.get_image()[:, :, ::-1], cv2.COLOR_BGR2RGB)
        image_string = base64.b64encode(
            cv2.imencode(".jpg", image_cv2)[1].tobytes()).decode("utf-8")
        image_b64 = "data:image/jpg;base64," + image_string

        if predictions.has("pred_classes"):
            classes = predictions.pred_classes.numpy().tolist()
        else:
            classes = None
        if predictions.has("scores"):
            scores = predictions.scores.numpy().tolist()
        else:
            scores = None
        if predictions.has("pred_boxes"):
            boxes = predictions.pred_boxes.tensor.numpy().tolist()
        else:
            boxes = None
        if predictions.has("pred_masks"):
            # /!\ For an unknown reason (lack of memory, timeout...?), this doesn't work with TorchServe
            # /!\ (it works perfectly in a Jupyter notebook!):
            # masks = predictions.pred_masks.numpy().tolist()
            masks = None
        else:
            masks = None

        result = {
            "data": image_b64,
            "classes": classes,
            "scores": scores,
            "boxes": boxes,
            "masks": masks
        }
        results.append(json.dumps(result))
    print("Model inference output postprocessing done")
    return results
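# Client-side sketch of consuming one entry from the list returned by postprocess above;
# `results` is assumed to be that return value (each entry is a JSON string with the keys
# built in the function).
import base64
import json

result = json.loads(results[0])
print(result["classes"], result["scores"], result["boxes"])
# Strip the "data:image/jpg;base64," prefix and write the rendered prediction to disk.
_, b64_payload = result["data"].split(",", 1)
with open("prediction.jpg", "wb") as f:
    f.write(base64.b64decode(b64_payload))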
def _merge_metadata(self):
    added_classes = self.mask_md.get('thing_classes')
    total_classes = self.po_md.get('thing_classes').copy()
    stuff_classes = self.po_md.get('stuff_classes')
    self.offset = len(total_classes)
    for c in added_classes:
        if c not in total_classes:
            total_classes.append(c)
        else:
            total_classes.append(f"custom_{c}")
    self.blend_md = Metadata(thing_classes=total_classes,
                             stuff_classes=stuff_classes)
def register_custom_coco_dataset(cfg: DictConfig, process: str = 'train') \
        -> Tuple[List[Dict], Metadata]:
    """
    Register the custom dataset in COCO format to detectron2.

    :param cfg: the configuration dictionary of dataset_model.
    :type cfg: omegaconf.dictconfig.DictConfig
    :param process: value should be 'train', 'val', or 'test'.
    :type process: str
    :return: information about images and instances in COCO format, together with its metadata.
    :rtype: Tuple[List[Dict], detectron2.data.catalog.Metadata]
    """
    if process not in ['train', 'test', 'val']:
        raise Exception(
            f"process is {process}, but it must be either 'train', 'test', or 'val'")
    dataset_dicts: List[Dict] = [{}]
    dataset_metadata: Metadata = Metadata()
    train_dataset: str = cfg.name + "_train"
    train_images_dir: Path = PROJECT_PATH / cfg.train.train_dataset_dir / 'images'
    train_coco_instances_json: str = str(
        PROJECT_PATH / cfg.train.train_dataset_dir / 'coco_instances.json')
    try:
        log.info(f'Registering {train_dataset} as a COCO-format dataset')
        register_coco_instances(name=train_dataset,
                                metadata={},
                                json_file=train_coco_instances_json,
                                image_root=train_images_dir)
    except AssertionError:
        # If the dataset is already registered, do nothing.
        pass
    if process == 'train':
        dataset_dicts = DatasetCatalog.get(train_dataset)
        dataset_metadata = MetadataCatalog.get(train_dataset)
    elif process == 'test':
        log.info(f'Getting metadata for testing on {cfg.name}')
        dataset_metadata = MetadataCatalog.get(train_dataset)
    elif process == 'val':
        val_dataset: str = cfg.name + "_val"
        val_images_dir: Path = PROJECT_PATH / cfg.validation.val_dataset_dir / 'images'
        val_coco_instances_json: str = str(
            PROJECT_PATH / cfg.validation.val_dataset_dir / 'coco_instances.json')
        log.info(f'Registering {val_dataset} as a COCO-format dataset')
        register_coco_instances(name=val_dataset,
                                metadata={},
                                json_file=val_coco_instances_json,
                                image_root=val_images_dir)
        dataset_dicts = DatasetCatalog.get(val_dataset)
        dataset_metadata = MetadataCatalog.get(val_dataset)
    return dataset_dicts, dataset_metadata
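# A hedged sketch of calling register_custom_coco_dataset; the dataset name and directory
# values are placeholders (each directory must hold an `images/` folder and a COCO-format
# `coco_instances.json`), and PROJECT_PATH / log are assumed to come from the project module.
from omegaconf import OmegaConf

cfg_example = OmegaConf.create({
    "name": "my_dataset",
    "train": {"train_dataset_dir": "data/my_dataset/train"},
    "validation": {"val_dataset_dir": "data/my_dataset/val"},
})
dataset_dicts, dataset_metadata = register_custom_coco_dataset(cfg_example, process="train")
print(len(dataset_dicts), dataset_metadata.get("thing_classes"))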
def make_inference(image, model_weights, threshold, n=5, save=False):
    """
    Makes inference on a single image using model_weights and threshold.
    Returns the image with n instance predictions drawn on.

    Params:
    -------
    image (PIL.Image.Image or np.ndarray) : target image
    model_weights (str) : file path to model weights
    threshold (float) : confidence threshold for model prediction
    n (int) : number of prediction instances to draw on, default 5
        Note: some images may not have 5 instances to draw on depending on threshold,
        n=5 means the top 5 instances above the threshold will be drawn on.
    save (bool) : if True will save image with predicted instances to file, default False
    """
    # Create predictor and model config
    cfg, predictor = create_predictor(model_weights, threshold)
    # Convert PIL image to array
    image = np.asarray(image)
    # Create metadata (`subset` is the module-level list of class names)
    metadata = Metadata()
    metadata.set(thing_classes=subset)
    # Create visualizer instance
    visualizer = Visualizer(img_rgb=image, metadata=metadata, scale=0.3)
    outputs = predictor(image)
    # Get instance predictions from outputs
    instances = outputs["instances"]
    # Draw the predictions on the image
    vis = visualizer.draw_instance_predictions(instances[:n].to("cpu"))
    return vis.get_image(), instances[:n]
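# Hypothetical call to make_inference: `subset` must exist in the same module with the class
# names the model was trained on, create_predictor comes from the surrounding project, and
# the image/weights paths below are placeholders.
from PIL import Image

subset = ["station", "forklift"]  # illustrative class names
img = Image.open("example.jpg")
pred_image, top_instances = make_inference(img, "weights/model_final.pth", threshold=0.5, n=3)
print(top_instances.pred_classes, top_instances.scores)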
def _maybe_substitute_metadata(self):
    cont_id_2_cat_id = get_contiguous_id_to_category_id_map(self._metadata)
    cat_id_2_cont_id = self._metadata.thing_dataset_id_to_contiguous_id
    if len(cont_id_2_cat_id) == len(cat_id_2_cont_id):
        return

    cat_id_2_cont_id_injective = {}
    for cat_id, cont_id in cat_id_2_cont_id.items():
        if (cont_id in cont_id_2_cat_id) and (cont_id_2_cat_id[cont_id] == cat_id):
            cat_id_2_cont_id_injective[cat_id] = cont_id

    metadata_new = Metadata(name=self._metadata.name)
    for key, value in self._metadata.__dict__.items():
        if key == "thing_dataset_id_to_contiguous_id":
            setattr(metadata_new, key, cat_id_2_cont_id_injective)
        else:
            setattr(metadata_new, key, value)
    self._metadata = metadata_new
def __init__(self, config_path, model_weights_path, model_device="cpu"):
    mp.set_start_method("spawn", force=True)
    self.logger = setup_logger()
    cfg = self._setup_cfg(
        config_path,
        ["MODEL.WEIGHTS", model_weights_path, "MODEL.DEVICE", model_device],
        0.5,
    )
    metadata = Metadata(
        evaluator_type="coco",
        name="PubLayNet",
        thing_classes=["text", "title", "list", "table", "figure"],
    )
    self.demo = VisualizationDemo(cfg, metadata, parallel=True)
def __init__(self, config_path, model_weights_path, model_device="cpu"):
    mp.set_start_method("spawn", force=True)
    self.logger = setup_logger("detectron.out")
    cfg = self._setup_cfg(
        config_path,
        ["MODEL.WEIGHTS", model_weights_path, "MODEL.DEVICE", model_device],
        0.5,
    )
    self.metadata = Metadata(
        evaluator_type="coco",
        name="PubLayNet",
        thing_classes=["text", "title", "list", "table", "figure"],
    )
    num_gpu = torch.cuda.device_count()
    self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu)
class detector:

    def __init__(self, rgb_image, depth_image, fx, fy, cx, cy):
        self.set_up_faster_rcnn()
        self.set_up_fpointnet()
        self.detection(rgb_image, depth_image, fx, fy, cx, cy)

    def set_up_faster_rcnn(self):
        self.cfg = get_cfg()
        self.cfg.merge_from_file(
            model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
        self.cfg.MODEL.WEIGHTS = "weights/model_final.pth"
        self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.65
        self.cfg.MODEL.DEVICE = 'cpu'
        self.predictor = DefaultPredictor(self.cfg)
        self.metadata = Metadata()
        self.metadata.set(thing_classes=['station', 'forklift'])

    def set_up_fpointnet(self):
        self.FrustumPointNet = FrustumPointNetv1(n_classes=6, n_channel=6)
        self.pth = torch.load("weights/frustum_model.pth", map_location='cpu')
        self.FrustumPointNet.load_state_dict(self.pth['model_state_dict'])
        self.model = self.FrustumPointNet.eval()

    def detection(self, rgb_image, depth_image, fx, fy, cx, cy):
        print('start detection')
        depth_image = np.nan_to_num(depth_image, nan=0)

        # 2D detection with Faster R-CNN
        outputs = self.predictor(rgb_image)
        prob_list = outputs["instances"].scores
        class_list = outputs["instances"].pred_classes
        box2d_list = outputs["instances"].pred_boxes.tensor

        pitch = 0.09557043068606919
        rotation = np.array([[1, 0, 0],
                             [0, np.cos(pitch), -np.sin(pitch)],
                             [0, np.sin(pitch), np.cos(pitch)]])
        count = 0
        pose = np.zeros([1, 4])
        pose_list = []

        for idx in range(len(class_list)):
            object_class = class_list[idx].numpy()
            prob = prob_list[idx].numpy()
            xmin, ymin, xmax, ymax = map(int, box2d_list[idx])
            if (xmax - xmin) > 1.5 * (ymax - ymin):
                continue

            # Crop the RGB and depth images to the 2D box and build the frustum point cloud
            rgb = np.zeros_like(rgb_image)
            depth = np.zeros_like(depth_image)
            rgb[ymin:ymax, xmin:xmax] = rgb_image[ymin:ymax, xmin:xmax]
            depth[ymin:ymax, xmin:xmax] = depth_image[ymin:ymax, xmin:xmax]
            print("class: {} ,depth_mean: {}".format(
                object_class, np.mean(depth[ymin:ymax, xmin:xmax])))
            pcs = depth2pc(rgb, depth, fx, fy, cx, cy, 1).point_cloud_generator()
            pcs[:, 0:3] = np.dot(pcs[:, 0:3].astype(np.float32), rotation)
            mask = pcs[:, 2] != 0
            pcs = pcs[mask, :]

            box2d_center = np.array([(xmin + xmax) / 2.0, (ymin + ymax) / 2.0])
            uvdepth = np.zeros((1, 3))
            uvdepth[0, 0:2] = box2d_center
            uvdepth[0, 2] = np.mean(pcs[:, 2])  # 20  # some random depth
            x = ((uvdepth[:, 0] - cx) * uvdepth[:, 2]) / fx
            y = ((uvdepth[:, 1] - cy) * uvdepth[:, 2]) / fy
            uvdepth[:, 0] = x
            uvdepth[:, 1] = y
            frustum_angle = -1 * np.arctan2(uvdepth[0, 2], uvdepth[0, 0])  # angle as to positive x-axis as in the Zoox paper

            # Pass objects that are too small
            if len(pcs) < 5:
                continue
            if object_class == 0:
                object_class = 'box'

            data = provider.FrustumDataset(npoints=2048,
                                           pcs=pcs,
                                           object_class=object_class,
                                           frustum_angle=frustum_angle,
                                           prob=prob)
            point_set, rot_angle, prob, one_hot_vec = data.data()
            point_set = torch.unsqueeze(torch.tensor(point_set), 0).transpose(2, 1).float()
            one_hot_vec = torch.unsqueeze(torch.tensor(one_hot_vec), 0).float()

            # Run Frustum PointNet on the frustum point cloud
            logits, mask, stage1_center, center_boxnet, object_pts, \
                heading_scores, heading_residuals_normalized, heading_residuals, \
                size_scores, size_residuals_normalized, size_residuals, center = \
                self.model(point_set, one_hot_vec)
            corners_3d = get_box3d_corners(center, heading_residuals, size_residuals)

            logits = logits.detach().numpy()
            mask = mask.detach().numpy()
            center_boxnet = center_boxnet.detach().numpy()
            object_pts = object_pts.detach().squeeze().numpy().transpose(1, 0)
            stage1_center = stage1_center.detach().numpy()
            center = center.detach().numpy()
            heading_scores = heading_scores.detach().numpy()
            # heading_residuals_normalized = heading_residuals_normalized.detach().numpy()
            heading_residuals = heading_residuals.detach().numpy()
            size_scores = size_scores.detach().numpy()
            size_residuals = size_residuals.detach().numpy()
            corners_3d = corners_3d.detach().numpy()

            output = np.argmax(logits, 2)
            heading_class = np.argmax(heading_scores)
            size_class = np.argmax(size_scores)
            corners_3d = corners_3d[0, heading_class, size_class]
            pred_angle = provider.class2angle(heading_class,
                                              heading_residuals[0, heading_class],
                                              NUM_HEADING_BIN)
            pred_size = provider.class2size(size_class, size_residuals[0, size_class])

            cloud = pcs[:, 0:3].astype(np.float32)
            object_cloud = (object_pts - center_boxnet.repeat(object_pts.shape[0], 0)).astype(np.float32)
            station_size = (0.979, 0.969, 0.979)
            cube = generate_station_model_with_normal(np.array([[0, 0, 0]]), station_size, -pred_angle)
            station_cloud = cube.generate_points().astype(np.float32)

            # Align the detected cloud with the station model via ICP
            cloud_icp = cicp(object_cloud, station_cloud, max_iterations=20)
            T, R, t = cloud_icp.cicp()
            cloud_t = np.tile(t, (station_cloud.shape[0], 1))
            station_cloud_rect = station_cloud[:, :3] - cloud_t
            station_cloud_rect = station_cloud_rect + center.repeat(station_cloud_rect.shape[0], 0)
            object_cloud = object_cloud + center.repeat(object_cloud.shape[0], 0)
            station_cloud[:, :3] = station_cloud[:, :3] + center.repeat(station_cloud.shape[0], 0)
            center = center - t[np.newaxis, :]
            corners_3d_rect = box3d_corners(center, pred_angle, station_size)

            # Rotate everything back to the camera frame
            object_cloud = rotate_pc_along_y(object_cloud, -rot_angle)
            station_cloud_rect = rotate_pc_along_y(station_cloud_rect, -rot_angle)
            station_cloud[:, :3] = rotate_pc_along_y(station_cloud[:, :3], -rot_angle)
            center = rotate_pc_along_y(center, -rot_angle)
            corners_3d = rotate_pc_along_y(corners_3d, -rot_angle)
            corners_3d_rect = rotate_pc_along_y(corners_3d_rect, -rot_angle)

            center[0, 1] = 0.815
            pose[0, :3] = center
            pose[0, 3] = pred_angle + rot_angle
            pose_list.append(pose.copy())

            count += 1
            station_rect_pub = point_cloud_publisher('/points_station_rect%d' % (count), station_cloud_rect)
            bbox_pub_rect = bbox_publisher('/bbox_rect%d' % (count), corners_3d_rect, color="green")
            object_pub = point_cloud_publisher('/points_object%d' % (count), object_cloud)
            station_rect_pub.point_cloud_publish()
            bbox_pub_rect.bbox_publish()
            object_pub.point_cloud_publish()

        pose_pub = pose_publisher('station_pose', pose_list)
        pose_pub.pose_publish()
        print('once detection')
class detector:

    def __init__(self, rgb_image, depth_image, fx, fy, cx, cy):
        self.set_up_faster_rcnn()
        self.set_up_fpointnet()
        self.detection(rgb_image, depth_image, fx, fy, cx, cy)

    def set_up_faster_rcnn(self):
        self.cfg = get_cfg()
        self.cfg.merge_from_file(
            model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml"))
        self.cfg.MODEL.WEIGHTS = "weights/model_final.pth"
        self.cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.65
        self.predictor = DefaultPredictor(self.cfg)
        self.metadata = Metadata()
        self.metadata.set(thing_classes=['station', 'forklift'])

    def set_up_fpointnet(self):
        self.FrustumPointNet = FrustumPointNetv1(n_classes=6, n_channel=6).cuda()
        self.pth = torch.load("weights/frustum_model.pth")
        self.FrustumPointNet.load_state_dict(self.pth['model_state_dict'])
        self.model = self.FrustumPointNet.eval()

    def detection(self, rgb_image, depth_image, fx, fy, cx, cy):
        print('start detection')
        depth_image = np.nan_to_num(depth_image, nan=0)

        # 2D detection with Faster R-CNN
        outputs = self.predictor(rgb_image)
        prob_list = outputs["instances"].scores
        class_list = outputs["instances"].pred_classes
        box2d_list = outputs["instances"].pred_boxes.tensor
        # # v = Visualizer(rgb_image[:, :, ::-1], MetadataCatalog.get(self.cfg.DATASETS.TRAIN[0]), scale=1.2)
        # v = Visualizer(rgb_image[:, :, ::-1], metadata=self.metadata, scale=1.2)
        # out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        # cv2.namedWindow('test', 0)
        # cv2.imshow('test', out.get_image()[:, :, ::-1])
        # cv2.waitKey(0)
        # print("depth mean {}".format(np.mean(depth_image)))

        pitch = 0.09557043068606919
        rotation = np.array([[1, 0, 0],
                             [0, np.cos(pitch), -np.sin(pitch)],
                             [0, np.sin(pitch), np.cos(pitch)]])
        count = 0
        pose = np.zeros([1, 4])
        pose_list = []

        for idx in range(len(class_list)):
            object_class = class_list[idx].cpu().numpy()
            prob = prob_list[idx].cpu().numpy()
            xmin, ymin, xmax, ymax = map(int, box2d_list[idx])
            if (xmax - xmin) > 1.5 * (ymax - ymin):
                continue

            # Crop the RGB and depth images to the 2D box and build the frustum point cloud
            rgb = np.zeros_like(rgb_image)
            depth = np.zeros_like(depth_image)
            rgb[ymin:ymax, xmin:xmax] = rgb_image[ymin:ymax, xmin:xmax]
            depth[ymin:ymax, xmin:xmax] = depth_image[ymin:ymax, xmin:xmax]
            print("class: {} ,depth_mean: {}".format(
                object_class, np.mean(depth[ymin:ymax, xmin:xmax])))
            pcs = depth2pc(rgb, depth, fx, fy, cx, cy, 1).point_cloud_generator()
            pcs[:, 0:3] = np.dot(pcs[:, 0:3].astype(np.float32), rotation)
            mask = pcs[:, 2] != 0
            pcs = pcs[mask, :]

            box2d_center = np.array([(xmin + xmax) / 2.0, (ymin + ymax) / 2.0])
            uvdepth = np.zeros((1, 3))
            uvdepth[0, 0:2] = box2d_center
            uvdepth[0, 2] = np.mean(pcs[:, 2])  # 20  # some random depth
            x = ((uvdepth[:, 0] - cx) * uvdepth[:, 2]) / fx
            y = ((uvdepth[:, 1] - cy) * uvdepth[:, 2]) / fy
            uvdepth[:, 0] = x
            uvdepth[:, 1] = y
            frustum_angle = -1 * np.arctan2(uvdepth[0, 2], uvdepth[0, 0])  # angle as to positive x-axis as in the Zoox paper

            # Pass objects that are too small
            if len(pcs) < 5:
                continue
            if object_class == 0:
                object_class = 'box'

            data = provider.FrustumDataset(npoints=2048,
                                           pcs=pcs,
                                           object_class=object_class,
                                           frustum_angle=frustum_angle,
                                           prob=prob)
            point_set, rot_angle, prob, one_hot_vec = data.data()
            point_set = torch.unsqueeze(torch.tensor(point_set), 0).transpose(2, 1).float().cuda()
            one_hot_vec = torch.unsqueeze(torch.tensor(one_hot_vec), 0).float().cuda()

            # print('start fpointnets')
            logits, mask, stage1_center, center_boxnet, object_pts, \
                heading_scores, heading_residuals_normalized, heading_residuals, \
                size_scores, size_residuals_normalized, size_residuals, center = \
                self.model(point_set, one_hot_vec)
            corners_3d = get_box3d_corners(center, heading_residuals, size_residuals)

            logits = logits.cpu().detach().numpy()
            mask = mask.cpu().detach().numpy()
            center_boxnet = center_boxnet.cpu().detach().numpy()
            object_pts = object_pts.cpu().detach().squeeze().numpy().transpose(1, 0)
            stage1_center = stage1_center.cpu().detach().numpy()
            center = center.cpu().detach().numpy()
            heading_scores = heading_scores.cpu().detach().numpy()
            # heading_residuals_normalized = heading_residuals_normalized.cpu().detach().numpy()
            heading_residuals = heading_residuals.cpu().detach().numpy()
            size_scores = size_scores.cpu().detach().numpy()
            size_residuals = size_residuals.cpu().detach().numpy()
            corners_3d = corners_3d.cpu().detach().numpy()

            output = np.argmax(logits, 2)
            heading_class = np.argmax(heading_scores)
            size_class = np.argmax(size_scores)
            corners_3d = corners_3d[0, heading_class, size_class]
            pred_angle = provider.class2angle(heading_class,
                                              heading_residuals[0, heading_class],
                                              NUM_HEADING_BIN)
            pred_size = provider.class2size(size_class, size_residuals[0, size_class])

            cloud = pcs[:, 0:3].astype(np.float32)
            object_cloud = (object_pts - center_boxnet.repeat(object_pts.shape[0], 0)).astype(np.float32)
            station_size = (0.979, 0.969, 0.979)
            cube = generate_station_model_with_normal(np.array([[0, 0, 0]]), station_size, -pred_angle)
            # object_cloud = rotate_pc_along_y(object_cloud, pred_angle)
            # cube = generate_station_model_with_normal(np.array([[0, 0, 0]]), station_size, 0)
            station_cloud = cube.generate_points().astype(np.float32)
            # # object_cloud_crop = object_cloud[object_cloud[:, 1] > (center[0][1] - 0.48)]
            # # station_cloud_crop = station_cloud[station_cloud[:, 1] > (center[0][1] - 0.48)]
            # object_cloud_crop = object_cloud[object_cloud[:, 1] > (-0.48)]
            # station_cloud_crop = station_cloud[station_cloud[:, 1] > (-0.48)]
            # # station_cloud = rotate_pc_along_y(station_cloud, rot_angle)

            # Align the detected cloud with the station model via ICP
            # cloud_icp = cicp(object_cloud, station_cloud_crop, max_iterations=30)
            cloud_icp = cicp(object_cloud, station_cloud, max_iterations=20)
            # cloud_icp = icp(object_cloud, station_cloud, max_iterations=20)
            T, R, t = cloud_icp.cicp()
            # R_angle = np.arccos(R[0, 0])
            # print(R)
            # print(t)
            # if abs(t[0]) > abs(t[2]):
            #     t[2] = 0
            # if abs(t[0]) < abs(t[2]):
            #     t[0] = 0
            # print(t)
            cloud_t = np.tile(t, (station_cloud.shape[0], 1))
            # station_cloud_rect = np.dot(station_cloud[:, :3] - cloud_t, np.transpose(np.linalg.pinv(R)))
            # station_cloud_rect = np.dot(station_cloud[:, :3] - cloud_t, np.transpose(np.linalg.pinv(R)))
            # station_cloud[:, :3] = rotate_pc_along_y(station_cloud[:, :3], -R_angle)
            station_cloud_rect = station_cloud[:, :3] - cloud_t
            station_cloud_rect = station_cloud_rect + center.repeat(station_cloud_rect.shape[0], 0)
            object_cloud = object_cloud + center.repeat(object_cloud.shape[0], 0)
            station_cloud[:, :3] = station_cloud[:, :3] + center.repeat(station_cloud.shape[0], 0)
            center = center - t[np.newaxis, :]
            corners_3d_rect = box3d_corners(center, pred_angle, station_size)

            # Rotate everything back to the camera frame
            object_cloud = rotate_pc_along_y(object_cloud, -rot_angle)
            station_cloud_rect = rotate_pc_along_y(station_cloud_rect, -rot_angle)
            station_cloud[:, :3] = rotate_pc_along_y(station_cloud[:, :3], -rot_angle)
            center = rotate_pc_along_y(center, -rot_angle)
            corners_3d = rotate_pc_along_y(corners_3d, -rot_angle)
            corners_3d_rect = rotate_pc_along_y(corners_3d_rect, -rot_angle)

            center[0, 1] = 0.815
            pose[0, :3] = center
            pose[0, 3] = pred_angle + rot_angle
            pose_list.append(pose.copy())
            # # station_cloud_rect = rotate_pc_along_y(station_cloud[:, :3] - cloud_t, R_angle)
            # # cloud = np.dot(cloud, np.transpose(R)) + np.tile(t, (cloud.shape[0], 1))
            # object_pub = point_cloud_publisher('/points_object', object_cloud_crop)
            # station_pub = point_cloud_publisher('/points_station', station_cloud_crop[:, :3])

            count += 1
            # station_pub = point_cloud_publisher('/points_station%d' % (count), station_cloud[:, :3])
            station_rect_pub = point_cloud_publisher('/points_station_rect%d' % (count), station_cloud_rect)
            # bbox_pub = bbox_publisher('/bbox%d' % (count), corners_3d)
            bbox_pub_rect = bbox_publisher('/bbox_rect%d' % (count), corners_3d_rect, color="green")
            object_pub = point_cloud_publisher('/points_object%d' % (count), object_cloud)
            # cloud_pub = point_cloud_publisher('/points_cloud%d' % (count), cloud)
            # cloud_pub1 = point_cloud_publisher('/points_cloud1', cloud1)
            station_rect_pub.point_cloud_publish()
            bbox_pub_rect.bbox_publish()
            object_pub.point_cloud_publish()
            # bbox_list.append(bbox_pub)
            # bbox_list.append(bbox_pub_rect)
            # point_cloud_list.append(station_pub)
            # point_cloud_list.append(station_rect_pub)
            # point_cloud_list.append(object_pub)
            # point_cloud_list.append(cloud_pub)
            # with open('results.txt', 'ab') as f:
            #     np.savetxt(f, box_info, delimiter=" ")
            # rate = rospy.Rate(10)
            # while not rospy.is_shutdown():
            #     # point_cloud_list[3].point_cloud_publish()
            #     # # object_pub.point_cloud_publish()
            #     station_pub.point_cloud_publish()
            #     station_rect_pub.point_cloud_publish()
            #     bbox_pub.bbox_publish()
            #     bbox_pub_rect.bbox_publish()
            #     object_pub.point_cloud_publish()
            #     cloud_pub.point_cloud_publish()
            #     # # cloud_pub1.point_cloud_publish()
            #     rate.sleep()

        pose_pub = pose_publisher('station_pose', pose_list)
        pose_pub.pose_publish()
        print('once detection')
def d2_vis(dset_meta, pan_mask, pan_ann, im, scale=0.7):
    from detectron2.data import MetadataCatalog
    from detectron2.utils.visualizer import ColorMode, Visualizer

    # print(self.dset_meta)
    # if len(self.dset_meta['cats']) > 20:
    if len(dset_meta['cats']) > 20:
        meta = MetadataCatalog.get("coco_2017_val_panoptic_separated")
    else:
        thing_ids = [_['id'] for _ in dset_meta['cats'].values() if _['isthing']]
        stuff_ids = [_['id'] for _ in dset_meta['cats'].values() if not _['isthing']]
        thing_colors = [dset_meta['cats'][_]['color'] for _ in thing_ids]
        stuff_colors = [dset_meta['cats'][_]['color'] for _ in stuff_ids]
        thing_classes = [dset_meta['cats'][_]['name'] for _ in thing_ids]
        stuff_classes = [dset_meta['cats'][_]['name'] for _ in stuff_ids]
        thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)}
        stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)}

        from detectron2.data.catalog import Metadata
        meta = Metadata().set(
            thing_ids=thing_ids,
            stuff_ids=stuff_ids,
            thing_colors=thing_colors,
            stuff_colors=stuff_colors,
            thing_classes=thing_classes,
            stuff_classes=stuff_classes,
            thing_dataset_id_to_contiguous_id=thing_dataset_id_to_contiguous_id,
            stuff_dataset_id_to_contiguous_id=stuff_dataset_id_to_contiguous_id,
        )

    for seg in pan_ann['segments_info']:
        if seg['isthing']:
            seg["category_id"] = meta.thing_dataset_id_to_contiguous_id[seg["category_id"]]
        else:
            seg["category_id"] = meta.stuff_dataset_id_to_contiguous_id[seg["category_id"]]

    def upsample_mask(mask, im_size):
        # return torch.nn.functional.upsample(torch.from_numpy(mask.astype(np.int32)).float().unsqueeze(0).unsqueeze(0), scale_factor=scale_factor).squeeze(0).squeeze(0).long()
        mask = torch.from_numpy(mask.astype(np.int64)).cuda()
        # import pdb; pdb.set_trace()
        inx = torch.unique(mask[mask > 0])
        inx_len = len(inx)
        tmp = mask.new_zeros((inx_len, ) + mask.shape, dtype=torch.bool)
        for i in range(inx_len):
            tmp[i, :, :] = mask == inx[i]
        tmp = torch.nn.functional.interpolate(
            tmp.float().unsqueeze(0), im_size, mode='bicubic').squeeze(0)
        tmp = torch.nn.functional.avg_pool2d(
            tmp.float().unsqueeze(0), kernel_size=7, stride=1, padding=3).squeeze(0)
        _out_mask = tmp.argmax(dim=0)
        _out_mask[tmp.max(0)[0] < 0.5] = -1
        out_mask = torch.zeros_like(_out_mask)
        for i in range(inx_len):
            out_mask[_out_mask == i] = inx[i]
        return out_mask.cpu()

    # from PIL import Image
    # im = np.array(Image.fromarray(im).resize((im.shape[1] // 3, im.shape[0] // 3)))
    # print(im.max())
    vis_img = Visualizer(
        im, meta, instance_mode=ColorMode.IMAGE_BW, scale=scale
    ).draw_pan_seg(
        upsample_mask(pan_mask, (im.shape[0], im.shape[1])),
        pan_ann['segments_info'],
        alpha=0.5,
    ).get_image()
    return vis_img
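# Sketch of the dset_meta["cats"] layout d2_vis expects, inferred from the lookups above
# (one entry per category with id / name / isthing / color); the two entries are illustrative only.
example_dset_meta = {
    "cats": {
        1: {"id": 1, "name": "person", "isthing": True, "color": [220, 20, 60]},
        184: {"id": 184, "name": "grass", "isthing": False, "color": [152, 251, 152]},
    }
}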
        nargs=argparse.REMAINDER,
    )
    return parser


if __name__ == "__main__":
    mp.set_start_method("spawn", force=True)
    args = get_parser().parse_args()
    logger = setup_logger()
    logger.info("Arguments: " + str(args))

    cfg = setup_cfg(args)
    from detectron2.data.catalog import Metadata
    metadata = Metadata(
        evaluator_type='coco',
        name='PubLayNet',
        thing_classes=["text", "title", "list", "figure", "table"])
    demo = VisualizationDemo(cfg, metadata)

    if args.input:
        if len(args.input) == 1:
            args.input = glob.glob(os.path.expanduser(args.input[0]))
            assert args.input, "The input path(s) was not found"
        for path in tqdm.tqdm(args.input, disable=not args.output):
            # use PIL, to be consistent with evaluation
            img = read_image(path, format="BGR")
            start_time = time.time()
            predictions, visualized_output = demo.run_on_image(img)
            logger.info("{}: detected {} instances in {:.2f}s".format(
                path, len(predictions["instances"]), time.time() - start_time))
parser.add_argument("image_dir") parser.add_argument("--type", choices=["instance", "semantic"], default="instance") args = parser.parse_args() from detectron2.data.catalog import Metadata from detectron2.utils.visualizer import Visualizer logger = setup_logger(name=__name__) dirname = "cornell-data-vis" os.makedirs(dirname, exist_ok=True) if args.type == "instance": dicts = load_cornell_instances( args.image_dir, to_polygons=True ) logger.info("Done loading {} samples.".format(len(dicts))) meta = Metadata().set( thing_classes=["grasp", "nograsp"], #thing_classes=[f"{sector}grasp" for sector in range(18)],#os.listdir(args.image_dir), stuff_classes=["nothing", "thing"] ) for d in dicts: img = np.array(Image.open(d["file_name"])) visualizer = Visualizer(img, metadata=meta) vis = visualizer.draw_dataset_dict(d) # cv2.imshow("a", vis.get_image()[:, :, ::-1]) # cv2.waitKey() fpath = os.path.join(dirname, os.path.basename(d["file_name"])) vis.save(fpath)
class BlendPredictor:
    shrink_threshold = .15

    def __init__(self, panoptic_config, mask_config,
                 panoptic_predictor=None, mask_predictor=None,
                 panoptic_metadata=None, mask_metadata=None,
                 shrink_threshold=None):
        self.po_predictor = panoptic_predictor if panoptic_predictor is not None else DefaultPredictor(panoptic_config)
        self.mask_predictor = mask_predictor if mask_predictor is not None else DefaultPredictor(mask_config)
        self.po_md = panoptic_metadata if panoptic_metadata is not None else MetadataCatalog.get(panoptic_config.DATASETS.TRAIN[0])
        self.mask_md = mask_metadata if mask_metadata is not None else MetadataCatalog.get(mask_config.DATASETS.TRAIN[0])
        self.shrink_threshold = shrink_threshold if shrink_threshold is not None else self.shrink_threshold
        self._merge_metadata()

    def _merge_metadata(self):
        added_classes = self.mask_md.get('thing_classes')
        total_classes = self.po_md.get('thing_classes').copy()
        stuff_classes = self.po_md.get('stuff_classes')
        self.offset = len(total_classes)
        for c in added_classes:
            if c not in total_classes:
                total_classes.append(c)
            else:
                total_classes.append(f"custom_{c}")
        self.blend_md = Metadata(thing_classes=total_classes, stuff_classes=stuff_classes)

    def predict(self, img):
        # First let's run the predictions
        self.panoptic_seg, self.panoptic_seg_info = self.po_predictor(img)["panoptic_seg"]
        self.mask_output = self.mask_predictor(img)
        return self.blend_segs(self.panoptic_seg, self.panoptic_seg_info, self.mask_output)

    def blend_segs(self, panoptic_seg, panoptic_seg_info, mask_output):
        total_classes = self.blend_md.get('thing_classes')
        po_seg = panoptic_seg.to("cpu").numpy()
        blend_seg = po_seg.copy()
        max_seg = blend_seg.max()
        blend_info = panoptic_seg_info.copy()

        masks = mask_output["instances"].to("cpu").get("pred_masks").numpy()
        scores = mask_output["instances"].to("cpu").get("scores").numpy()
        classes = mask_output["instances"].to("cpu").get("pred_classes").numpy()

        # Basic blending: paste each mask instance on top of the panoptic segmentation
        instance_ids = {}
        for i in range(len(classes)):
            iid = max_seg + i + 1
            c = classes[i]
            m = masks[i]
            s = scores[i]
            if c in instance_ids:
                instance_ids[c] += 1
            else:
                instance_ids[c] = 0
            blend_seg = blend_seg * ((m - 1) * -1)
            blend_seg = blend_seg + (m * iid)
            area = np.count_nonzero(blend_seg == iid)
            info = {"id": iid,
                    "isthing": True,
                    "score": s,
                    "category_id": c + self.offset,
                    "instance_id": instance_ids[c],
                    "area": area}
            blend_info.append(info)

        # Remove "dead" instances that lost more than (1 - shrink_threshold) of their area
        final_info = []
        for i in range(len(blend_info)):
            seg = blend_info[i]
            if i >= len(panoptic_seg_info):
                final_info.append(seg)
                continue
            orig_seg = panoptic_seg_info[i]
            iid = seg["id"]
            if "area" in orig_seg:
                orig_area = orig_seg["area"]
            else:
                orig_area = np.count_nonzero(po_seg == iid)
            new_area = np.count_nonzero(blend_seg == iid)
            pct = new_area / orig_area
            if pct > self.shrink_threshold:
                seg["area"] = new_area
                final_info.append(seg)
            else:
                print(f"REMOVING ID: {iid}, Category: {total_classes[seg['category_id']]}, "
                      f"Orig Area: {orig_area}, New Area: {new_area}, PCT: {pct}")
                blend_seg = np.where(blend_seg == iid, 0, blend_seg)

        final_seg = torch.tensor(blend_seg)
        # Return the filtered segment info alongside the blended segmentation
        return final_seg, final_info
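# A hedged usage sketch for BlendPredictor: the config names are standard detectron2
# model-zoo entries, but the custom mask-model weights path and the input image are
# placeholders, and the DefaultPredictor/MetadataCatalog/Metadata/np/torch imports used by
# the class above are assumed to be in scope.
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg

po_cfg = get_cfg()
po_cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"))
po_cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")

mask_cfg = get_cfg()
mask_cfg.merge_from_file(model_zoo.get_config_file(
    "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
mask_cfg.MODEL.WEIGHTS = "weights/custom_mask_rcnn.pth"  # placeholder path

blend = BlendPredictor(po_cfg, mask_cfg)
img = cv2.imread("example.jpg")  # BGR image, as expected by DefaultPredictor
blend_seg, blend_seg_info = blend.predict(img)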
def main():
    st.title('Household Amenity Detection Project 👁')
    st.write(
        "This Project is inspired by [Airbnb's machine learning powered amenity detection](https://medium.com/airbnb-engineering/amenity-detection-and-beyond-new-frontiers-of-computer-vision-at-airbnb-144a4441b72e)."
    )
    st.write(
        "And also by [Daniel Bourke's Airbnb amenity detection replication](https://github.com/mrdbourke/airbnb-amenity-detection)."
    )
    st.subheader('How does it work?')
    st.write("1. Upload an image in JPG, PNG, or JPEG format.")
    st.write("2. Pick a probability threshold to determine which objects + boxes to render.")
    st.write("   Only objects with higher than threshold probability will be rendered.")
    st.write("3. Click the Make Prediction button to run the model.")
    st.image(Image.open('demo.jpg'), use_column_width=True)

    st.subheader('Input File')
    objects = ['Bathtub', 'Bed', 'Billiard table', 'Ceiling fan',
               'Coffeemaker', 'Couch', 'Countertop', 'Dishwasher',
               'Fireplace', 'Fountain', 'Gas stove', 'Jacuzzi',
               'Kitchen & dining room table', 'Microwave oven',
               'Mirror', 'Oven', 'Pillow', 'Porch', 'Refrigerator',
               'Shower', 'Sink', 'Sofa bed', 'Stairs', 'Swimming pool',
               'Television', 'Toilet', 'Towel', 'Tree house', 'Washing machine',
               'Wine rack']

    # Load model
    predictor = load_model()
    # Create metadata
    data_metadata = Metadata(name='data_train',
                             evaluator_type='coco',
                             thing_classes=objects)

    uploaded_file = st.file_uploader(
        "Upload an Image", type=["png", "jpg", "jpeg", "JPG", "PNG", "JPEG"])
    if uploaded_file is not None:
        image = Image.open(uploaded_file)
        st.image(image, caption='Uploaded Image', use_column_width=True)
        # Make sure image is RGB
        image = image.convert("RGB")

        st.subheader('Output:')
        st.write(
            "Pick a prediction threshold where only objects with probabilities above the threshold will be displayed!"
        )
        # Some number in the range 0-1 (probabilities)
        pred_threshold = st.slider('Prediction Threshold:', 0.0, 1.0, 0.25)

        # Run inference on the image and display the result when the button is clicked
        if st.button("Make Prediction"):
            start_time = time.time()
            with st.spinner("Doing Prediction..."):
                custom_pred, filt_instance = inference(image, predictor,
                                                       data_metadata,
                                                       pred_threshold)
            end_time = time.time()

            st.subheader('Predictions: ')
            # Need to convert CV2 format to PIL format
            custom_pred = cv2.cvtColor(custom_pred, cv2.COLOR_RGB2BGR)
            st.image(custom_pred, caption='Predictions Image', use_column_width=True)

            st.write('Predicted Classes and Probabilities: ')
            # Save predictions to a dataframe
            pred_df = pd.DataFrame()
            object_name = []
            for elem in filt_instance.pred_classes.numpy():
                object_name.append(objects[elem])
            pred_df['Classes'] = object_name
            pred_df['Probabilities'] = filt_instance.scores.numpy()
            if pred_df.shape[0] == 0:
                st.write('No Objects Detected!')
            else:
                st.write(pred_df)

            # Write prediction time
            pred_time = end_time - start_time
            st.write('Prediction Time: ' + ' {0:.2f}'.format(pred_time) + ' seconds')

    st.write("")
    st.subheader("What is under the hood?")
    st.write("Detectron2 RetinaNet model (PyTorch) and Streamlit web application")
    st.image(Image.open('logo.jpg'), use_column_width=True)

    st.subheader("Supported Classes/Objects:")
    st.write("• Bathtub • Bed • Billiard Table")
    st.write("• Ceiling Fan • Coffeemaker • Couch")
    st.write("• Countertop • Dishwasher • Fireplace")
    st.write("• Fountain • Gas Stove • Jacuzzi")
    st.write("• Dining Table • Microwave Oven • Mirror")
    st.write("• Oven • Pillow • Porch")
    st.write("• Refrigerator • Shower • Sink")
    st.write("• Sofa bed • Stairs • Swimming Pool")
    st.write("• Television • Toilet • Towel")
    st.write("• Tree house • Washing Machine • Wine Rack")
logger = setup_logger(name=__name__)

dirname = "cityscapes-data-vis"
os.makedirs(dirname, exist_ok=True)

if args.type == "instance":
    dicts = load_cityscapes_instances(args.image_dir,
                                      args.gt_dir,
                                      from_json=True,
                                      to_polygons=True)
    logger.info("Done loading {} samples.".format(len(dicts)))

    thing_classes = [k.name for k in labels if k.hasInstances and not k.ignoreInEval]
    meta = Metadata().set(thing_classes=thing_classes)
else:
    dicts = load_cityscapes_semantic(args.image_dir, args.gt_dir)
    logger.info("Done loading {} samples.".format(len(dicts)))

    stuff_names = [k.name for k in labels if k.trainId != 255]
    stuff_colors = [k.color for k in labels if k.trainId != 255]
    meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors)

for d in dicts:
    img = np.array(Image.open(PathManager.open(d["file_name"], "rb")))
    visualizer = Visualizer(img, metadata=meta)
    vis = visualizer.draw_dataset_dict(d)
    # cv2.imshow("a", vis.get_image()[:, :, ::-1])
parser.add_argument("--gt_dir",type=str, default="datasets/ctf/char/json") parser.add_argument("--type", choices=["field", "char"], default="char") args = parser.parse_args() from detectron2.data.catalog import Metadata from detectron2.utils.visualizer import Visualizer logger = setup_logger(name=__name__) dirname = "ctf-data-vis" os.makedirs(dirname, exist_ok=True) dicts = load_ctf_json(args.image_dir, args.gt_dir, args.type) logger.info("Done loading {} samples.".format(len(dicts))) thing_classes = [k.name for k in labels_ctf] meta = Metadata().set(thing_classes=thing_classes) # stuff_names = [k.name for k in labels] # stuff_colors = [k.color for k in labels] # meta = Metadata().set(stuff_names=stuff_names, stuff_colors=stuff_colors) for d in dicts: img = np.array(Image.open(d["file_name"])) visualizer = Visualizer(img,metadata=meta) vis = visualizer.draw_dataset_dict(d) cv2.imshow("a", vis.get_image()[:, :, ::-1]) cv2.waitKey() fpath = os.path.join(dirname, os.path.basename(d["file_name"])) vis.save(fpath)