def complex_yolo(pointcloud):
    pointcloud = get_filtered_lidar(pointcloud, cnf.boundary)
    bev_maps = makeBEVMap(pointcloud, cnf.boundary)
    bev_maps = torch.from_numpy(bev_maps)
    bev_maps = torch.unsqueeze(bev_maps, 0)
    input_bev_maps = bev_maps.to(configs.device, non_blocking=True).float()

    t1 = time_synchronized()
    outputs = model(input_bev_maps)
    outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
    outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
    # detections size (batch_size, K, 10)
    detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'],
                        outputs['z_coor'], outputs['dim'], K=configs.K)
    detections = detections.cpu().detach().numpy().astype(np.float32)
    detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh)
    t2 = time_synchronized()

    detections = detections[0]  # only first batch

    # Draw predictions in the BEV image
    bev_map = (bev_maps.squeeze().permute(1, 2, 0).numpy() * 255).astype(np.uint8)
    bev_map = cv2.resize(bev_map, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
    bev_map = draw_predictions(bev_map, detections.copy(), configs.num_classes)
    bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)
    cv2.imshow("BEV", bev_map)

    print('\tDone testing in time: {:.1f}ms, speed {:.2f}FPS'.format((t2 - t1) * 1000, 1 / (t2 - t1)))
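# Usage sketch for complex_yolo (illustrative, not from this file): the function
# expects an (N, 4) float32 array of x, y, z, intensity points, e.g. a KITTI
# velodyne scan. The .bin path below is hypothetical.
if __name__ == '__main__':
    pointcloud = np.fromfile('dataset/kitti/testing/velodyne/000000.bin', dtype=np.float32).reshape(-1, 4)
    complex_yolo(pointcloud)
    cv2.waitKey(0)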
configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
model = model.to(device=configs.device)
model.eval()

out_cap = None
demo_dataset = Demo_KittiDataset(configs)
with torch.no_grad():
    for sample_idx in range(len(demo_dataset)):
        metadatas, bev_map, img_rgb = demo_dataset.load_bevmap_front(sample_idx)
        detections, bev_map, fps = do_detect(configs, model, bev_map, is_front=True)

        # Draw predictions in the BEV image
        bev_map = (bev_map.permute(1, 2, 0).numpy() * 255).astype(np.uint8)
        bev_map = cv2.resize(bev_map, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
        bev_map = draw_predictions(bev_map, detections, configs.num_classes)
        # Rotate the bev_map
        bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)

        img_path = metadatas['img_path'][0]
        img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
        calib = Calibration(configs.calib_path)
        kitti_dets = convert_det_to_real_values(detections)
        if len(kitti_dets) > 0:
            kitti_dets[:, 1:] = lidar_to_camera_box(kitti_dets[:, 1:], calib.V2C, calib.R0, calib.P2)
            img_bgr = show_rgb_image_with_boxes(img_bgr, kitti_dets, calib)

        out_img = merge_rgb_to_bev(img_bgr, bev_map, output_width=configs.output_width)
        write_credit(out_img, (80, 210), text_author='Cre: github.com/maudzung', org_fps=(80, 250), fps=fps)
        if out_cap is None:
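            # Truncated here in the original. A sketch of the continuation,
            # mirroring the VideoWriter setup in the final snippet of this
            # section, plus the per-frame write such a loop needs:
            out_cap_h, out_cap_w = out_img.shape[:2]
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            out_cap = cv2.VideoWriter(
                os.path.join(configs.results_dir, '{}.avi'.format(configs.output_video_fn)),
                fourcc, 30, (out_cap_w, out_cap_h))
        out_cap.write(out_img)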
detections = get_final_pred(detections[0], configs.num_classes, configs.peak_thresh)

img = imgs.squeeze().permute(1, 2, 0).numpy()
img = denormalize_img(img)
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
calib = kitti_data_utils.Calibration(img_paths[0].replace(".png", ".txt").replace("image_2", "calib"))
K = calib.P2

# Draw predictions in the image
out_img = draw_predictions(cv2.imread(img_paths[0]), detections, cnf.colors, K,
                           configs.num_classes, configs.show_3dbox)

print('\tDone testing the {}th sample, time: {:.1f}ms, speed {:.2f}FPS'.format(
    batch_idx, (t2 - t1) * 1000, 1 / (t2 - t1)))

if configs.save_test_output:
    if configs.output_format == 'image':
        img_fn = os.path.basename(img_paths[0])[:-4]
        cv2.imwrite(os.path.join(configs.results_dir, '{}.jpg'.format(img_fn)), out_img)
    elif configs.output_format == 'video':
        if out_cap is None:
            out_cap_h, out_cap_w = out_img.shape[:2]
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
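            # Truncated here in the original. The continuation would construct
            # the writer the same way the final snippet in this section does:
            out_cap = cv2.VideoWriter(
                os.path.join(configs.results_dir, '{}.avi'.format(configs.output_video_fn)),
                fourcc, 30, (out_cap_w, out_cap_h))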
def callback(self, data):
    rospy.loginfo("detection")
    with torch.no_grad():
        # Read the PointCloud2 message into an (N, 4) array of x, y, z, intensity
        gen = point_cloud2.read_points(data)
        cloudata = []
        for p in gen:
            point = np.array([p[0], p[1], p[2], p[3]]).reshape((1, 4))
            cloudata.append(point)
        lidarData = np.concatenate(cloudata, axis=0)
        lidarData = get_filtered_lidar(lidarData, cnf.boundary)

        # Voxelize, prepending a zero batch-index column to the voxel coordinates
        res = self.voxel_generator.generate(lidarData, 20000)
        coorinput = np.pad(res["coordinates"], ((0, 0), (1, 0)), mode='constant', constant_values=0)
        voxelinput = res["voxels"]
        numinput = res["num_points_per_voxel"]

        voxelinputr = torch.tensor(voxelinput, dtype=torch.float32, device=configs.device)
        coorinputr = torch.tensor(coorinput, dtype=torch.int32, device=configs.device)
        numinputr = torch.tensor(numinput, dtype=torch.int32, device=configs.device)

        outputs = self.model(voxelinputr, coorinputr, numinputr)
        outputs = outputs._asdict()
        outputs['hm_cen'] = _sigmoid(outputs['hm_cen'])
        outputs['cen_offset'] = _sigmoid(outputs['cen_offset'])
        # detections size (batch_size, K, 10)
        detections = decode(outputs['hm_cen'], outputs['cen_offset'], outputs['direction'],
                            outputs['z_coor'], outputs['dim'], K=configs.K)
        detections = detections.cpu().numpy().astype(np.float32)
        detections = post_processing(detections, configs.num_classes, configs.down_ratio, configs.peak_thresh)
        detections = detections[0]

        # Render the BEV map and draw the detections on it
        bev_map = makeBEVMap(lidarData, cnf.boundary)
        bev_map = (bev_map.transpose((1, 2, 0)) * 255).astype(np.uint8)
        bev_map = cv2.resize(bev_map, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
        bev_map = draw_predictions(bev_map, detections.copy(), configs.num_classes)
        # Rotate the bev_map
        bev_map = cv2.rotate(bev_map, cv2.ROTATE_180)

        cv2.imshow('test-img', bev_map)
        cv2.waitKey(1)
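# Node wiring sketch for the callback above (the class name, topic name, and
# entry point are assumptions for illustration, not taken from this file):
from sensor_msgs.msg import PointCloud2

if __name__ == '__main__':
    rospy.init_node('lidar_detector')
    detector = Detector()  # hypothetical class that owns `model`, `voxel_generator`, and `callback`
    rospy.Subscriber('/velodyne_points', PointCloud2, detector.callback, queue_size=1)
    rospy.spin()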
configs.device = torch.device('cpu' if configs.no_cuda else 'cuda:{}'.format(configs.gpu_idx))
model = model.to(device=configs.device)
model.eval()

out_cap = None
demo_dataset = Demo_KittiDataset(configs)
with torch.no_grad():
    for sample_idx in range(len(demo_dataset)):
        metadatas, front_bevmap, back_bevmap, img_rgb = demo_dataset.load_bevmap_front_vs_back(sample_idx)
        front_detections, front_bevmap, fps = do_detect(configs, model, front_bevmap, is_front=True)
        back_detections, back_bevmap, _ = do_detect(configs, model, back_bevmap, is_front=False)

        # Draw predictions in the front bevmap
        front_bevmap = (front_bevmap.permute(1, 2, 0).numpy() * 255).astype(np.uint8)
        front_bevmap = cv2.resize(front_bevmap, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
        front_bevmap = draw_predictions(front_bevmap, front_detections, configs.num_classes)
        # Rotate the front_bevmap
        front_bevmap = cv2.rotate(front_bevmap, cv2.ROTATE_90_COUNTERCLOCKWISE)

        # Draw predictions in the back bevmap
        back_bevmap = (back_bevmap.permute(1, 2, 0).numpy() * 255).astype(np.uint8)
        back_bevmap = cv2.resize(back_bevmap, (cnf.BEV_WIDTH, cnf.BEV_HEIGHT))
        back_bevmap = draw_predictions(back_bevmap, back_detections, configs.num_classes)
        # Rotate the back_bevmap
        back_bevmap = cv2.rotate(back_bevmap, cv2.ROTATE_90_CLOCKWISE)

        # Merge front and back bevmaps
        full_bev = np.concatenate((back_bevmap, front_bevmap), axis=1)

        img_path = metadatas['img_path'][0]
        img_bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
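        # Truncated here in the original. A sketch of how the loop would
        # continue, reusing the calls from the front-only demo above:
        calib = Calibration(configs.calib_path)
        kitti_dets = convert_det_to_real_values(front_detections)
        if len(kitti_dets) > 0:
            kitti_dets[:, 1:] = lidar_to_camera_box(kitti_dets[:, 1:], calib.V2C, calib.R0, calib.P2)
            img_bgr = show_rgb_image_with_boxes(img_bgr, kitti_dets, calib)
        out_img = merge_rgb_to_bev(img_bgr, full_bev, output_width=configs.output_width)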
# detections size (batch_size, K, 38)
detections = rtm3d_decode(outputs['hm_mc'], outputs['hm_ver'], outputs['vercoor'], outputs['cenoff'],
                          outputs['veroff'], outputs['wh'], outputs['rot'], outputs['depth'],
                          outputs['dim'], K=configs.K)
detections = detections.cpu().numpy()
detections = post_processing_2d(detections, configs.num_classes, configs.down_ratio)
detections = get_final_pred(detections[0], configs.num_classes, configs.peak_thresh)

img = imgs.squeeze().permute(1, 2, 0).numpy()
img = denormalize_img(img)
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

# Draw predictions in the image
out_img = draw_predictions(img, detections, cnf.colors, configs.num_classes)

print('\tDone testing the {}th sample, time: {:.1f}ms, speed {:.2f}FPS'.format(
    batch_idx, (t2 - t1) * 1000, 1 / (t2 - t1)))

if configs.save_test_output:
    if configs.output_format == 'image':
        img_fn = os.path.basename(img_paths[0])[:-4]
        cv2.imwrite(os.path.join(configs.results_dir, '{}.jpg'.format(img_fn)), out_img)
    elif configs.output_format == 'video':
        if out_cap is None:
            out_cap_h, out_cap_w = out_img.shape[:2]
            fourcc = cv2.VideoWriter_fourcc(*'MJPG')
            out_cap = cv2.VideoWriter(
                os.path.join(configs.results_dir, '{}.avi'.format(configs.output_video_fn)),
                fourcc, 30, (out_cap_w, out_cap_h))
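        # A sketch of what typically follows (not part of the original snippet):
        # append each rendered frame to the writer; after the loop over samples
        # ends, call out_cap.release().
        out_cap.write(out_img)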