def inference_by_idx():
    global BACKEND
    instance = request.json
    response = {"status": "normal"}
    if BACKEND.root_path is None:
        return error_response("root path is not set")
    image_idx = instance["image_idx"]
    # remove_outside = instance["remove_outside"]
    idx = BACKEND.image_idxes.index(image_idx)
    example = BACKEND.dataset[idx]
    # don't forget to pad batch idx in coordinates
    example["coordinates"] = np.pad(
        example["coordinates"], ((0, 0), (1, 0)),
        mode='constant', constant_values=0)
    # don't forget to add newaxis for anchors
    example["anchors"] = example["anchors"][np.newaxis, ...]
    example_torch = example_convert_to_torch(example, device=BACKEND.device)
    pred = BACKEND.net(example_torch)[0]  # calls forward function
    box3d = pred["box3d_lidar"].detach().cpu().numpy()
    locs = box3d[:, :3]
    dims = box3d[:, 3:6]
    rots = np.concatenate(
        [np.zeros([locs.shape[0], 2], dtype=np.float32), -box3d[:, 6:7]],
        axis=1)
    response["dt_locs"] = locs.tolist()
    response["dt_dims"] = dims.tolist()
    response["dt_rots"] = rots.tolist()
    response["dt_labels"] = pred["label_preds"].detach().cpu().numpy().tolist()
    response["dt_scores"] = pred["scores"].detach().cpu().numpy().tolist()
    response = jsonify(results=[response])
    response.headers['Access-Control-Allow-Headers'] = '*'
    return response
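Several of the examples below repeat the same two preprocessing steps before a single-example forward pass. A minimal sketch of that pattern as a standalone helper, assuming a SECOND-style example dict (the helper name is ours, not from the source):

import numpy as np

def prepare_single_example(example):
    # Hypothetical helper: make one dataset example look like a batch of
    # size 1. Voxel coordinates get a batch-index column prepended (zeros
    # for batch 0), and anchors get a leading batch axis, mirroring what
    # merge_second_batch produces for a single sample.
    example["coordinates"] = np.pad(
        example["coordinates"], ((0, 0), (1, 0)),
        mode='constant', constant_values=0)
    example["anchors"] = example["anchors"][np.newaxis, ...]
    return example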
def _inference(self, example):
    train_cfg = self.config.train_config
    input_cfg = self.config.eval_input_reader
    model_cfg = self.config.model.second
    example_torch = example_convert_to_torch(example)
    result_annos = predict_to_kitti_label(
        self.net, example_torch, list(self.target_assigner.classes),
        model_cfg.post_center_limit_range, model_cfg.lidar_input)
    return result_annos
def _inference(self, example):
    train_cfg = self.config.train_config
    input_cfg = self.config.eval_input_reader
    model_cfg = self.config.model.second
    example_torch = example_convert_to_torch(example)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    predictions_dicts = self.net(example_torch)
    return predictions_dicts
def _inference(self, example):
    train_cfg = self.config.train_config
    input_cfg = self.config.eval_input_reader
    model_cfg = self.config.model.second
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.half
    else:
        float_dtype = torch.float32
    example_torch = example_convert_to_torch(example, float_dtype)
    result_annos = predict_kitti_to_anno(
        self.net, example_torch, list(self.target_assigner.classes),
        model_cfg.post_center_limit_range, model_cfg.lidar_input)
    return result_annos
def main():
    cfg_path = Path('/..../pointpillars/car/xyres_##.config')
    ckpt_path = Path('/..../voxelnet-######.tckpt')

    config = pipeline_pb2.TrainEvalPipelineConfig()
    print("config reading")
    with open(cfg_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("building net")
    net = build_network(config.model.second).to(device).float().eval()
    net.load_state_dict(torch.load(ckpt_path))
    print("net built")

    eval_input_cfg = config.eval_input_reader
    dataset = input_reader_builder.build(
        eval_input_cfg,
        config.model.second,
        training=False,
        voxel_generator=net.voxel_generator,
        target_assigner=net.target_assigner).dataset

    idx = 0
    example = dataset[idx]
    example["coordinates"] = np.pad(
        example["coordinates"], ((0, 0), (1, 0)),
        mode='constant', constant_values=0)
    # don't forget to add newaxis for anchors
    example["anchors"] = example["anchors"][np.newaxis, ...]
    example_torch = example_convert_to_torch(example, device=device)

    voxels = example_torch["voxels"]
    num_points = example_torch["num_points"]
    coors = example_torch["coordinates"]
    batch_anchors = example["anchors"]
    batch_size_dev = batch_anchors.shape[0]

    voxel_features = net.voxel_feature_extractor(voxels, num_points, coors)
    spatial_features = net.middle_feature_extractor(
        voxel_features, coors, batch_size_dev)

    # Export the model
    print("exporting as onnx")
    torch_out = torch.onnx._export(
        net.rpn, (spatial_features), "rpn.onnx", export_params=True)
    print("export complete")
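A quick way to sanity-check the exported rpn.onnx is to reload it with onnxruntime. This sketch is not part of the original script and assumes the onnxruntime package is installed:

import onnxruntime as ort

sess = ort.InferenceSession("rpn.onnx")
inp = sess.get_inputs()[0]
print("input:", inp.name, inp.shape)
# Feed the same spatial features the export used, moved to numpy:
# outputs = sess.run(None, {inp.name: spatial_features.detach().cpu().numpy()})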
def test(config_path=args.config_path,
         model_dir=args.model_dir,
         result_path=None,
         create_folder=False,
         pickle_result=True,
         include_roadmap=False,
         device=1):
    """Evaluate a VoxelNet model specified by a config file."""
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)
    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    batch_size = 1
    class_names = list(input_cfg.class_names)

    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    grid_size = voxel_generator.grid_size

    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    net = second_builder.build(model_cfg, voxel_generator, target_assigner,
                               include_roadmap)
    net.cuda().eval()
    print("num trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)
    # torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    torchplus.train.restore(args.model_path, net)
    # torchplus.train.restore("./ped_models_56/voxelnet-275130.tckpt", net)

    out_size_factor = (model_cfg.rpn.layer_strides[0] /
                       model_cfg.rpn.upsample_strides[0])
    print(out_size_factor)
    # out_size_factor *= model_cfg.middle_feature_extractor.downsample_factor
    out_size_factor = int(out_size_factor)
    feature_map_size = grid_size[:2] // out_size_factor
    feature_map_size = [*feature_map_size, 1][::-1]
    print(feature_map_size)
    ret = target_assigner.generate_anchors(feature_map_size)
    # anchors_dict = target_assigner.generate_anchors_dict(feature_map_size)
    anchors = ret["anchors"]
    anchors = anchors.reshape([-1, 7])
    matched_thresholds = ret["matched_thresholds"]
    unmatched_thresholds = ret["unmatched_thresholds"]
    anchors_bv = box_np_ops.rbbox2d_to_near_bbox(anchors[:, [0, 1, 3, 4, 6]])
    anchor_cache = {
        "anchors": anchors,
        "anchors_bv": anchors_bv,
        "matched_thresholds": matched_thresholds,
        "unmatched_thresholds": unmatched_thresholds,
        # "anchors_dict": anchors_dict,
    }

    am = ArgoverseMap()
    dt_annos = []
    root_dir = os.path.join('./../../argodataset/argoverse-tracking/',
                            args.set)
    argoverse_loader = ArgoverseTrackingLoader(root_dir)
    prog_cnt = 0
    for seq in range(len(argoverse_loader)):
        argoverse_data = argoverse_loader[seq]
        nlf = argoverse_data.num_lidar_frame
        for frame in range(nlf):
            prog_cnt += 1
            if prog_cnt % 50 == 0:
                print(prog_cnt)
            points = argoverse_data.get_lidar(frame)
            roi_pts = copy.deepcopy(points)
            city_name = argoverse_data.city_name
            city_to_egovehicle_se3 = argoverse_data.get_pose(frame)
            '''
            roi_pts = city_to_egovehicle_se3.transform_point_cloud(
                roi_pts)  # put into city coords
            # non roi
            roi_pts_flag = am.remove_non_roi_points(
                roi_pts, city_name)  # remove non-driveable region
            roi_pts = roi_pts[roi_pts_flag]
            roi_pts = am.remove_ground_surface(
                roi_pts, city_name)  # remove ground surface
            # convert city to lidar co-ordinates
            roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(
                roi_pts)
            '''
            if args.include_roi or args.dr_area or not args.include_road_points:
                roi_pts = city_to_egovehicle_se3.transform_point_cloud(
                    roi_pts)  # put into city coords
            if args.include_roi:
                roi_pts_flag = am.remove_non_roi_points(
                    roi_pts, city_name)  # remove non-driveable region
                roi_pts = roi_pts[roi_pts_flag]
            if not args.include_roi and args.dr_area:
                roi_pts_flag = am.remove_non_driveable_area_points(
                    roi_pts, city_name)  # remove non-driveable region
                roi_pts = roi_pts[roi_pts_flag]
            if not args.include_road_points:
                roi_pts = am.remove_ground_surface(
                    roi_pts, city_name)  # remove ground surface
            # convert city to lidar co-ordinates
            if args.include_roi or args.dr_area or not args.include_road_points:
                roi_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(
                    roi_pts)
            roi_pts[:, 2] = roi_pts[:, 2] - 1.73
            pts_x, pts_y, pts_z = roi_pts[:, 0], roi_pts[:, 1], roi_pts[:, 2]
            input_dict = {
                'points': roi_pts,
                'pointcloud_num_features': 3,
            }
            out_size_factor = (model_cfg.rpn.layer_strides[0] //
                               model_cfg.rpn.upsample_strides[0])
            example = prep_pointcloud(
                input_dict=input_dict,
                root_path=None,
                voxel_generator=voxel_generator,
                target_assigner=target_assigner,
                max_voxels=input_cfg.max_number_of_voxels,
                class_names=list(input_cfg.class_names),
                training=False,
                create_targets=False,
                shuffle_points=input_cfg.shuffle_points,
                generate_bev=False,
                without_reflectivity=model_cfg.without_reflectivity,
                num_point_features=model_cfg.num_point_features,
                anchor_area_threshold=input_cfg.anchor_area_threshold,
                anchor_cache=anchor_cache,
                out_size_factor=out_size_factor,
                out_dtype=np.float32)
            if "anchors_mask" in example:
                example["anchors_mask"] = example["anchors_mask"].astype(
                    np.uint8)
            example["image_idx"] = str(seq) + "_" + str(frame)
            example["image_shape"] = np.array([400, 400], dtype=np.int32)
            example["road_map"] = None
            example["include_roadmap"] = False
            example["points"] = roi_pts
            # torch.save(example, "./network_input_examples/" + info)
            example = merge_second_batch([example])
            example_torch = example_convert_to_torch(example,
                                                     device=args.device)
            try:
                result_annos = predict_kitti_to_anno(
                    net, example_torch, input_cfg.class_names,
                    model_cfg.post_center_limit_range, model_cfg.lidar_input)
            except Exception:
                print(seq, frame)
                continue
            dt_annos += result_annos

    if pickle_result:
        sdi = args.save_path.rfind('/')
        save_dir = args.save_path[:sdi]
        if not os.path.exists(save_dir):
            os.mkdir(save_dir)
        with open(args.save_path, 'wb') as f:
            pickle.dump(dt_annos, f)
            + str(box.wlh[0]) + ' ' + str(box.wlh[1]) + ' ' + \
            str(box.wlh[2]) + ' ' + str(box.orientation.yaw_pitch_roll[0]) \
            + ' ' + str(name) + ' '
        pred_str += pred
    return pred_str.strip()


# In[12]:

token2predstr = {}
detections = []
# tokens = []
tk0 = tqdm(dataloader, total=len(dataloader))
for idx, examples in enumerate(tk0):
    try:
        example_torch = example_convert_to_torch(examples, device=device)
        detections += net(example_torch)
        # tokens += examples['metadata']
    except Exception as e:
        print(e)
        import pdb
        pdb.set_trace()

threshold = 0.2
for idx, pred in enumerate(tqdm(detections)):
    pred = thresholded_pred(pred, threshold)
    # token = tokens[idx]['token']
    token = pred['metadata']['token']
    pred_str = get_pred_str(pred, token)
    index = df[df['Id'] == token].index[0]
    df.loc[index, 'PredictionString'] = pred_str
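thresholded_pred is called here but not defined in the snippet. A plausible minimal version, assuming it only drops detections below the score threshold (key names follow the pred dicts used above):

def thresholded_pred(pred, threshold):
    # Hypothetical reimplementation: keep detections whose score clears
    # `threshold`; other entries such as metadata are carried through.
    keep = pred["scores"] >= threshold
    out = dict(pred)
    for key in ("box3d_lidar", "scores", "label_preds"):
        out[key] = pred[key][keep]
    return out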
def main(config_path,
         lc_horizon,
         num_examples,
         model_dir,
         ckpt_path=None,
         **kwargs):
    """Doesn't support pickle_result anymore. If you want to generate a KITTI
    label file, please use kitti_anno_to_label_file and
    convert_detection_to_kitti_annos in second.data.kitti_dataset.
    """
    assert len(kwargs) == 0
    model_dir = str(Path(model_dir).resolve())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if isinstance(config_path, str):
        config = pipeline_pb2.TrainEvalPipelineConfig()
        with open(config_path, "r") as f:
            proto_str = f.read()
            text_format.Merge(proto_str, config)
    else:
        # Directly provide a config object. This is usually used when you
        # want to eval with several different parameters in one script.
        config = config_path
    input_cfg = config.eval_input_reader
    input_cfg.cum_lc_wrapper.lc_horizon = lc_horizon
    model_cfg = config.model.second
    train_cfg = config.train_config

    net = build_network(model_cfg, measure_time=False).to(device)
    if train_cfg.enable_mixed_precision:
        net.half()
        print("half inference!")
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    target_assigner = net.target_assigner
    voxel_generator = net.voxel_generator

    if ckpt_path is None:
        assert model_dir is not None
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)
    batch_size = 1
    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner,
                                              net=net)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    net.eval()
    t = time.time()
    detections = []
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start((len(eval_dataset) + batch_size - 1) // batch_size)
    prep_example_times = []
    prep_times = []
    t2 = time.time()

    times = []
    for scene_id in trange(num_examples):
        idx = eval_dataset.scene_id_and_step_to_idx(scene_id, lc_horizon)

        torch.cuda.synchronize()
        b_ex_time = time.time()

        example = eval_dataset[idx]
        example = merge_second_batch([example])
        example = example_convert_to_torch(example, float_dtype)
        with torch.no_grad():
            detections = net(example)

        torch.cuda.synchronize()
        e_ex_time = time.time()

        del example, detections
        times.append(e_ex_time - b_ex_time)

    times = np.array(times)
    mean = times.mean()
    interval = 1.96 * times.std() / np.sqrt(len(times))  # 95% confidence interval
    return mean, interval
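The interval above is the usual normal-approximation confidence interval for a mean; as a standalone helper (ours, not from the source):

import numpy as np

def mean_with_ci(samples, z=1.96):
    # z = 1.96 gives ~95% coverage under the normal approximation.
    samples = np.asarray(samples)
    return samples.mean(), z * samples.std() / np.sqrt(len(samples))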
def predict(config_path,
            model_dir,
            result_path=None,
            predict_test=False,
            ckpt_path=None,
            ref_detfile=None,
            pickle_result=True,
            bb_save_dir=None,
            pub_bb=None,
            pub_lidar=None):
    '''Set up the network and provide useful output.'''

    ####################
    # SETUP PARAMETERS #
    ####################
    model_dir = pathlib.Path(model_dir)
    if predict_test:
        result_name = 'predict_test'
    else:
        result_name = 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    # TODO: include this program as a function call in the localization/mapping code as needed
    # TODO: use whole pointcloud data instead of reduced pointcloud
    # TODO: [Done] store data in respective pcd and bounding box (csv) files
    # TODO: [Done] create a cpp file to read and show (n number of) pcd files with respective bounding boxes
    #       > [Done] Check if pcl_viewer can open pcd
    #       > [Done] Check if pcl_viewer can be called from a cpp program for visualization
    #       > [Done] Check if that cpp program can also show a bounding box

    # Read the config file data into useful structures
    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range

    #########################
    # BUILD VOXEL GENERATOR #
    #########################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    #####################
    # NETWORK GENERATOR #
    #####################
    # Build the NN in GPU mode
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()

    # Standard conversion approach if using FloatingPoint16 instead of
    # FloatingPoint32 tensors
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    # Restore old checkpoint if possible
    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    # Set up network for evaluation mode
    net.eval()

    #####################
    # DATASET GENERATOR #
    #####################
    # Dataset build for easy usage
    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=input_cfg.batch_size,
        shuffle=False,
        num_workers=input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)

    # Further variable setup
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)
    t = time.time()
    dt_annos = []
    global_set = None
    print()
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start(len(eval_dataset) // input_cfg.batch_size + 1)

    #################
    # NETWORK USAGE #
    #################
    # Predict a set of 'num_workers' samples, get info and reformat data as needed
    # temp_count = 0
    for example in iter(eval_dataloader):
        # pprint.pprint(example, width=1)
        # for key, value in example.items():
        #     print(key)
        #     print(np.shape(value))
        example = example_convert_to_torch(example, float_dtype)
        print(example['image_idx'])
        # pprint.pprint(example, width=1)
        # for key, value in example.items():
        #     print(key)
        #     print(np.shape(value))
        # if pickle_result:

        # NOTE: Predict network output
        # start_time = time.time()
        predictions_dicts = net(example)

        # Save copy of data if user requested
        # if save_pcd:
        #     np.fromfile(str(v_path), dtype=np.float32, count=-1).reshape([-1, 4])
        # Publish original data
        # if pub_lidar:
        #     data = PointCloud2()
        #     # FIXME: Extract pointcloud info from 'example' (use original kitti data file if needed) > publish
        #     pub_lidar.publish(data)
        # Publish network output
        # if pub_bb:
        #     data = MarkerArray()
        #     # FIXME: Create a wireframe 3D bounding box and, if possible, a translucent 3D cuboid as well > publish
        #     pub_bb.publish(data)
        # print('Network predict time: {}'.format(time.time() - start_time))
        # pprint.pprint(predictions_dicts[0])
        # for key, value in predictions_dicts[0].items():
        #     print(key)
        #     print(np.shape(value))

        if bb_save_dir:
            save_path = pathlib.Path(bb_save_dir)
            # create directory (and its parents) if non-existent
            save_path.mkdir(parents=True, exist_ok=True)
            for pred_dict in predictions_dicts:
                if pred_dict['box3d_lidar'] is not None:
                    bb_lidar = pred_dict['box3d_lidar'].detach().cpu().numpy()
                else:
                    bb_lidar = [[
                        'temp', 'temp', 'temp', 'temp', 'temp', 'temp', 'temp'
                    ]]
                df = pd.DataFrame(bb_lidar)
                df.columns = ['x', 'y', 'z', 'w', 'l', 'h', 't']
                filename = save_path.joinpath(
                    str(pred_dict['image_idx']) + '.csv')
                filename.write_text(df.to_csv(index=False))
def detect(scene_token, config_path, ckpt_path, info_path, root_path,
           result_path):
    ### Read config file
    torch.set_num_threads(2)
    # config_path = "configs/nuscenes/all.pp.lowa_large_range_v2.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    # config_tool.change_detection_range_v2(model_cfg, [-50, -50, 50, 50])
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    ### Build network, target assigner and voxel generator
    # info_path = '/home/itiv/Desktop/lyft-dataset/infos_val.pkl'
    # root_path = '/home/itiv/Desktop/lyft-dataset'
    with open(info_path, 'rb') as f:
        infos = pickle.load(f)
    token2info = {}
    for info in infos['infos']:
        token2info[info['token']] = info
    # ckpt_path = "/home/itiv/Desktop/repo/scenarios_in_CarMaker/BA_Daniel/Lyft-Detector/second.pytorch/second/model/model_large_range_v2/voxelnet-33445.tckpt"
    net = build_network(config.model.second).to(device).float().eval()
    net.load_state_dict(torch.load(ckpt_path))
    eval_input_cfg = config.eval_input_reader
    eval_input_cfg.dataset.kitti_root_path = root_path
    eval_input_cfg.dataset.kitti_info_path = info_path
    dataset = input_reader_builder.build(
        eval_input_cfg,
        config.model.second,
        training=False,
        voxel_generator=net.voxel_generator,
        target_assigner=net.target_assigner)  # .dataset
    batch_size = 2
    num_workers = 2
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             pin_memory=False,
                                             collate_fn=merge_second_batch)
    target_assigner = net.target_assigner
    voxel_generator = net.voxel_generator
    classes = target_assigner.classes

    detections = []
    # tk0 = prog_bar(dataloader, total=len(dataloader))
    tk0 = dataloader
    for idx, examples in enumerate(tk0):
        # print(idx)
        # print(examples)
        try:
            example_torch = example_convert_to_torch(examples, device=device)
            detections += net(example_torch)
        except Exception as e:
            print(e)
            import pdb
            pdb.set_trace()

    threshold = 0.2
    first_sample_token = detections[0]['metadata']['token']
    dict_detections = {"results": {}}
    for idx, pred in enumerate(detections):
        pred = thresholded_pred(pred, threshold)
        # token = tokens[idx]['token']
        token = pred['metadata']['token']
        dict_detections['results'].update(
            get_pred_dict(pred, token, classes, token2info))
        # pred_str = get_pred_str(pred, token)
        # predStrings.append(pred_str)
        # index = df[df['Id'] == token].index[0]
        # df.loc[index, 'PredictionString'] = pred_str
    # df.to_csv('final.csv', index=False)
    # print(dict_detections)
    # path_to_result = '/home/itiv/Desktop/lyft-dataset/detections-largev2.json'
    with open(result_path + '/detections_' + scene_token + '.json', 'w') as fp:
        json.dump(dict_detections, fp)
def LC_PROCESS(prev_sensor_data, net, dataset, policy, sparsify_config):
    """
    Processes prev_sensor_data and feeds it to the net, places a light
    curtain using the generated confidence scores, gets the light curtain
    return, and adds it to a new sensor data dict.

    While processing the sensor_data, only dataset._prep_main_func is used.
    No calls to _prep_data_aug and _prep_targets are made.

    sensor_data["lidar"]["points"] will be considered the main input to the
    model. prev_sensor_data["lidar"]["points"] will be used as the input
    cloud, and the output cloud will be saved into
    sensor_data["lidar"]["points"]. This only handles 4-dimensional points,
    of the form (x, y, z, intensity).

    Args:
        prev_sensor_data: a dict containing the following items
            {
                "lidar": {"type": "lidar", "points": ...},
                "calib": {...},
                "depth": {"type": "depth_map", "image": ...},
                "init_lidar": {"num_beams": ..., "points": ...}
            }
            sensor_data["lidar"]["points"] constitutes the main input to the
            network. It could be initialized by sensor_data["init_lidar"],
            for example.
        net: (VoxelNet)
        dataset: (Dataset) dataset.
        policy: (Policy) one of the light curtain policies in
            second.light_curtain.policy.
        sparsify_config: (config) options that are used to subsample a point
            cloud return.

    Returns:
        dictionary of all information generated during the LC process
            {
                "prev_sensor_data": previous sensor data,
                "next_sensor_data": the new sensor data dict,
                "lc_image": lc_image returned by pylc API,
                "lc_cloud": lc_image processed into a point cloud; this is
                            the main return,
                "net_pred": pred of the network,
                "net_preds_dict": preds_dict of the network,
                "confidence_map": network predictions converted to a
                                  confidence_map
            }
        sensor_data: the same sensor_data, but with
            sensor_data["lidar"]["points"] replaced by the new cumulative
            point cloud.
    """
    net_was_training = net.training
    net.eval()

    points = prev_sensor_data["lidar"]["points"]
    calib = prev_sensor_data["calib"]
    example = dataset._prep_func_main(points, calib)
    if "image_idx" in prev_sensor_data["metadata"]:
        example["metadata"] = prev_sensor_data["metadata"]
    if "anchors_mask" in example:
        example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
    # don't forget to pad batch idx in coordinates
    example["coordinates"] = np.pad(
        example["coordinates"], ((0, 0), (1, 0)),
        mode='constant', constant_values=0)
    # don't forget to add newaxis for anchors
    example["anchors"] = example["anchors"][np.newaxis, ...]

    with torch.no_grad():
        example_torch = example_convert_to_torch(example)
        pred, preds_dict = net(example_torch, ret_preds_dict=True)

    # Creating confidence map.
    cls_preds = preds_dict['cls_preds']  # shape=(1, 2, 200, 176, 1)
    cls_preds = cls_preds[0, :, :, :, 0]  # shape=(2, 200, 176)
    cls_preds = torch.sigmoid(cls_preds).detach().cpu().numpy()  # shape=(2, 200, 176)
    anchors = example["anchors"][0]  # (2 * 200 * 176, 7)
    anchors_mask = example.get("anchors_mask", None)  # (200 * 176,) dtype=np.uint8
    confidence_map = _get_confidence_map(anchors, anchors_mask, cls_preds,
                                         dataset.lc_device.TRANSFORMS["wTc"])

    # Light curtain point cloud.
    depth_image = prev_sensor_data["depth"]["image"]
    # Design points should be in the camera frame.
    design_pts = policy.get_design_points(confidence_map)
    lc_image = dataset.lc_device.get_return(depth_image, design_pts)
    lc_cloud = lc_image.reshape(-1, 4)  # (N, 4)
    # Remove points which are NaNs.
    non_nan_mask = np.all(np.isfinite(lc_cloud), axis=1)
    lc_cloud = lc_cloud[non_nan_mask]  # (N, 4)
    # Convert lc_cloud to velo frame.
    lc_cloud_xyz1 = np.hstack(
        (lc_cloud[:, :3], np.ones([len(lc_cloud), 1], dtype=np.float32)))
    lc_cloud_xyz1 = lc_cloud_xyz1 @ dataset.lc_device.TRANSFORMS["cTw"].T
    lc_cloud[:, :3] = lc_cloud_xyz1[:, :3]  # (N, 4)
    # Rescale LC return to [0, 1].
    lc_cloud[:, 3] /= 255.

    lc_pts_added = lc_cloud
    if sparsify_config is not None:
        lc_pts_added = sparsify_lc_return(lc_pts_added, sparsify_config)

    next_sensor_data = prev_sensor_data.copy()
    next_sensor_data["lidar"] = prev_sensor_data["lidar"].copy()
    next_sensor_data["lidar"]["points"] = np.vstack(
        (next_sensor_data["lidar"]["points"], lc_pts_added))

    # Reset training state of network.
    net.train(net_was_training)

    return {
        "prev_sensor_data": prev_sensor_data,
        "next_sensor_data": next_sensor_data,
        "lc_image": lc_image,
        "lc_cloud": lc_cloud,
        "net_pred": pred,
        "net_preds_dict": preds_dict,
        "confidence_map": confidence_map
    }
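The camera-to-velo conversion above is the standard homogeneous-coordinates pattern; extracted as a generic helper (our sketch, not part of the source):

import numpy as np

def transform_points(points_xyz, T):
    # Apply a 4x4 rigid transform T to an (N, 3) point array: pad with
    # ones, multiply by T^T, then drop the homogeneous column.
    xyz1 = np.hstack(
        (points_xyz, np.ones([len(points_xyz), 1], dtype=points_xyz.dtype)))
    return (xyz1 @ T.T)[:, :3]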
def process(self):
    """
    Publishes:
        {
            "detections": {
                "dt_locs", "dt_dims", "dt_rots", "dt_labels", "dt_scores"
            },
            "confidence_map":
                (np.ndarray, dtype=float32, shape=(X, Z, 2+K)) confidence
                map of the detector.
                Axis 0 corresponds to increasing X (camera frame) /
                decreasing Y (velo frame).
                Axis 1 corresponds to increasing Z (camera frame) /
                increasing X (velo frame).
                Axis 2 corresponds to (x, z, c_1, ..., c_K):
                    - x   : x in camera frame.
                    - z   : z in camera frame.
                    - c_k : kth confidence score lying in [0, 1].
        }
    """
    while True:
        # collect lidar data
        if len(self.lidar.stream) == 0:
            raise Exception(
                "Detector: lidar stream is empty! Detector needs lidar points.")
        lidar_points = self.lidar.stream[-1].data  # (N, 3)
        lidar_points = np.hstack(
            (lidar_points,
             np.ones([len(lidar_points), 1], dtype=np.float32)))  # (N, 4)

        # collect light curtain return
        if len(self.light_curtain.stream) > 0:
            lc_points = [
                elem.data["lc_cloud"] for elem in self.light_curtain.stream
            ]  # list of (N, 4)
            lc_points = np.vstack(lc_points)  # (N, 4)
            lc_points = self.sparsify_lc_points(lc_points)  # (N, 4)
        else:
            lc_points = np.zeros([0, 4], dtype=np.float32)

        # combine lidar and light curtain points
        points = np.vstack([lidar_points, lc_points])  # (N, 4)

        example = self.preprocess_fn(points)
        if "anchors_mask" in example:
            example["anchors_mask"] = example["anchors_mask"].astype(np.uint8)
        # don't forget to pad batch idx in coordinates
        example["coordinates"] = np.pad(
            example["coordinates"], ((0, 0), (1, 0)),
            mode='constant', constant_values=0)
        # don't forget to add newaxis for anchors
        example["anchors"] = example["anchors"][np.newaxis, ...]

        with torch.no_grad():
            example_torch = example_convert_to_torch(example)
            pred, preds_dict = self.net(example_torch, ret_preds_dict=True)

        # get detections
        detections = {}
        pred = pred[0]
        box3d = pred["box3d_lidar"].detach().cpu().numpy()
        locs = box3d[:, :3]
        dims = box3d[:, 3:6]
        rots = np.concatenate(
            [np.zeros([locs.shape[0], 2], dtype=np.float32),
             -box3d[:, 6:7]], axis=1)
        detections["dt_locs"] = locs.tolist()
        detections["dt_dims"] = dims.tolist()
        detections["dt_rots"] = rots.tolist()
        detections["dt_labels"] = pred["label_preds"].detach().cpu().numpy().tolist()
        detections["dt_scores"] = pred["scores"].detach().cpu().numpy().tolist()

        # get confidence map
        cls_preds = preds_dict['cls_preds']  # (1, 2, 200, 176, 1)
        cls_preds = cls_preds[0, :, :, :, 0]  # (2, 200, 176)
        cls_preds = torch.sigmoid(cls_preds).detach().cpu().numpy()  # (2, 200, 176)
        anchors = example["anchors"][0]  # (2 * 200 * 176, 7)
        anchors_mask = example.get("anchors_mask", None)  # (200 * 176,) dtype=np.uint8
        confidence_map = self.get_confidence_map(
            anchors, anchors_mask, cls_preds,
            self.light_curtain.lc_device.TRANSFORMS["wTc"])

        stream_data = dict(detections=detections,
                           confidence_map=confidence_map)
        yield self.env.timeout(self.latency)  # forward pass
        self.publish(stream_data)

        # get design points
        yield self.env.process(self.dp_optimizer.service(confidence_map))
        design_pts = self.dp_optimizer.stream[-1].data  # (N, 2)

        # operate light curtain
        yield self.env.process(self.light_curtain.service(design_pts))
def slicing_forward(self, dataset_iter, deadline):
    self.measure_time_start('Pre-stage-1', False)
    self.measure_time_start('PFE')
    # self.measure_time_start('PillarGen')
    try:
        if self._repeat_example:
            example = merge_second_batch(
                [self._data_loader.dataset[self._repeat_example_idx]])  # id 19
        else:
            example = next(dataset_iter)
    except StopIteration:
        print("Woaaaah, that is unexpected! Check dataset iter!")
        return None, None, None
    num_voxels = example["num_voxels"][0][0]  # batch size 1
    example = example_convert_to_torch(example, self._float_dtype)
    # self.measure_time_end('PillarGen')
    torch.backends.cudnn.benchmark = False
    io_dict = self._net.forward_pfn(example)
    torch.backends.cudnn.benchmark = self._cudnn_benchmarking
    self.measure_time_end('PFE')

    # with torch.cuda.stream(self._other_cuda_stream):
    # Calculate anchor mask
    stg0_sum = torch.sum(io_dict['stage0'], 1, keepdim=True)
    sum_mask = torch.nn.functional.max_pool2d(
        stg0_sum, 15, stride=int(self._stg0_pred_scale_rate),
        padding=7).type(torch.bool)
    example['anchors_mask'] = sum_mask.expand(
        self._box_preds_size[:-1]).contiguous()
    sum_del_mask = torch.unsqueeze(torch.logical_not(sum_mask), -1)
    sum_del_mask = sum_del_mask.expand(self._cls_preds_size).contiguous()

    # self.measure_time_start("RPN-total")
    # Returns possible batch sizes for each stage
    self.measure_time_start('RPN-stage-1')
    self._net.forward_rpn_stage(io_dict)
    self._net.forward_rpn_cls_preds(io_dict)
    self.measure_time_end('RPN-stage-1')

    # Calculate sum of class scores within each slice,
    # but only use class scores positioned close to pillar locations.
    # Use stg0 to create the pillar mask which will be used for slicing.
    # Apply sigmoid and mask values below the NMS threshold.
    cls_scores = torch.sigmoid(io_dict["cls_preds"])
    cls_scores_del = cls_scores <= self._nms_score_threshold
    # torch.cuda.default_stream().wait_stream(self._other_cuda_stream)
    cls_scores_del = torch.logical_or(cls_scores_del, sum_del_mask)
    cls_scores.masked_scatter_(cls_scores_del, self._pred_zeroer)
    if not self._net._encode_background_as_zeros:
        cls_scores = cls_scores[..., 1:].contiguous()
    cls_scores = torch.sum(cls_scores, [0, 1, 2, 4])  # reduce to H
    csa = self.slice_with_ranges(cls_scores.cpu(), self._cls_scr_ranges)
    # LOOKS LIKE IT IS SYNCHED AT THIS POINT

    if not self._merge_preds:
        anchors = example['anchors'].view(self._box_preds_size)
        aa = self.slice_preds_with_ranges(anchors, self._preds_slc_ranges)
        ama = self.slice_with_ranges(example['anchors_mask'],
                                     self._preds_slc_ranges)
    slice_io_dicts = []
    for i in range(self._num_slices):
        slice_io_dicts.append({})
        if not self._merge_preds:
            slice_io_dicts[-1]['anchors'] = aa[i]
            slice_io_dicts[-1]['anchors_mask'] = ama[i]

    # Get the cls mask of each slice, also the overlapped regions explicitly;
    # stg1 class scores will be enough for everything
    cls_scr_sums = torch.empty(2 * len(slice_io_dicts) - 1,
                               dtype=cls_scores.dtype,
                               device='cpu')
    for i, cs in enumerate(csa):
        cls_scr_sums[i] = torch.sum(cs)
    zerocuk_tensor = cls_scr_sums.new_zeros((1, ))
    slice_io_dicts[0]['cls_scores'] = torch.cat(
        (zerocuk_tensor, cls_scr_sums[:2]))
    slice_io_dicts[-1]['cls_scores'] = torch.cat(
        (cls_scr_sums[-2:], zerocuk_tensor))
    for i, io_d in zip(range(1, len(cls_scr_sums) - 2, 2),
                       slice_io_dicts[1:-1]):
        io_d['cls_scores'] = cls_scr_sums[i:i + 3]

    # I DON'T NEED TO CALL SYNC BECAUSE IT IS ALREADY SYNCED
    # FROM WHAT I SAW, BUT DO IT ANYWAY, NO BIG LOSS
    torch.cuda.synchronize()

    # Now decide the slice forwarding pattern.
    # This algorithm takes 0.5 ms
    slices_to_exec = self.sched_slices(slice_io_dicts, deadline)
    stg2_slices, stg3_slices = slices_to_exec
    stg_seq = [1]
    self.measure_time_end('Pre-stage-1', False)
    self.measure_time_start('Post-stage-1', False)

    data_sliced = False
    if len(stg2_slices) == self._num_slices:
        # Since we are going to execute all slices,
        # don't do slicing and run the whole stage
        self.measure_time_start("RPN-stage-2")
        self._net.forward_rpn_stage(io_dict)
        self.measure_time_end("RPN-stage-2")
    elif len(stg2_slices) > 0:
        data_sliced = True
        # Slice the tensors
        sa = self.slice_with_ranges(io_dict["stage1"], self._stg1_slc_ranges)
        ua = self.slice_with_ranges(io_dict["up1"], self._up1_slc_ranges)
        cpa = self.slice_preds_with_ranges(io_dict["cls_preds"],
                                           self._preds_slc_ranges)
        for i in range(self._num_slices):
            slice_io_dicts[i]["stages_executed"] = 1
            slice_io_dicts[i]["stage1"] = sa[i]
            slice_io_dicts[i]["up1"] = ua[i]
            slice_io_dicts[i]["backbone_out"] = ua[i]
            slice_io_dicts[i]["cls_preds"] = cpa[i]
        # We have slices to exec through stage 2;
        # batch the chosen slices
        batch_io_dict = {
            "stages_executed": 1,
        }
        batch_io_dict["stage1"] = torch.cat(
            [slice_io_dicts[s]["stage1"] for s in stg2_slices])
        # batch_io_dict["up1"] = torch.cat(
        #     [slice_io_dicts[s]["up1"] for s in stg2_slices])
        self.measure_time_start("RPN-stage-2")
        self._net.forward_rpn_stage(batch_io_dict)
        self.measure_time_end("RPN-stage-2")
        # Scatter the results anyway
        # if len(stg3_slices) < len(stg2_slices):
        stg2_chunks = torch.chunk(batch_io_dict["stage2"], len(stg2_slices))
        up2_chunks = torch.chunk(batch_io_dict["up2"], len(stg2_slices))
        for i, s in enumerate(stg2_slices):
            slice_io_dicts[s]["stage2"] = stg2_chunks[i]
            slice_io_dicts[s]["up2"] = up2_chunks[i]
            slice_io_dicts[s]["stages_executed"] = 2
        stg_seq.extend([2] * len(stg2_slices))

    if len(stg3_slices) == self._num_slices:
        # data_sliced will always be false at this point, since the
        # stage 2 slices will also be equal to _num_slices
        self.measure_time_start("RPN-stage-3")
        self._net.forward_rpn_stage(io_dict)
        self.measure_time_end("RPN-stage-3")
    elif len(stg3_slices) > 0:
        # that means stg2_slices was also > 0
        data_sliced = True
        if len(stg2_slices) == self._num_slices:
            # Slice the tensors if they were not sliced during stage 2
            sa = self.slice_with_ranges(io_dict["stage2"],
                                        self._stg2_slc_ranges)
            ua1 = self.slice_with_ranges(io_dict["up1"], self._up1_slc_ranges)
            ua2 = self.slice_with_ranges(io_dict["up2"], self._up2_slc_ranges)
            for i in range(self._num_slices):
                slice_io_dicts[i]["stage2"] = sa[i]
                slice_io_dicts[i]["up1"] = ua1[i]
                slice_io_dicts[i]["up2"] = ua2[i]
                slice_io_dicts[i]["stages_executed"] = 2
        batch_io_dict = {
            "stages_executed": 2,
        }
        # We have slices to exec through stage 3;
        # batch chosen slices
        batch_io_dict["stage2"] = torch.cat(
            [slice_io_dicts[s]["stage2"] for s in stg3_slices])
        # batch_io_dict["up2"] = torch.cat(
        #     [slice_io_dicts[s]["up2"] for s in stg3_slices])
        self.measure_time_start("RPN-stage-3")
        self._net.forward_rpn_stage(batch_io_dict)
        self.measure_time_end("RPN-stage-3")
        # Scatter the results
        up3_chunks = torch.chunk(batch_io_dict["up3"], len(stg3_slices))
        for i, s in enumerate(stg3_slices):
            slice_io_dicts[s]["up3"] = up3_chunks[i]
            slice_io_dicts[s]["stages_executed"] = 3
        stg_seq.extend([3] * len(stg3_slices))

    self.measure_time_start("RPN-finalize")
    if not data_sliced:
        # No slicing was used
        if io_dict['stages_executed'] == 1:
            self._net.forward_rpn_rem_preds(io_dict)
        else:
            self._net.forward_rpn_all_preds(io_dict)
        preds_dict = io_dict
    else:
        # We used slicing; now we need to merge the slices after the
        # detection heads. This part could be batched too, but it is okay
        # to stay like this. Another optimization could be using cuda streams.
        for io_d in slice_io_dicts:
            if io_d["stages_executed"] == 1:
                # stage 1 slices already have cls preds
                io_d['backbone_out'] = io_d['backbone_out'].contiguous()
                self._net.forward_rpn_rem_preds(io_d)
            else:
                self._net.forward_rpn_all_preds(io_d)
        if self._merge_preds:
            # If two overlapped regions went through the same number of
            # stages, get half of the overlapped region from each neighbor
            # io dict. Otherwise, select the one with more stages executed.
            preds_dict = {}
            for k, v in self._pred_dict_copy.items():
                preds_dict[k] = v.clone().detach()
            # every slice has a big middle range and two (or one)
            # small overlap ranges
            slc_r = self._cls_scr_ranges[0]
            for k in preds_dict.keys():
                preds_dict[k][..., :slc_r[1], :] = \
                    slice_io_dicts[0][k][..., :slc_r[1], :]
            for i in range(len(slice_io_dicts) - 1):
                io_d1, io_d2 = slice_io_dicts[i], slice_io_dicts[i + 1]
                se1, se2 = io_d1["stages_executed"], io_d2["stages_executed"]
                ovl_r = self._cls_scr_ranges[i * 2 + 1]
                ovl_len = ovl_r[1] - ovl_r[0]
                for k in preds_dict.keys():
                    if se1 > se2:
                        preds_dict[k][..., ovl_r[0]:ovl_r[1], :] = \
                            io_d1[k][..., -ovl_len:, :]
                    elif se1 < se2:
                        preds_dict[k][..., ovl_r[0]:ovl_r[1], :] = \
                            io_d2[k][..., :ovl_len, :]
                    else:
                        mid = ovl_len // 2
                        preds_dict[k][..., ovl_r[0]:(ovl_r[0]+mid), :] = \
                            io_d1[k][..., -ovl_len:(-ovl_len+mid), :]
                        preds_dict[k][..., (ovl_r[0]+mid):ovl_r[1], :] = \
                            io_d2[k][..., mid:ovl_len, :]
                    slc_r = self._cls_scr_ranges[i * 2 + 2]
                    slc_len = slc_r[1] - slc_r[0]
                    preds_dict[k][..., slc_r[0]:slc_r[1], :] = \
                        io_d2[k][..., ovl_len:(ovl_len+slc_len), :]
            for k, v in preds_dict.items():
                preds_dict[k] = v.contiguous()
    self.measure_time_end("RPN-finalize")
    # self.measure_time_end("RPN-total")

    # ASSUME BATCH SIZE 1
    # Predict has high execution time variance, I wonder why.
    # IDEA: Use anchor mask to predict prediction time;
    # actually, I can just use the number of pillars as well.
    self.measure_time_start('Predict')
    torch.backends.cudnn.benchmark = False
    if self._merge_preds:
        # DEBUG
        # self.plot_amask_and_save(example['anchors_mask'], f"merged_{self._sample_idx}")
        # self.plot_cls_scores_and_save(preds_dict['cls_preds'], example['anchors_mask'],
        #                               f"merged_{self._sample_idx}")
        # DEBUG END
        det = self._net.predict(example, preds_dict)
    else:
        # I can use batching for prediction.
        # Exclude stage 1 slices having a class score sum of 0
        selected_slices = []
        for i, s in enumerate(slice_io_dicts):
            if s['stages_executed'] > 1 or torch.sum(s['cls_scores']) > .0:
                selected_slices.append(s)
        det = self.create_empty_det_dict(example['metadata'][0])
        if len(selected_slices) > 0:
            batch_pred_dict = {}
            for k in self._pred_dict_copy.keys():
                batch_pred_dict[k] = torch.cat(
                    [s[k] for s in selected_slices])
            for k in ['anchors', 'anchors_mask']:
                example[k] = torch.cat([s[k] for s in selected_slices])
            example['metadata'] = []
            slice_dets = self._net.predict(example, batch_pred_dict)
            # remove slices that have no detections
            slice_dets_final = []
            for sd in slice_dets:
                if sd['box3d_lidar'].shape[0] > 0:
                    slice_dets_final.append(sd)
            if len(slice_dets_final) > 0:
                # merge final slice detections
                for k in det.keys():
                    if k != 'metadata':
                        det[k] = torch.cat(
                            [d[k] for d in slice_dets_final])
                # print('3D bounding boxes before:')
                # for box in det['box3d_lidar']:
                #     print(box)
                # Now we need to remove duplicated overlapped predictions
                # if they exist. We have to do it because we executed NMS
                # twice on overlapped regions.
                mask_indexes = []
                centers = det['box3d_lidar'][:, :2].cpu()
                scores = det['scores'].cpu()
                for i in range(centers.shape[0]):
                    diffs = torch.linalg.norm(centers - centers[i], dim=1)
                    sel = True
                    for j, d in enumerate(diffs):
                        if d > 0 and d < 2. and scores[i] < scores[j]:
                            # distance below 2 meter threshold
                            sel = False
                            print("Discard 3d bbox at", centers[i],
                                  'in image', det['metadata']['image_idx'])
                            break
                    if sel:
                        mask_indexes.append(i)
                for k, v in det.items():
                    if k != 'metadata':
                        det[k] = det[k][mask_indexes]
        det = [det]  # batch size 1
    torch.backends.cudnn.benchmark = self._cudnn_benchmarking
    self.measure_time_end('Predict')
    torch.cuda.synchronize()
    self.measure_time_end('Post-stage-1', False)
    return det, stg_seq, num_voxels
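The duplicate-removal loop at the end of slicing_forward, extracted as a standalone helper for clarity (our sketch): a box is dropped when another box center lies within the distance threshold and carries a strictly higher score.

import torch

def dedup_by_center_distance(boxes, scores, dist_threshold=2.0):
    # Greedy O(N^2) dedup over BEV box centers; returns indices to keep.
    centers = boxes[:, :2]
    keep = []
    for i in range(centers.shape[0]):
        diffs = torch.linalg.norm(centers - centers[i], dim=1)
        dup = ((diffs > 0) & (diffs < dist_threshold)
               & (scores > scores[i])).any()
        if not dup:
            keep.append(i)
    return keep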
def no_slicing_forward(self, dataset_iter, deadline):
    self.measure_time_start('Pre-stage-1')
    self.measure_time_start('PFE')
    # self.measure_time_start('PillarGen')
    try:
        if self._repeat_example:
            example = merge_second_batch(
                [self._data_loader.dataset[self._repeat_example_idx]])
        else:
            example = next(dataset_iter)
    except StopIteration:
        print("Woaaaah, that is unexpected! Check dataset iter!")
        return None, None, None
    num_voxels = example["num_voxels"][0][0]  # batch size 1
    if self._method == 3:  # imprecise
        # num_stgs = self.num_stages_to_exec(deadline, num_voxels)
        print('ERROR! imprecise no slice is not supported')
    else:
        num_stgs = self._method + 1
    example = example_convert_to_torch(example, self._float_dtype)
    torch.backends.cudnn.benchmark = False
    io_dict = self._net.forward_pfn(example)
    torch.backends.cudnn.benchmark = self._cudnn_benchmarking
    # Calculate anchor mask
    stg0_sum = torch.sum(io_dict['stage0'], 1, keepdim=True)
    sum_mask = torch.nn.functional.max_pool2d(
        stg0_sum, 15, stride=int(self._stg0_pred_scale_rate),
        padding=7).type(torch.bool)
    example['anchors_mask'] = sum_mask.expand(
        self._box_preds_size[:-1]).contiguous()
    self.measure_time_end('PFE')

    # self.measure_time_start('RPN-total')
    self.measure_time_start('RPN-stage-1')
    self._net.forward_rpn_stage(io_dict)
    self.measure_time_end('RPN-stage-1')
    stg_seq = [1]
    self.measure_time_end('Pre-stage-1')
    self.measure_time_start('Post-stage-1')
    if num_stgs >= 2:
        self.measure_time_start('RPN-stage-2')
        self._net.forward_rpn_stage(io_dict)
        self.measure_time_end('RPN-stage-2')
        stg_seq.append(2)
    if num_stgs == 3:
        self.measure_time_start('RPN-stage-3')
        self._net.forward_rpn_stage(io_dict)
        self.measure_time_end('RPN-stage-3')
        stg_seq.append(3)
    self.measure_time_start('RPN-finalize')
    self._net.forward_rpn_all_preds(io_dict)
    self.measure_time_end('RPN-finalize')
    # self.measure_time_end('RPN-total')
    self.measure_time_start('Predict')
    # torch.cuda.nvtx.range_push('Predict')
    torch.backends.cudnn.benchmark = False
    det = self._net.predict(example, io_dict)
    torch.backends.cudnn.benchmark = self._cudnn_benchmarking
    # torch.cuda.nvtx.range_pop()
    self.measure_time_end('Predict')
    torch.cuda.synchronize()
    self.measure_time_end('Post-stage-1')
    return det, stg_seq, num_voxels
def __init__(self, net, data_loader, deadline_sec, slice_size_perc,
             min_slice_overlap_perc, method, float_dtype,
             batch_size=1, measure_time=True):
    assert (slice_size_perc < 100 or method < 4)
    self._kitti = False
    self._calibration = False
    self._cur_calib_tuple = None
    self._repeat_example = False
    self._repeat_example_idx = 5
    self._max_num_samples = len(data_loader)
    self._merge_preds = True  # Run NMS on each slice if False
    self._net = net
    self._data_loader = data_loader
    self._deadline_sec = deadline_sec
    self._slice_size_perc = slice_size_perc
    self._min_slice_overlap_perc = min_slice_overlap_perc
    self._method = method
    self._float_dtype = float_dtype
    self._batch_size = batch_size
    self._measure_time = measure_time
    self._H_dim = 3
    self._W_dim = 2
    self._use_slicing = (self._method >= 4)
    self._nms_score_threshold = 0.5

    # Point cloud range: [h_min, w_min, z_min, h_max, w_max, z_max]
    self._pc_range = net.voxel_generator.point_cloud_range
    print('Point cloud range:', self._pc_range)
    # Pillar scatter output shape: [1, 1, W, H, 64]
    self._p_scatter_outp_shape = net.middle_feature_extractor.output_shape
    print('Point pillar scatter output:', self._p_scatter_outp_shape)

    self._eval_dict = {}
    print("VAL method", self._method)
    self._eval_dict["method"] = self._method
    self._enable_bar = True

    self._cudnn_benchmarking = True
    self._cudnn_deterministic = False
    self._eval_dict["cudnn_benchmarking"] = self._cudnn_benchmarking
    self._eval_dict["cudnn_deterministic"] = self._cudnn_deterministic
    if self._cudnn_deterministic:
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(0)
        np.random.seed(0)
        torch.set_deterministic(True)
    torch.backends.cudnn.benchmark = self._cudnn_benchmarking

    # Forward once to warm up the cache and get the slice ranges
    example = next(iter(self._data_loader))  # how should I call this func?
    with torch.no_grad():
        example = example_convert_to_torch(example, float_dtype)
        if 'anchors_mask' in example:
            del example['anchors_mask']  # example['anchors_mask'].type(torch.bool)
        io_dict = self._net.forward_pfn(example)  # has variable input size
        self._net.forward_rpn_stage(io_dict)
        self._net.forward_rpn_stage(io_dict)
        self._net.forward_rpn_stage(io_dict)
        self._net.forward_rpn_all_preds(io_dict)
        det = self._net.predict(example, io_dict)[0]
    self._net.calc_elapsed_times()
    self._det_dict_copy = {
        "box3d_lidar": torch.zeros([0, det["box3d_lidar"].size()[1]],
                                   dtype=det["box3d_lidar"].dtype,
                                   device=det["box3d_lidar"].device),
        "scores": torch.zeros([0],
                              dtype=det["scores"].dtype,
                              device=det["scores"].device),
        "label_preds": torch.zeros([0],
                                   dtype=det["label_preds"].dtype,
                                   device=det["label_preds"].device),
        "metadata": None,
    }
    # Print full tensor sizes
    for k, v in io_dict.items():
        if not isinstance(v, int):
            print(k, v.size())
    self._box_preds_size = io_dict['box_preds'].size()
    self._cls_preds_size = io_dict['cls_preds'].size()
    self._net.clear_timers()
    self._pred_zeroer = torch.zeros_like(io_dict["cls_preds"])
    self._pred_dict_copy = {
        "cls_preds": None,
        "box_preds": None,
        "dir_cls_preds": None
    }
    for k in self._pred_dict_copy.keys():
        self._pred_dict_copy[k] = torch.zeros_like(io_dict[k])

    # The H_dim of preds does not change over stages
    pred_sz = io_dict["cls_preds"].size()[self._H_dim]
    stg0_sz = io_dict["stage0"].size()[self._H_dim]
    self._stg0_pred_scale_rate = stg0_sz / pred_sz
    if self._use_slicing:
        self._preds_slc_ranges, self._preds_ovl_slc_ranges = \
            self.get_slice_ranges_v3(io_dict["cls_preds"])
        # Slicing can happen after stage 1 or stage 2
        stg1_sz = io_dict["stage1"].size()[self._H_dim]
        up1_sz = io_dict["up1"].size()[self._H_dim]
        scale_rate = stg1_sz / pred_sz
        self._stg1_slc_ranges = [(int(r[0] * scale_rate),
                                  int(r[1] * scale_rate))
                                 for r in self._preds_slc_ranges]
        scale_rate = up1_sz / pred_sz
        self._up1_slc_ranges = [(int(r[0] * scale_rate),
                                 int(r[1] * scale_rate))
                                for r in self._preds_slc_ranges]
        stg2_sz = io_dict["stage2"].size()[self._H_dim]
        up2_sz = io_dict["up2"].size()[self._H_dim]
        scale_rate = stg2_sz / pred_sz
        self._stg2_slc_ranges = [(int(r[0] * scale_rate),
                                  int(r[1] * scale_rate))
                                 for r in self._preds_slc_ranges]
        scale_rate = up2_sz / pred_sz
        self._up2_slc_ranges = [(int(r[0] * scale_rate),
                                 int(r[1] * scale_rate))
                                for r in self._preds_slc_ranges]
        self._num_slices = len(self._stg1_slc_ranges)

        # split overlapped regions for class scores
        posr = self._preds_ovl_slc_ranges
        self._cls_scr_ranges = [(0, posr[0][0])]
        for i in range(len(posr) - 1):
            ro1, ro2 = posr[i], posr[i + 1]
            self._cls_scr_ranges.append(ro1)
            self._cls_scr_ranges.append((ro1[1], ro2[0]))
        self._cls_scr_ranges.append(posr[-1])
        self._cls_scr_ranges.append((posr[-1][1], pred_sz))

        if self._cudnn_benchmarking:
            # This will trigger all possible slice inputs
            print('Starting dry run for benchmarking')
            self._dry_run_slices = []
            self._dry_run_idx = 0
            for i in range(self._num_slices + 1):
                for j in range(i + 1):
                    self._dry_run_slices.append(
                        (np.arange(i, dtype=np.uint).tolist(),
                         np.arange(j, dtype=np.uint).tolist()))
            di = iter(self._data_loader)
            self._dry_run = True
            for drs in self._dry_run_slices:
                self.slicing_forward(di, time.time() + 10.0)
            self._dry_run = False
            self._net.clear_timers()
            print('Dry run finished')

        slice_size_percs_possible = None
        do_slice_size_investigation = False
        if do_slice_size_investigation:
            slice_size_percs_possible = []
            print("Num_slices\tslice_size_perc\t\toverlap_percs")
            keepit = self._slice_size_perc
            for ssp in range(stg0_sz // 10, stg0_sz // 2):
                self._slice_size_perc = ssp / stg0_sz * 100
                slc_ranges, ovl_ranges = self.get_slice_ranges_v3(
                    io_dict["stage0"])
                ovl_percs = [
                    round((ovl_r[1] - ovl_r[0]) / stg0_sz * 100, 2)
                    for ovl_r in ovl_ranges
                ]
                stg0_slices = self.slice_with_ranges(
                    io_dict["stage0"], slc_ranges)
                num_slcs = len(stg0_slices)
                temp_io_dict = {
                    "stage0": stg0_slices[0],
                    "stages_executed": 0,
                }
                try:
                    for i in range(3):
                        self._net.forward_rpn_stage(temp_io_dict)
                    self._net.forward_rpn_cls_preds(temp_io_dict)
                    stg0_slc_sz = temp_io_dict["stage0"].size()[self._H_dim]
                    pred_slc_sz = temp_io_dict["cls_preds"].size()[self._H_dim]
                    if stg0_slc_sz % pred_slc_sz != 0:
                        raise Exception("Rates do not match")
                except Exception:
                    pass
                else:
                    slice_size_percs_possible.append(self._slice_size_perc)
                    print(f"{num_slcs}\t\t{self._slice_size_perc}\t{ovl_percs}")
            self._slice_size_perc = keepit

        self._calib_test_cases = []
        if self._calibration:
            self._calibration_dict = {
                "data": {},
                "stats": {},
                "eval": {},
                "mAP": {},
            }
            for i in range(self._num_slices + 1):
                for j in range(i + 1):
                    self._calib_test_cases.append((i, j))
        else:
            # find the calibration file and read it
            with open(f"slice_calib_dict_s{self._slice_size_perc}.json",
                      'r') as handle:
                self._calibration_dict = json.load(handle)
            # Use 99th percentile Post-stage-1 times
            m = np.finfo(np.single).max
            self._post_stg1_table = np.full(
                (self._num_slices + 1, self._num_slices + 1), m)
            stat_dict = self._calibration_dict['stats']
            for k, v in stat_dict.items():
                r, c = k.replace('(', '').replace(')', '').replace(
                    ',', '').split()
                r, c = int(r), int(c)
                self._post_stg1_table[r, c] = v['Post-stage-1'][3]

        print("VAL min_slice_overlap_perc", self._min_slice_overlap_perc)
        self._eval_dict["min_slice_overlap_perc"] = self._min_slice_overlap_perc
        print("VAL num_slices", self._num_slices)
        self._eval_dict["num_slices"] = self._num_slices
        print("2D_LIST _stg1_slc_ranges", self._stg1_slc_ranges)
        print("2D_LIST _stg2_slc_ranges", self._stg2_slc_ranges)
        print("2D_LIST _preds_slc_ranges", self._preds_slc_ranges)
        print('2D_LIST _cls_scr_ranges', self._cls_scr_ranges)
    else:
        print("VAL min_slice_overlap_perc", 0)
        self._eval_dict["min_slice_overlap_perc"] = 0
        print("VAL num_slices 1")
        self._eval_dict["num_slices"] = 1

    print("VAL deadline_sec", self._deadline_sec)
    self._eval_dict["deadline_sec"] = self._deadline_sec
    print("VAL slice_size_perc", self._slice_size_perc)
    self._eval_dict["slice_size_perc"] = self._slice_size_perc

    # Ground truth can be obtained like this;
    # ground truth center location: [w, z, h]
    if self._kitti:
        self._gt_annos = [
            info["annos"]
            for info in data_loader.dataset.dataset._kitti_infos
        ]
        self._gt_img_paths = [
            info["image"]["image_path"]
            for info in data_loader.dataset.dataset._kitti_infos
        ]
    else:
        self._gt_annos = data_loader.dataset.dataset.ground_truth_annotations
        self._gt_img_paths = [
            info["cam_front_path"]
            for info in data_loader.dataset.dataset._nusc_infos
        ]
    # self._other_cuda_stream = torch.cuda.Stream()