def test(config_path):
    """Smoke-test ``SparseMiddleExtractor`` with a handful of random voxels.

    Reads the pipeline config at ``config_path``, builds the voxel generator
    and a CUDA middle feature extractor from it, runs three random voxel
    features through the extractor and prints the dense shape, the parameter
    count and the output shape.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.
    """
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), config)
    model_cfg = config.model.second
    mfe_cfg = model_cfg.middle_feature_extractor

    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    num_vfe_out = list(model_cfg.voxel_feature_extractor.num_filters)[-1]
    # Reversed grid size framed by a leading 1 and the VFE output width,
    # e.g. [1] + [10, 400, 352] + [128] -> [1, 10, 400, 352, 128].
    dense_shape = [1, *voxel_generator.grid_size[::-1].tolist(), num_vfe_out]
    print('dense_shape', dense_shape)

    extractor = SparseMiddleExtractor(
        output_shape=dense_shape,
        use_norm=True,
        num_input_features=num_vfe_out,
        num_filters_down1=list(mfe_cfg.num_filters_down1),
        num_filters_down2=list(mfe_cfg.num_filters_down2),
    ).cuda()
    print(count_parameters(extractor))  # 0.4M

    # NOTE(review): coors stays a CPU float tensor while the features are on
    # CUDA -- presumably the extractor converts it; confirm.
    coors = torch.Tensor([[0, 11, 12, 13], [1, 22, 23, 24], [0, 33, 34, 35]])
    voxel_features = torch.randn(3, num_vfe_out).cuda()
    batch_size = 2
    ret = extractor(voxel_features, coors, batch_size)
    print(ret.shape)  # [2, 128, 400, 352]
def _init_model(self):
    """Load the config, restore the network and precompute its anchors.

    Reads ``self.config_p`` (text-format pipeline config) and
    ``self.model_p`` (checkpoint), and relies on a module-level ``device``.
    Sets ``config``, ``input_cfg``, ``model_cfg``, ``net``,
    ``target_assigner``, ``voxel_generator`` and ``anchors`` on ``self``.
    """
    self.config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(self.config_p, 'r') as f:
        proto_str = f.read()
    text_format.Merge(proto_str, self.config)
    self.input_cfg = self.config.eval_input_reader
    self.model_cfg = self.config.model.second
    config_tool.change_detection_range_v2(self.model_cfg, [-50, -50, 50, 50])
    logging.info('config loaded.')

    self.net = build_network(self.model_cfg).to(device).eval()
    # map_location keeps a checkpoint saved on a different device (e.g. a
    # CUDA checkpoint on a CPU-only host) loadable.
    self.net.load_state_dict(torch.load(self.model_p, map_location=device))
    self.target_assigner = self.net.target_assigner
    self.voxel_generator = self.net.voxel_generator
    logging.info('network done, voxel done.')

    grid_size = self.voxel_generator.grid_size
    feature_map_size = grid_size[:2] // config_tool.get_downsample_factor(
        self.model_cfg)
    # Append a trailing 1 and reverse the order before generating anchors.
    feature_map_size = [*feature_map_size, 1][::-1]
    self.anchors = self.target_assigner.generate_anchors(
        feature_map_size)['anchors']
    self.anchors = torch.tensor(self.anchors,
                                dtype=torch.float32,
                                device=device)
    # Flatten to (1, num_anchors, 7).
    self.anchors = self.anchors.view(1, -1, 7)
    logging.info('anchors generated.')
def read_config(path):
    """Parse the text-format pipeline config stored at ``path``.

    Args:
        path: location of the config file.

    Returns:
        A populated ``pipeline_pb2.TrainEvalPipelineConfig`` message.
    """
    parsed = pipeline_pb2.TrainEvalPipelineConfig()
    with open(path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), parsed)
    return parsed
def trans_onnx(config_path, ckpt_path):
    """Export the voxel feature extractor and the RPN of a network to ONNX.

    Builds the network described by ``config_path``, restores ``ckpt_path``
    and writes ``pfe.onnx`` and ``rpn.onnx`` into the current directory,
    tracing both sub-networks with all-ones dummy inputs.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.
        ckpt_path: path of the checkpoint holding the network state dict.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
    text_format.Merge(proto_str, config)
    model_cfg = config.model.second

    net = build_network(model_cfg, measure_time=False).to(device)
    # map_location lets a CUDA-saved checkpoint load on the CPU fallback.
    net.load_state_dict(torch.load(ckpt_path, map_location=device))

    # Dummy inputs for the voxel feature extractor.
    voxels = torch.ones([12000, 100, 4], dtype=torch.float32, device=device)
    num_points = torch.ones([12000], dtype=torch.float32, device=device)
    coors = torch.ones([12000, 4], dtype=torch.float32, device=device)
    example1 = (voxels, num_points, coors)

    # Dummy input for the RPN.
    spatial_features = torch.ones([1, 64, 496, 432],
                                  dtype=torch.float32,
                                  device=device)
    example2 = (spatial_features, )

    torch.onnx.export(net.voxel_feature_extractor,
                      example1,
                      "pfe.onnx",
                      verbose=False)
    torch.onnx.export(net.rpn, example2, "rpn.onnx", verbose=False)
def test(config_path):
    """Build the training dataset from a config and time one sample fetch.

    Prints the dataset length and the seconds spent building the dataset
    and reading the item at index 2.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.
    """
    # cfg
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), config)
    model_cfg = config.model.second
    input_cfg = config.train_input_reader

    # builds
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner = target_assigner_builder.build(
        model_cfg.target_assigner, bv_range, box_coder)

    t_begin = time.time()
    input_cfg.database_sampler.database_sampler_name = "DataBaseSamplerV3"
    # Third positional argument is the ``training`` flag.
    dataset = DatasetWrapper(
        dataset_builder_build(input_cfg, model_cfg, True, voxel_generator,
                              target_assigner))
    print(len(dataset))
    # Fetch one example purely to exercise the loading path.
    example1 = dataset[2]
    print(time.time() - t_begin, 'sec')
def build_network_():
    """Request handler: build the network and eval dataset into ``BACKEND``.

    Expects a JSON request body with ``config_path`` and
    ``checkpoint_path``. Returns ``error_response(...)`` when the backend
    root path is unset or either file is missing. Otherwise it stores the
    network, eval dataset, config and device on ``BACKEND`` and returns a
    JSON response whose single result has status ``"normal"``.
    """
    global BACKEND
    instance = request.json
    cfg_path = Path(instance["config_path"])
    ckpt_path = Path(instance["checkpoint_path"])
    response = {"status": "normal"}
    if BACKEND.root_path is None:
        return error_response("root path is not set")
    if not cfg_path.exists():
        return error_response("config file not exist.")
    if not ckpt_path.exists():
        return error_response("ckpt file not exist.")

    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(cfg_path, "r") as f:
        proto_str = f.read()
    text_format.Merge(proto_str, config)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = build_network(config.model.second).to(device).float().eval()
    # map_location lets a CUDA-saved checkpoint load on the CPU fallback.
    net.load_state_dict(torch.load(ckpt_path, map_location=device))

    eval_input_cfg = config.eval_input_reader
    BACKEND.dataset = input_reader_builder.build(
        eval_input_cfg,
        config.model.second,
        training=False,
        voxel_generator=net.voxel_generator,
        target_assigner=net.target_assigner).dataset
    BACKEND.net = net
    BACKEND.config = config
    BACKEND.device = device

    response = jsonify(results=[response])
    response.headers['Access-Control-Allow-Headers'] = '*'
    print("build_network successful!")
    return response
def pytorch_inference(config_path=None, ckpt_path=None, data_path=None):
    """Run the PyTorch network on one point cloud and save a BEV image.

    Builds the network from ``config_path``, restores ``ckpt_path``, runs it
    on the point cloud read from ``data_path`` and writes the bird's-eye-view
    rendering with the predicted boxes to ``result.png``.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.
        ckpt_path: path of the checkpoint holding the network state dict.
        data_path: path handed to ``read_pointcloud``.
    """
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
    text_format.Merge(proto_str, config)
    model_cfg = config.model.second

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = build_network(model_cfg).to(device).eval()
    # map_location lets a CUDA-saved checkpoint load on the CPU fallback.
    net.load_state_dict(torch.load(ckpt_path, map_location=device))

    points = read_pointcloud(data_path)
    example = generate_example(net, model_cfg, points, device)
    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        pred = net(example)[0]
    boxes_lidar = pred["box3d_lidar"].detach().cpu().numpy()

    vis_voxel_size = [0.1, 0.1, 0.1]
    vis_point_range = [-50, -30, -3, 50, 30, 1]
    bev_map = simplevis.point_to_vis_bev(points, vis_voxel_size,
                                         vis_point_range)
    bev_map = simplevis.draw_box_in_bev(bev_map, vis_point_range, boxes_lidar,
                                        [0, 255, 0], 2)
    plt.imsave("result.png", bev_map)
def _init_net(self):
    """Build the CUDA network from ``self.config_f`` and restore its weights.

    Sets ``config``, ``input_cfg``, ``model_cfg``, ``train_cfg``,
    ``class_names``, ``center_limit_range``, ``target_assigner`` and ``net``
    on ``self``, then restores the latest checkpoint found in
    ``self.model_dir``.
    """
    cfg = pipeline_pb2.TrainEvalPipelineConfig()
    self.config = cfg
    with open(self.config_f, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), cfg)

    self.input_cfg = cfg.eval_input_reader
    self.model_cfg = cfg.model.second
    self.train_cfg = cfg.train_config
    self.class_names = list(self.input_cfg.class_names)
    self.center_limit_range = self.model_cfg.post_center_limit_range

    # BUILD VOXEL GENERATOR
    voxel_generator = voxel_builder.build(self.model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(self.model_cfg.box_coder)
    self.target_assigner = target_assigner_builder.build(
        self.model_cfg.target_assigner, bv_range, box_coder)

    self.net = second_builder.build(self.model_cfg, voxel_generator,
                                    self.target_assigner)
    self.net.cuda()
    if self.train_cfg.enable_mixed_precision:
        # Half-precision network; metrics and norm layers are converted
        # back to float by the two helper calls below.
        self.net.half()
        self.net.metrics_to_float()
        self.net.convert_norm_to_float(self.net)

    torchplus.train.try_restore_latest_checkpoints(self.model_dir,
                                                   [self.net])
    print('Success load latest checkpoint in {}'.format(self.model_dir))
def save_config(config_path, save_path):
    """Re-serialise a pipeline config with two-space indentation.

    Args:
        config_path: text-format ``TrainEvalPipelineConfig`` file to read.
        save_path: destination file for the re-formatted text.
    """
    parsed = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as src:
        text_format.Merge(src.read(), parsed)
    rendered = text_format.MessageToString(parsed, indent=2)
    with open(save_path, 'w') as dst:
        dst.write(rendered)
def onnx_inference(config_path, data_path, pfe_path, rpn_path):
    """Run detection with ONNX Runtime for the PFE and RPN stages.

    The voxel feature extractor and the RPN are executed from the ONNX files
    ``pfe_path`` and ``rpn_path``; the middle feature extractor and the final
    ``predict`` step come from a network built from ``config_path``. The BEV
    rendering with predicted boxes is written to ``result_onnx.png``.

    NOTE(review): no checkpoint is loaded into the PyTorch network here --
    presumably the stages taken from it carry no trained weights; confirm.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.
        data_path: path handed to ``read_pointcloud``.
        pfe_path: ONNX file of the voxel/pillar feature extractor.
        rpn_path: ONNX file of the RPN.
    """
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), config)
    model_cfg = config.model.second

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = build_network(model_cfg).to(device).eval()
    points = read_pointcloud(data_path)
    example = generate_example(net, model_cfg, points, device)

    # ONNX Runtime sessions for the two exported stages.
    pfe_session = onnxruntime.InferenceSession(pfe_path)
    rpn_session = onnxruntime.InferenceSession(rpn_path)

    # Stage 1: feature extractor on (voxels, num_points, coordinates).
    pfe_feed = {
        pfe_session.get_inputs()[0].name: to_numpy(example["voxels"]),
        pfe_session.get_inputs()[1].name: to_numpy(example["num_points"]),
        pfe_session.get_inputs()[2].name: to_numpy(example["coordinates"]),
    }
    pfe_out = pfe_session.run(None, pfe_feed)

    # Stage 2: middle extractor in PyTorch with batch size 1.
    voxel_features = torch.from_numpy(pfe_out[0]).to(device)
    spatial_features = net.middle_feature_extractor(
        voxel_features, example["coordinates"], 1)

    # Stage 3: RPN through ONNX Runtime.
    rpn_feed = {
        rpn_session.get_inputs()[0].name: to_numpy(spatial_features),
    }
    rpn_out = rpn_session.run(None, rpn_feed)

    # RPN outputs are consumed in the order box / cls / direction.
    head_names = ("box_preds", "cls_preds", "dir_cls_preds")
    preds_dict = {
        name: torch.from_numpy(rpn_out[idx]).to(device)
        for idx, name in enumerate(head_names)
    }

    with torch.no_grad():
        pred = net.predict(example, preds_dict)[0]
    boxes_lidar = pred["box3d_lidar"].detach().cpu().numpy()

    vis_voxel_size = [0.1, 0.1, 0.1]
    vis_point_range = [-50, -30, -3, 50, 30, 1]
    bev_map = simplevis.point_to_vis_bev(points, vis_voxel_size,
                                         vis_point_range)
    bev_map = simplevis.draw_box_in_bev(bev_map, vis_point_range, boxes_lidar,
                                        [0, 255, 0], 2)
    plt.imsave("result_onnx.png", bev_map)
def read_config_file(config_path):
    """Load eval/model configs with the detection range set to +/-50.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.

    Returns:
        Tuple ``(input_cfg, model_cfg, device)`` where ``device`` is CUDA
        when available and CPU otherwise.
    """
    pipeline = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), pipeline)

    model_cfg = pipeline.model.second
    # Mutates model_cfg in place via the v2 range helper.
    config_tool.change_detection_range_v2(model_cfg, [-50, -50, 50, 50])

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    return pipeline.eval_input_reader, model_cfg, device
def log_function(model_dir, config_path):
    """Open a ``SimpleModelLog`` and record the raw config text in it.

    The config is also parsed into a ``TrainEvalPipelineConfig``; the parsed
    message is not used afterwards, so parsing only acts as a validity check.

    Args:
        model_dir: directory handed to ``SimpleModelLog``.
        config_path: path of a text-format pipeline config file.

    Returns:
        The opened ``SimpleModelLog`` instance.
    """
    log = SimpleModelLog(model_dir)
    log.open()

    with open(config_path, "r") as cfg_file:
        proto_str = cfg_file.read()
    text_format.Merge(proto_str, pipeline_pb2.TrainEvalPipelineConfig())

    log.log_text(proto_str + "\n", 0, tag="config")
    return log
def edit_detector_config(dataset_root, temp_data_dir, config_path):
    """Point the eval reader of a config at a new dataset, in place.

    Rewrites ``config_path`` with ``eval_input_reader.kitti_info_path`` set
    to ``<temp_data_dir>/kitti_infos_test.pkl`` and
    ``eval_input_reader.kitti_root_path`` set to ``dataset_root``.

    Args:
        dataset_root: new dataset root path.
        temp_data_dir: directory (string) containing the info pickle.
        config_path: config file that is read and then overwritten.
    """
    pipeline = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as src:
        text_format.Merge(src.read(), pipeline)

    reader = pipeline.eval_input_reader
    reader.kitti_info_path = temp_data_dir + "/kitti_infos_test.pkl"
    reader.kitti_root_path = dataset_root

    with open(config_path, "w") as dst:
        dst.write(text_format.MessageToString(pipeline))
def read_config(self):
    """Load ``self.config_path`` and cache eval/model configs and device.

    Sets ``self.input_cfg``, ``self.model_cfg`` (with the detection range
    changed to ``[-30, -20, 30, 20]``) and ``self.device`` (CUDA when
    available, CPU otherwise).
    """
    pipeline = pipeline_pb2.TrainEvalPipelineConfig()
    with open(self.config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), pipeline)

    self.input_cfg = pipeline.eval_input_reader
    self.model_cfg = pipeline.model.second
    config_tool.change_detection_range(self.model_cfg, [-30, -20, 30, 20])

    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    self.device = torch.device(device_name)
def inference(model_dir=None, filter_bg=False):
    """Compile the PFE and RPN sub-networks into TensorRT engines.

    Loads ``voxelnet.tckpt`` from ``model_dir``, builds the network described
    by ``./configs/xyres_28_huituo.config`` on CUDA and writes the converted
    engines' state dicts to ``pfe.trt`` and ``rpn.trt`` in ``model_dir``.

    Args:
        model_dir: directory holding the checkpoint; also receives the
            engine files.
        filter_bg: when true, the dummy PFE input is sized for 1000 voxels
            instead of 12000.
    """
    model_dir = Path(model_dir)
    config_path = './configs/xyres_28_huituo.config'
    device = torch.device("cuda")

    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
    text_format.Merge(proto_str, config)
    model_cfg = config.model.second
    voxel_cfg = model_cfg.voxel_generator
    max_voxel_num = 1000 if filter_bg else 12000

    net = build_network(model_cfg).to(device)

    # Load the weight file and drop entries the network should not receive
    # (global step and running metric buffers).
    state_dict = torch.load(str(model_dir / 'voxelnet.tckpt'))
    for key in ("global_step", "rpn_acc.total", "rpn_acc.count",
                "rpn_precision.total", "rpn_precision.count",
                "rpn_recall.total", "rpn_recall.count",
                "rpn_metrics.prec_total", "rpn_metrics.prec_count",
                "rpn_metrics.rec_total", "rpn_metrics.rec_count",
                "rpn_cls_loss.total", "rpn_cls_loss.count",
                "rpn_loc_loss.total", "rpn_loc_loss.count",
                "rpn_total_loss.total", "rpn_total_loss.count"):
        state_dict.pop(key, None)
    net.load_state_dict(state_dict)
    net.eval()

    # TensorRT engine paths.
    pfe_trt_path = str(model_dir / "pfe.trt")
    rpn_trt_path = str(model_dir / "rpn.trt")

    # Dummy model inputs used to compile the TensorRT engines.
    example_tensor = generate_tensor_list(max_voxel_num,
                                          float_type=torch.float32,
                                          device=device)

    # Compile the pillar feature net sub-network engine.
    print("开始转换pfe子网络......")
    pfe_trt = torch2trt(net.voxel_feature_extractor,
                        example_tensor,
                        fp16_mode=True,
                        max_workspace_size=1 << 20)
    torch.save(pfe_trt.state_dict(), pfe_trt_path)

    # Compile the rpn sub-network engine.
    print("开始转换rpn子网络......")
    pc_range = np.array(voxel_cfg.point_cloud_range)
    vs = np.array(voxel_cfg.voxel_size)
    # Range extent divided by voxel size, reversed. ``np.int`` was removed
    # in NumPy 1.24; the builtin ``int`` is what it aliased.
    fp_size = ((pc_range[3:] - pc_range[:3]) / vs)[::-1].astype(int)
    rpn_input = torch.ones((1, 64, fp_size[1], fp_size[2]),
                           dtype=torch.float32,
                           device=device)
    rpn_trt = torch2trt(net.rpn, [rpn_input],
                        fp16_mode=True,
                        max_workspace_size=1 << 20)
    torch.save(rpn_trt.state_dict(), rpn_trt_path)
    print("export trt model successful")
def train_multi_rpn_layer_num():
    """Train the nuscenes FHD config once per candidate RPN layer count.

    For each value in ``[2, 4, 7, 9]`` the config's ``rpn.layer_nums`` is
    replaced by that single value and ``train`` is called (with
    ``resume=True``) on ``~/second_test/all_fhd_<value>``.
    """
    config_path = "./configs/nuscenes/all.fhd.config"
    model_root = Path.home() / "second_test"  # don't forget to change this.

    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), config)
    input_cfg = config.eval_input_reader
    model_cfg = config.model.second

    for num_layers in (2, 4, 7, 9):
        run_dir = str(model_root / f"all_fhd_{num_layers}")
        # Slice assignment mutates the shared config in place.
        model_cfg.rpn.layer_nums[:] = [num_layers]
        train(config, run_dir, resume=True)
def load_model_config(model_dir, config_path):
    """Parse a pipeline config and back the file up into ``model_dir``.

    ``model_dir`` is created if missing and the config file is copied into
    it as ``pipeline.config``.

    Args:
        model_dir: directory receiving the backup copy.
        config_path: path of a text-format pipeline config file.

    Returns:
        The ``model.second`` sub-message of the parsed config.
    """
    out_dir = pathlib.Path(model_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    pipeline = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), pipeline)

    backup_target = out_dir / "pipeline.config"
    shutil.copyfile(config_path, str(backup_target))
    return pipeline.model.second
def build_inference_net(config_path,
                        model_dir,
                        result_path=None,
                        predict_test=False,
                        ckpt_path=None,
                        ref_detfile=None,
                        pickle_result=True,
                        measure_time=False,
                        batch_size=1):
    """Build a CUDA network in eval mode with restored weights.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.
        model_dir: directory searched for the latest checkpoint when
            ``ckpt_path`` is ``None``.
        ckpt_path: explicit checkpoint to restore; takes precedence over
            ``model_dir``.
        measure_time: forwarded to ``second_builder.build``.
        result_path, predict_test, ref_detfile, pickle_result, batch_size:
            accepted for interface compatibility; they do not influence the
            returned network.

    Returns:
        The restored network, on CUDA and switched to eval mode.
    """
    # The previous body derived several locals that were never used. One of
    # them (``batch_size or input_cfg.batch_size``) referenced an undefined
    # ``input_cfg`` and raised NameError for a falsy ``batch_size``; those
    # dead statements are dropped.
    model_dir = pathlib.Path(model_dir)

    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
    text_format.Merge(proto_str, config)
    model_cfg = config.model.second

    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner = target_assigner_builder.build(
        model_cfg.target_assigner, bv_range, box_coder)

    net = second_builder.build(model_cfg,
                               voxel_generator,
                               target_assigner,
                               measure_time=measure_time)
    net.cuda()

    if ckpt_path is None:
        print("load existing model")
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    net.eval()
    return net
def main():
    """Export the RPN of a trained network to ``rpn.onnx``.

    Builds the network and the eval dataset from the hard-coded config and
    checkpoint paths, runs the first dataset example through the voxel and
    middle feature extractors and uses the resulting spatial features as the
    tracing input for the RPN export.
    """
    cfg_path = Path('/..../pointpillars/car/xyres_##.config')
    ckpt_path = Path('/..../voxelnet-######.tckpt')

    config = pipeline_pb2.TrainEvalPipelineConfig()
    print("config reading")
    with open(cfg_path, "r") as f:
        proto_str = f.read()
    text_format.Merge(proto_str, config)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("building net")
    net = build_network(config.model.second).to(device).float().eval()
    # map_location lets a CUDA-saved checkpoint load on the CPU fallback.
    net.load_state_dict(torch.load(ckpt_path, map_location=device))
    print("net built")

    eval_input_cfg = config.eval_input_reader
    dataset = input_reader_builder.build(
        eval_input_cfg,
        config.model.second,
        training=False,
        voxel_generator=net.voxel_generator,
        target_assigner=net.target_assigner).dataset

    idx = 0
    example = dataset[idx]
    # Prepend a zero column to the coordinates (single-sample batch index).
    example["coordinates"] = np.pad(example["coordinates"], ((0, 0), (1, 0)),
                                    mode='constant',
                                    constant_values=0)
    # don't forget to add newaxis for anchors
    example["anchors"] = example["anchors"][np.newaxis, ...]
    example_torch = example_convert_to_torch(example, device=device)

    voxels = example_torch["voxels"]
    num_points = example_torch["num_points"]
    coors = example_torch["coordinates"]
    batch_size_dev = example["anchors"].shape[0]

    voxel_features = net.voxel_feature_extractor(voxels, num_points, coors)
    spatial_features = net.middle_feature_extractor(voxel_features, coors,
                                                    batch_size_dev)

    # Export the model through the public API with an explicit args tuple.
    print("exporting as onnx")
    torch.onnx.export(net.rpn, (spatial_features, ),
                      "rpn.onnx",
                      export_params=True)
    print("export complete")
def load_config(model_dir, config_path):
    """Resolve a pipeline config and back it up into ``model_dir``.

    Args:
        model_dir: directory (``str`` or path object) that receives the
            ``pipeline.config`` backup; it must already exist.
        config_path: either a path (``str`` or ``os.PathLike``) to a
            text-format config file, or an already-built config message.

    Returns:
        Tuple ``(config, proto_str)`` where ``proto_str`` is the text that
        was written to the backup file.
    """
    import os
    from pathlib import Path

    config_file_bkp = "pipeline.config"
    # Accept plain strings too; ``str / str`` would raise TypeError below.
    model_dir = Path(model_dir)

    if isinstance(config_path, (str, os.PathLike)):
        config = pipeline_pb2.TrainEvalPipelineConfig()
        with open(config_path, "r") as f:
            proto_str = f.read()
        text_format.Merge(proto_str, config)
    else:
        # directly provide a config object. this usually used
        # when you want to train with several different parameters in
        # one script.
        config = config_path
        proto_str = text_format.MessageToString(config, indent=2)

    with (model_dir / config_file_bkp).open("w") as f:
        f.write(proto_str)
    return config, proto_str
def __init__(self, config_filepath, weight_filepath):
    """Build the detector network and precompute its anchors.

    Args:
        config_filepath: path of a text-format ``TrainEvalPipelineConfig``.
        weight_filepath: checkpoint holding the network state dict.
    """
    # ======================================================
    # Read Config file
    # ======================================================
    self.config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_filepath, "r") as f:
        proto_str = f.read()
    text_format.Merge(proto_str, self.config)
    self.input_cfg = self.config.eval_input_reader
    self.model_cfg = self.config.model.second
    # config_tool.change_detection_range_v2(self.model_cfg, [-50, -50, 50, 50])

    # ======================================================
    # Build Network, Target Assigner and Voxel Generator
    # ======================================================
    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    self.net = build_network(self.model_cfg).to(self.device).eval()
    # map_location lets a CUDA-saved checkpoint load on the CPU fallback.
    self.net.load_state_dict(
        torch.load(weight_filepath, map_location=self.device))
    self.target_assigner = self.net.target_assigner
    self.voxel_generator = self.net.voxel_generator

    # ======================================================
    # Generate Anchors
    # ======================================================
    grid_size = self.voxel_generator.grid_size
    print("========= grid_size")
    print(grid_size)
    print("========= voxel_size")
    print(self.voxel_generator.voxel_size)
    print("========= point_cloud_range")
    print(self.voxel_generator.point_cloud_range)

    feature_map_size = grid_size[:2] // config_tool.get_downsample_factor(
        self.model_cfg)
    # Append a trailing 1 and reverse the order before generating anchors.
    feature_map_size = [*feature_map_size, 1][::-1]
    print("========= feature_map_size")
    print(feature_map_size)

    self.anchors = self.target_assigner.generate_anchors(
        feature_map_size)["anchors"]
    self.anchors = torch.tensor(self.anchors,
                                dtype=torch.float32,
                                device=self.device)
    # Flatten to (1, num_anchors, 7).
    self.anchors = self.anchors.view(1, -1, 7)
    print("========= anchors.shape")
    print(self.anchors.shape)
def test(config_path):
    """Smoke-test the ``RPN`` module on a random CUDA feature map.

    Builds the target assigner and an ``RPN`` from the config, prints the
    RPN parameter count, then prints the shapes of the box and class
    predictions for one random input.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.
    """
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), config)
    model_cfg = config.model.second
    rpn_cfg = model_cfg.rpn

    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner = target_assigner_builder.build(
        model_cfg.target_assigner, bv_range, box_coder)

    num_rpn_input_filters = 64
    in_channels = num_rpn_input_filters * 2
    rpn = RPN(
        use_norm=True,
        num_class=model_cfg.num_class,
        layer_nums=list(rpn_cfg.layer_nums),
        layer_strides=list(rpn_cfg.layer_strides),
        num_filters=list(rpn_cfg.num_filters),
        upsample_strides=list(rpn_cfg.upsample_strides),
        num_upsample_filters=list(rpn_cfg.num_upsample_filters),
        num_input_filters=in_channels,
        num_anchor_per_loc=target_assigner.num_anchors_per_location,
        encode_background_as_zeros=model_cfg.encode_background_as_zeros,
        use_direction_classifier=model_cfg.use_direction_classifier,
        use_bev=model_cfg.use_bev,
        num_groups=rpn_cfg.num_groups,
        use_groupnorm=rpn_cfg.use_groupnorm,
        box_code_size=target_assigner.box_coder.code_size,
    )
    print(count_parameters(rpn))  # 5M

    # spatial_features [Batch, C, H, W]
    spatial_features = torch.randn(1, in_channels, 400, 768).cuda()
    rpn = rpn.cuda()
    preds_dict = rpn(spatial_features)

    # box_preds [Batch, H/2, W/2, 14]
    print(preds_dict["box_preds"].shape)
    # cls_preds [Batch, H/2, W/2, 2]
    print(preds_dict["cls_preds"].shape)
def eval_multi_threshold():
    """Evaluate the car FHD config once per NMS score threshold.

    The placeholder checkpoint path must be edited before use; the assert
    below fails while it is still the placeholder.
    """
    config_path = "./configs/car.fhd.config"
    ckpt_name = "/path/to/your/model_ckpt"  # don't forget to change this.
    assert "/path/to/your" not in ckpt_name

    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), config)
    model_cfg = config.model.second

    for score_thresh in (0.3, ):
        model_cfg.nms_score_threshold = score_thresh
        # don't forget to change this.
        out_dir = Path.home() / f"second_test_eval_{score_thresh:.2f}"
        evaluate(config,
                 result_path=out_dir,
                 ckpt_path=str(ckpt_name),
                 batch_size=1,
                 measure_time=True)
def __init__(self,
             env,
             lidar,
             light_curtain,
             dp_optimizer,
             config_file,
             ckpt_file,
             latency=72):
    """Set up the detector, its device dependencies and its network.

    Args:
        env: forwarded to the parent initialiser (with ``capacity=1``).
        lidar: lidar device the detector depends on.
        light_curtain: light curtain device the detector depends on.
        dp_optimizer: optimiser object the detector depends on.
        config_file: path of a text-format ``TrainEvalPipelineConfig``.
        ckpt_file: checkpoint holding the network state dict.
        latency: latency of the forward pass.
    """
    super(Detector, self).__init__(env, capacity=1)
    self.latency = latency  # latency of the forward pass

    # devices the detector depends on
    self.lidar = lidar
    self.light_curtain = light_curtain
    self.dp_optimizer = dp_optimizer

    # load config
    self.config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_file, "r") as f:
        proto_str = f.read()
    text_format.Merge(proto_str, self.config)

    # create net
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.net = self.build_network().to(device).float().eval()
    # map_location lets a CUDA-saved checkpoint load on the CPU fallback.
    self.net.load_state_dict(torch.load(ckpt_file, map_location=device))

    # create preprocess function
    self.preprocess_fn = self.create_preprocess_fn()
def eval_multi_threshold():
    """Evaluate the nuscenes FHD config once per NMS score threshold.

    Results for each threshold go to ``~/second_test_eval_<thresh>``.
    """
    config_path = "./configs/nuscenes/all.fhd.config"
    # don't forget to change this.
    ckpt_name = "/home/keceli/second_test/all_fhd_2/"

    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), config)
    model_cfg = config.model.second

    for score_thresh in (0.3, ):
        model_cfg.nms_score_threshold = score_thresh
        # don't forget to change this.
        out_dir = Path.home() / f"second_test_eval_{score_thresh:.2f}"
        evaluate(config,
                 result_path=out_dir,
                 ckpt_path=str(ckpt_name),
                 batch_size=1,
                 measure_time=True)
def load_config(model_dir, config_path):
    """Resolve a pipeline config, back it up and split it into sections.

    Args:
        model_dir: directory that receives the ``pipeline.config`` backup;
            it is resolved to an absolute path and must already exist.
        config_path: either a path (``str`` or ``os.PathLike``) to a
            text-format config file, or an already-built config message.

    Returns:
        Tuple ``(input_cfg, eval_input_cfg, model_cfg, train_cfg)`` taken
        from the config's ``train_input_reader``, ``eval_input_reader``,
        ``model.second`` and ``train_config`` fields.
    """
    import os

    model_dir = Path(model_dir).resolve()
    config_file_bkp = "pipeline.config"

    if isinstance(config_path, (str, os.PathLike)):
        config = pipeline_pb2.TrainEvalPipelineConfig()
        with open(config_path, "r") as f:
            proto_str = f.read()
        text_format.Merge(proto_str, config)
    else:
        # directly provide a config object. this usually used
        # when you want to train with several different parameters in
        # one script.
        config = config_path
        proto_str = text_format.MessageToString(config, indent=2)

    with (model_dir / config_file_bkp).open("w") as f:
        f.write(proto_str)

    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    return (input_cfg, eval_input_cfg, model_cfg, train_cfg)
def set_model(config_path, model_dir, ckpt_path=None, ref_detfile=None):
    """Build a CUDA network from a config and restore its weights.

    Args:
        config_path: path of a text-format ``TrainEvalPipelineConfig`` file.
        model_dir: directory searched for the latest checkpoint when
            ``ckpt_path`` is ``None``.
        ckpt_path: explicit checkpoint to restore instead.
        ref_detfile: not used by this function.

    Returns:
        Tuple ``(net, input_cfg, model_cfg, train_cfg, class_names,
        voxel_generator, target_assigner)``.
    """
    model_dir = pathlib.Path(model_dir)
    result_name = 'predict_test'

    pipeline = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as cfg_file:
        text_format.Merge(cfg_file.read(), pipeline)
    input_cfg = pipeline.eval_input_reader
    model_cfg = pipeline.model.second
    train_cfg = pipeline.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range

    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner = target_assigner_builder.build(
        model_cfg.target_assigner, bv_range, box_coder)

    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    if train_cfg.enable_mixed_precision:
        # Half-precision network; metrics and norm layers are converted
        # back to float by the two helper calls below.
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)

    if ckpt_path is not None:
        torchplus.train.restore(ckpt_path, net)
    else:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])

    return (net, input_cfg, model_cfg, train_cfg, class_names,
            voxel_generator, target_assigner)
a_cfg.offsets[:] = old_offset else: raise ValueError("unknown") old_post_range = list(model_config.post_center_limit_range) old_post_range[:2] = new_range[:2] old_post_range[3:5] = new_range[2:] model_config.post_center_limit_range[:] = old_post_range def get_downsample_factor(model_config): downsample_factor = np.prod(model_config.rpn.layer_strides) if len(model_config.rpn.upsample_strides) > 0: downsample_factor /= model_config.rpn.upsample_strides[-1] downsample_factor *= model_config.middle_feature_extractor.downsample_factor downsample_factor = np.round(downsample_factor).astype(np.int64) assert downsample_factor > 0 return downsample_factor if __name__ == "__main__": config_path = "/home/yy/deeplearning/deeplearning/mypackages/second/configs/car.lite.1.config" config = pipeline_pb2.TrainEvalPipelineConfig() with open(config_path, "r") as f: proto_str = f.read() text_format.Merge(proto_str, config) change_detection_range(config, [-50, -50, 50, 50]) proto_str = text_format.MessageToString(config, indent=2) print(proto_str)
def train(config_path,
          model_dir,
          result_path=None,
          create_folder=False,
          display_step=50,
          summary_step=5,
          pickle_result=True):
    """Train a VoxelNet model specified by a config file.

    Training proceeds in rounds of ``train_cfg.steps_per_eval`` optimisation
    steps. After every round the model is checkpointed (once into
    ``model_dir`` and once into ``model_dir / 'eval_checkpoints'``) and
    evaluated on the eval dataset; metrics go to ``model_dir / 'log.txt'``,
    to stdout and to the summary writer in ``model_dir / 'summary'``.

    Args:
        config_path: path to a text-format ``TrainEvalPipelineConfig`` file.
            A copy is stored in ``model_dir`` as ``pipeline.config``.
        model_dir: directory for checkpoints, logs and summaries; created if
            missing.
        result_path: directory (str or path) for per-step evaluation output;
            defaults to ``model_dir / 'results'``.
        create_folder: when true and ``model_dir`` already exists, the
            directory returned by ``torchplus.train.create_folder`` is used
            instead.
        display_step: metrics are logged whenever the global step is a
            multiple of this value.
        summary_step: not used by this function.
        pickle_result: when true, detections are collected in memory with
            ``predict_kitti_to_anno`` and pickled to ``result.pkl``;
            otherwise they are written with ``_predict_kitti_to_file`` and
            read back through ``kitti.get_label_annos``.

    Raises:
        Exception: any exception raised inside the training loop is
            re-raised after a checkpoint has been saved.
    """
    if create_folder:
        if pathlib.Path(model_dir).exists():
            model_dir = torchplus.train.create_folder(model_dir)

    model_dir = pathlib.Path(model_dir)
    model_dir.mkdir(parents=True, exist_ok=True)
    eval_checkpoint_dir = model_dir / 'eval_checkpoints'
    eval_checkpoint_dir.mkdir(parents=True, exist_ok=True)
    if result_path is None:
        result_path = model_dir / 'results'
    else:
        # Accept plain strings as well; the `/` joins below need a Path.
        result_path = pathlib.Path(result_path)
    config_file_bkp = "pipeline.config"
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)
    shutil.copyfile(config_path, str(model_dir / config_file_bkp))
    input_cfg = config.train_input_reader
    eval_input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config

    class_names = list(input_cfg.class_names)
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    ######################
    # BUILD TARGET ASSIGNER
    ######################
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)
    ######################
    # BUILD NET
    ######################
    center_limit_range = model_cfg.post_center_limit_range
    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    # net_train = torch.nn.DataParallel(net).cuda()
    print("num_trainable parameters:", len(list(net.parameters())))
    # for n, p in net.named_parameters():
    #     print(n, p.shape)
    ######################
    # BUILD OPTIMIZER
    ######################
    # we need global_step to create lr_scheduler, so restore net first.
    torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    gstep = net.get_global_step() - 1
    optimizer_cfg = train_cfg.optimizer
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)
    optimizer = optimizer_builder.build(optimizer_cfg, net.parameters())
    if train_cfg.enable_mixed_precision:
        loss_scale = train_cfg.loss_scale_factor
        mixed_optimizer = torchplus.train.MixedPrecisionWrapper(
            optimizer, loss_scale)
    else:
        mixed_optimizer = optimizer
    # must restore optimizer AFTER using MixedPrecisionWrapper
    torchplus.train.try_restore_latest_checkpoints(model_dir,
                                                   [mixed_optimizer])
    lr_scheduler = lr_scheduler_builder.build(optimizer_cfg, optimizer, gstep)
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32
    ######################
    # PREPARE INPUT
    ######################
    dataset = input_reader_builder.build(input_cfg,
                                         model_cfg,
                                         training=True,
                                         voxel_generator=voxel_generator,
                                         target_assigner=target_assigner)
    eval_dataset = input_reader_builder.build(eval_input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)

    def _worker_init_fn(worker_id):
        # Seed NumPy per worker from the wall clock plus the worker id so
        # that workers do not share one random stream.
        time_seed = np.array(time.time(), dtype=np.int32)
        np.random.seed(time_seed + worker_id)
        print(f"WORKER {worker_id} seed:", np.random.get_state()[1][0])

    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=input_cfg.batch_size,
                                             shuffle=True,
                                             num_workers=input_cfg.num_workers,
                                             pin_memory=False,
                                             collate_fn=merge_second_batch,
                                             worker_init_fn=_worker_init_fn)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=eval_input_cfg.batch_size,
        shuffle=False,
        num_workers=eval_input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)
    data_iter = iter(dataloader)

    ######################
    # TRAINING
    ######################
    log_path = model_dir / 'log.txt'
    logf = open(log_path, 'a')
    logf.write(proto_str)
    logf.write("\n")
    summary_dir = model_dir / 'summary'
    summary_dir.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(str(summary_dir))

    total_step_elapsed = 0
    remain_steps = train_cfg.steps - net.get_global_step()
    t = time.time()
    ckpt_start_time = t

    total_loop = train_cfg.steps // train_cfg.steps_per_eval + 1
    # total_loop = remain_steps // train_cfg.steps_per_eval + 1
    clear_metrics_every_epoch = train_cfg.clear_metrics_every_epoch

    if train_cfg.steps % train_cfg.steps_per_eval == 0:
        total_loop -= 1
    mixed_optimizer.zero_grad()
    try:
        for _ in range(total_loop):
            # The last round may be shorter than steps_per_eval.
            if total_step_elapsed + train_cfg.steps_per_eval > train_cfg.steps:
                steps = train_cfg.steps % train_cfg.steps_per_eval
            else:
                steps = train_cfg.steps_per_eval
            for _step in range(steps):
                lr_scheduler.step()
                try:
                    example = next(data_iter)
                except StopIteration:
                    # Dataloader exhausted: start the next epoch.
                    print("end epoch")
                    if clear_metrics_every_epoch:
                        net.clear_metrics()
                    data_iter = iter(dataloader)
                    example = next(data_iter)
                example_torch = example_convert_to_torch(example, float_dtype)

                batch_size = example["anchors"].shape[0]

                ret_dict = net(example_torch)

                # box_preds = ret_dict["box_preds"]
                cls_preds = ret_dict["cls_preds"]
                loss = ret_dict["loss"].mean()
                cls_loss_reduced = ret_dict["cls_loss_reduced"].mean()
                loc_loss_reduced = ret_dict["loc_loss_reduced"].mean()
                cls_pos_loss = ret_dict["cls_pos_loss"]
                cls_neg_loss = ret_dict["cls_neg_loss"]
                loc_loss = ret_dict["loc_loss"]
                cls_loss = ret_dict["cls_loss"]
                dir_loss_reduced = ret_dict["dir_loss_reduced"]
                cared = ret_dict["cared"]
                labels = example_torch["labels"]
                if train_cfg.enable_mixed_precision:
                    loss *= loss_scale
                loss.backward()
                torch.nn.utils.clip_grad_norm_(net.parameters(), 10.0)
                mixed_optimizer.step()
                mixed_optimizer.zero_grad()
                net.update_global_step()
                net_metrics = net.update_metrics(cls_loss_reduced,
                                                 loc_loss_reduced, cls_preds,
                                                 labels, cared)

                step_time = (time.time() - t)
                t = time.time()
                metrics = {}
                # Counts below are taken from the first sample of the batch.
                num_pos = int((labels > 0)[0].float().sum().cpu().numpy())
                num_neg = int((labels == 0)[0].float().sum().cpu().numpy())
                if 'anchors_mask' not in example_torch:
                    num_anchors = example_torch['anchors'].shape[1]
                else:
                    num_anchors = int(example_torch['anchors_mask'][0].sum())
                global_step = net.get_global_step()
                if global_step % display_step == 0:
                    loc_loss_elem = [
                        float(loc_loss[:, :, i].sum().detach().cpu().numpy() /
                              batch_size) for i in range(loc_loss.shape[-1])
                    ]
                    metrics["step"] = global_step
                    metrics["steptime"] = step_time
                    metrics.update(net_metrics)
                    metrics["loss"] = {}
                    metrics["loss"]["loc_elem"] = loc_loss_elem
                    metrics["loss"]["cls_pos_rt"] = float(
                        cls_pos_loss.detach().cpu().numpy())
                    metrics["loss"]["cls_neg_rt"] = float(
                        cls_neg_loss.detach().cpu().numpy())
                    # if unlabeled_training:
                    #     metrics["loss"]["diff_rt"] = float(
                    #         diff_loc_loss_reduced.detach().cpu().numpy())
                    if model_cfg.use_direction_classifier:
                        metrics["loss"]["dir_rt"] = float(
                            dir_loss_reduced.detach().cpu().numpy())
                    metrics["num_vox"] = int(example_torch["voxels"].shape[0])
                    metrics["num_pos"] = int(num_pos)
                    metrics["num_neg"] = int(num_neg)
                    metrics["num_anchors"] = int(num_anchors)
                    metrics["lr"] = float(
                        mixed_optimizer.param_groups[0]['lr'])
                    metrics["image_idx"] = example['image_idx'][0]
                    flatted_metrics = flat_nested_json_dict(metrics)
                    flatted_summarys = flat_nested_json_dict(metrics, "/")
                    for k, v in flatted_summarys.items():
                        if isinstance(v, (list, tuple)):
                            v = {str(i): e for i, e in enumerate(v)}
                            writer.add_scalars(k, v, global_step)
                        else:
                            writer.add_scalar(k, v, global_step)
                    metrics_str_list = []
                    for k, v in flatted_metrics.items():
                        if isinstance(v, float):
                            metrics_str_list.append(f"{k}={v:.3}")
                        elif isinstance(v, (list, tuple)):
                            if v and isinstance(v[0], float):
                                v_str = ', '.join([f"{e:.3}" for e in v])
                                metrics_str_list.append(f"{k}=[{v_str}]")
                            else:
                                metrics_str_list.append(f"{k}={v}")
                        else:
                            metrics_str_list.append(f"{k}={v}")
                    log_str = ', '.join(metrics_str_list)
                    print(log_str, file=logf)
                    print(log_str)
                # Time-based checkpointing, independent of the eval rounds.
                ckpt_elasped_time = time.time() - ckpt_start_time
                if ckpt_elasped_time > train_cfg.save_checkpoints_secs:
                    torchplus.train.save_models(model_dir, [net, optimizer],
                                                net.get_global_step())
                    ckpt_start_time = time.time()
            total_step_elapsed += steps
            torchplus.train.save_models(model_dir, [net, optimizer],
                                        net.get_global_step())

            # Ensure that all evaluation points are saved forever
            torchplus.train.save_models(eval_checkpoint_dir, [net, optimizer],
                                        net.get_global_step(),
                                        max_to_keep=100)

            net.eval()
            # Step under which this round's eval summaries are recorded.
            global_step = net.get_global_step()
            result_path_step = result_path / f"step_{net.get_global_step()}"
            result_path_step.mkdir(parents=True, exist_ok=True)
            print("#################################")
            print("#################################", file=logf)
            print("# EVAL")
            print("# EVAL", file=logf)
            print("#################################")
            print("#################################", file=logf)
            print("Generate output labels...")
            print("Generate output labels...", file=logf)
            # Separate timer so the training step timer `t` is not clobbered.
            eval_start = time.time()
            dt_annos = []
            prog_bar = ProgressBar()
            prog_bar.start(len(eval_dataset) // eval_input_cfg.batch_size + 1)
            for example in iter(eval_dataloader):
                example = example_convert_to_torch(example, float_dtype)
                if pickle_result:
                    dt_annos += predict_kitti_to_anno(net, example,
                                                      class_names,
                                                      center_limit_range,
                                                      model_cfg.lidar_input)
                else:
                    _predict_kitti_to_file(net, example, result_path_step,
                                           class_names, center_limit_range,
                                           model_cfg.lidar_input)

                prog_bar.print_bar()

            sec_per_ex = len(eval_dataset) / (time.time() - eval_start)
            print(f"avg forward time per example: {net.avg_forward_time:.3f}")
            print(
                f"avg postprocess time per example: {net.avg_postprocess_time:.3f}"
            )

            net.clear_time_metrics()
            print(f'generate label finished({sec_per_ex:.2f}/s). start eval:')
            print(f'generate label finished({sec_per_ex:.2f}/s). start eval:',
                  file=logf)
            gt_annos = [
                info["annos"] for info in eval_dataset.dataset.kitti_infos
            ]
            if not pickle_result:
                dt_annos = kitti.get_label_annos(result_path_step)
            result, mAPbbox, mAPbev, mAP3d, mAPaos = get_official_eval_result(
                gt_annos, dt_annos, class_names, return_data=True)
            print(result, file=logf)
            print(result)
            writer.add_text('eval_result', result, global_step)

            for i, class_name in enumerate(class_names):
                writer.add_scalar('bev_ap:{}'.format(class_name),
                                  mAPbev[i, 1, 0], global_step)
                writer.add_scalar('3d_ap:{}'.format(class_name),
                                  mAP3d[i, 1, 0], global_step)
                writer.add_scalar('aos_ap:{}'.format(class_name),
                                  mAPaos[i, 1, 0], global_step)
            writer.add_scalar('bev_map', np.mean(mAPbev[:, 1, 0]), global_step)
            writer.add_scalar('3d_map', np.mean(mAP3d[:, 1, 0]), global_step)
            writer.add_scalar('aos_map', np.mean(mAPaos[:, 1, 0]), global_step)

            result = get_coco_eval_result(gt_annos, dt_annos, class_names)
            print(result, file=logf)
            print(result)
            if pickle_result:
                with open(result_path_step / "result.pkl", 'wb') as f:
                    pickle.dump(dt_annos, f)
            writer.add_text('eval_result', result, global_step)
            net.train()
            # Restart the step timer so the next "steptime" does not include
            # the evaluation that just finished.
            t = time.time()
    except Exception:
        # Keep the progress made so far, then let the error propagate.
        torchplus.train.save_models(model_dir, [net, optimizer],
                                    net.get_global_step())
        raise
    else:
        # save model before exit
        torchplus.train.save_models(model_dir, [net, optimizer],
                                    net.get_global_step())
    finally:
        # Always release the log file and flush pending summaries.
        logf.close()
        writer.close()
def evaluate(config_path,
             model_dir,
             result_path=None,
             predict_test=False,
             ckpt_path=None,
             ref_detfile=None,
             pickle_result=True):
    """Run a trained network over the eval dataset and report the results.

    Args:
        config_path: path to a text-format ``TrainEvalPipelineConfig`` file.
        model_dir: directory searched for the latest checkpoint when
            ``ckpt_path`` is None; also the parent of the default result
            directory.
        result_path: output directory (str or path). Defaults to
            ``model_dir / 'predict_test'`` when ``predict_test`` is true and
            ``model_dir / 'eval_results'`` otherwise.
        predict_test: when true, only predictions are generated and the
            comparison against ground-truth annotations is skipped.
        ckpt_path: explicit checkpoint to restore instead of the latest one
            in ``model_dir``.
        ref_detfile: not used by this function.
        pickle_result: when true, detections are collected in memory and
            pickled to ``result.pkl`` inside the per-step result directory;
            otherwise they are written with ``_predict_kitti_to_file``.
    """
    model_dir = pathlib.Path(model_dir)
    result_name = 'predict_test' if predict_test else 'eval_results'
    if result_path is None:
        result_path = model_dir / result_name
    else:
        result_path = pathlib.Path(result_path)

    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, "r") as f:
        proto_str = f.read()
        text_format.Merge(proto_str, config)

    input_cfg = config.eval_input_reader
    model_cfg = config.model.second
    train_cfg = config.train_config
    class_names = list(input_cfg.class_names)
    center_limit_range = model_cfg.post_center_limit_range
    ######################
    # BUILD VOXEL GENERATOR
    ######################
    voxel_generator = voxel_builder.build(model_cfg.voxel_generator)
    bv_range = voxel_generator.point_cloud_range[[0, 1, 3, 4]]
    box_coder = box_coder_builder.build(model_cfg.box_coder)
    target_assigner_cfg = model_cfg.target_assigner
    target_assigner = target_assigner_builder.build(target_assigner_cfg,
                                                    bv_range, box_coder)

    net = second_builder.build(model_cfg, voxel_generator, target_assigner)
    net.cuda()
    if train_cfg.enable_mixed_precision:
        net.half()
        net.metrics_to_float()
        net.convert_norm_to_float(net)

    if ckpt_path is None:
        torchplus.train.try_restore_latest_checkpoints(model_dir, [net])
    else:
        torchplus.train.restore(ckpt_path, net)

    eval_dataset = input_reader_builder.build(input_cfg,
                                              model_cfg,
                                              training=False,
                                              voxel_generator=voxel_generator,
                                              target_assigner=target_assigner)
    eval_dataloader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=input_cfg.batch_size,
        shuffle=False,
        num_workers=input_cfg.num_workers,
        pin_memory=False,
        collate_fn=merge_second_batch)

    # Inputs are converted to the same float type the network runs in.
    if train_cfg.enable_mixed_precision:
        float_dtype = torch.float16
    else:
        float_dtype = torch.float32

    net.eval()
    result_path_step = result_path / f"step_{net.get_global_step()}"
    result_path_step.mkdir(parents=True, exist_ok=True)
    t = time.time()
    dt_annos = []
    global_set = None
    print("Generate output labels...")
    bar = ProgressBar()
    bar.start(len(eval_dataset) // input_cfg.batch_size + 1)

    for example in iter(eval_dataloader):
        example = example_convert_to_torch(example, float_dtype)
        if pickle_result:
            dt_annos += predict_kitti_to_anno(net, example, class_names,
                                              center_limit_range,
                                              model_cfg.lidar_input,
                                              global_set)
        else:
            _predict_kitti_to_file(net, example, result_path_step,
                                   class_names, center_limit_range,
                                   model_cfg.lidar_input)
        bar.print_bar()

    sec_per_example = len(eval_dataset) / (time.time() - t)
    print(f'generate label finished({sec_per_example:.2f}/s). start eval:')

    print(f"avg forward time per example: {net.avg_forward_time:.3f}")
    print(f"avg postprocess time per example: {net.avg_postprocess_time:.3f}")
    if not predict_test:
        gt_annos = [info["annos"] for info in eval_dataset.dataset.kitti_infos]
        if not pickle_result:
            # Detections were written to disk; read them back for scoring.
            dt_annos = kitti.get_label_annos(result_path_step)
        result = get_official_eval_result(gt_annos, dt_annos, class_names)
        print(result)
        result = get_coco_eval_result(gt_annos, dt_annos, class_names)
        print(result)
    # Persist in-memory detections whether or not ground truth was scored,
    # so a predict_test run with pickle_result=True still produces output.
    if pickle_result:
        with open(result_path_step / "result.pkl", 'wb') as f:
            pickle.dump(dt_annos, f)