def _setup_graph(self):
    """Build the predictors and evaluation dataflows for this trainer.

    Replicated trainer: several predictor threads sharded across GPUs.
    Otherwise (horovod): every rank evaluates its own shard of the data.
    """
    gpus = cfg.TRAIN.NUM_GPUS
    if cfg.TRAINER == 'replicated':
        # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
        has_buggy_tf = get_tf_version_tuple() in [(1, 11), (1, 12)]
        # Two predictor threads per GPU give better throughput, unless TF is buggy.
        self.num_predictor = gpus if has_buggy_tf else gpus * 2
        self.predictors = []
        self.dataflows = []
        for idx in range(self.num_predictor):
            self.predictors.append(self._build_predictor(idx % gpus))
            self.dataflows.append(get_eval_dataflow(
                self._eval_dataset, shard=idx, num_shards=self.num_predictor))
    else:
        # Eval on all ranks and use gather
        self.predictor = self._build_predictor(0)
        if self.batched:
            self.dataflow = get_batched_eval_dataflow(
                self._eval_dataset,
                shard=hvd.rank(), num_shards=hvd.size(),
                batch_size=self.batch_size)
        else:
            self.dataflow = get_eval_dataflow(
                self._eval_dataset,
                shard=hvd.rank(), num_shards=hvd.size())
def do_evaluate(pred_config, output_file):
    """Evaluate every configured VAL and TEST dataset and save results.

    Args:
        pred_config: a PredictConfig describing the inference graph.
        output_file: path prefix; per-dataset output is written to
            ``output_file + '-' + dataset``.
    """
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()
    # The VAL and TEST splits share an identical evaluation procedure;
    # iterate over both instead of duplicating the loop body (the original
    # repeated the same code for cfg.DATA.VAL and cfg.DATA.TEST).
    for split in (cfg.DATA.VAL, cfg.DATA.TEST):
        for dataset in split:
            logger.info("Evaluating {} ...".format(dataset))
            dataflows = [
                get_eval_dataflow(dataset, shard=k, num_shards=num_gpu)
                for k in range(num_gpu)
            ]
            all_results = multithread_predict_dataflow(dataflows, graph_funcs)
            output = output_file + '-' + dataset
            DetectionDataset().eval_or_save_inference_results(
                all_results, dataset, output)
def _setup_graph(self):
    """Create eval predictors/dataflows; horovod evals on the first machine only."""
    gpus = cfg.TRAIN.NUM_GPUS
    if cfg.TRAINER == 'replicated':
        # TF bug in version 1.11, 1.12: https://github.com/tensorflow/tensorflow/issues/22750
        has_buggy_tf = get_tf_version_tuple() in [(1, 11), (1, 12)]
        # Two predictor threads per GPU improve throughput unless TF is buggy.
        self.num_predictor = gpus if has_buggy_tf else 2 * gpus
        self.predictors = []
        self.dataflows = []
        for idx in range(self.num_predictor):
            self.predictors.append(self._build_predictor(idx % gpus))
            self.dataflows.append(get_eval_dataflow(
                self._eval_dataset, shard=idx, num_shards=self.num_predictor))
    else:
        # Only eval on the first machine, because evaluation assumes that all
        # horovod workers share the filesystem. Alternatively, can eval on all
        # ranks and use allgather, but allgather sometimes hangs.
        self._horovod_run_eval = hvd.rank() == hvd.local_rank()
        if self._horovod_run_eval:
            self.predictor = self._build_predictor(0)
            self.dataflow = get_eval_dataflow(
                self._eval_dataset,
                shard=hvd.local_rank(), num_shards=hvd.local_size())
        # All ranks join this allreduce so non-evaluating workers wait too.
        self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
def set_up_graph(self, trainer: tp.Trainer) -> None:
    """Attach evaluation predictors and dataflows to the given trainer."""
    self.trainer = trainer
    if self.trainer_type != "replicated":
        if self.machine_rank == 0:
            # Validation runs on a single machine.
            self.predictor = self._build_predictor(0)
            self.dataflow = get_eval_dataflow(
                self._eval_dataset,
                self.is_aws,
                self.is_gcs,
                shard=hvd.local_rank(),
                num_shards=hvd.local_size(),
            )
        # All workers must take part in this barrier, even if they
        # are not performing validation.
        self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
        return
    # Use multiple predictor threads per GPU to get better throughput.
    self.num_predictor = self.num_gpus * 2
    self.predictors = [
        self._build_predictor(i % self.num_gpus)
        for i in range(self.num_predictor)
    ]
    self.dataflows = [
        get_eval_dataflow(  # type: ignore
            self._eval_dataset,
            self.is_aws,
            self.is_gcs,
            shard=i,
            num_shards=self.num_predictor,
        )
        for i in range(self.num_predictor)
    ]
def _setup_graph(self):
    """Create predictors and dataflows for evaluation.

    Replicated trainer: several predictor threads sharded over the GPUs.
    Horovod: each rank evaluates its own shard, then exchanges results via
    allgather of byte tensors (see comments below).
    """
    num_gpu = cfg.TRAIN.NUM_GPUS
    if cfg.TRAINER == 'replicated':
        # Use two predictor threads per GPU to get better throughput
        self.num_predictor = num_gpu * 2
        self.predictors = [
            self._build_coco_predictor(k % num_gpu)
            for k in range(self.num_predictor)
        ]
        self.dataflows = [
            get_eval_dataflow(shard=k, num_shards=self.num_predictor)
            for k in range(self.num_predictor)
        ]
    else:
        self.predictor = self._build_coco_predictor(0)
        self.dataflow = get_eval_dataflow(shard=hvd.rank(), num_shards=hvd.size())
        # use uint8 to aggregate strings: each rank feeds its results as a
        # variable-length byte vector (presumably serialized result strings —
        # confirm against the caller that feeds this placeholder).
        self.local_result_tensor = tf.placeholder(
            tf.uint8, shape=[None], name='local_result_string')
        # allgather concatenates every rank's byte vector along axis 0.
        self.concat_results = hvd.allgather(self.local_result_tensor, name='concat_results')
        # Also gather each rank's byte count so the concatenated buffer can be
        # split back into per-rank segments after the allgather.
        local_size = tf.expand_dims(tf.size(self.local_result_tensor), 0)
        self.string_lens = hvd.allgather(local_size, name='concat_sizes')
def _setup_graph(self):
    """Build eval predictors; horovod uses a single predictor thread."""
    gpus = cfg.TRAIN.NUM_GPUS
    # Two predictor threads per GPU give better throughput; horovod gets one.
    if cfg.TRAINER == 'horovod':
        self.num_predictor = 1
    else:
        self.num_predictor = gpus * 2
    self.predictors = []
    self.dataflows = []
    for idx in range(self.num_predictor):
        self.predictors.append(self._build_coco_predictor(idx % gpus))
        self.dataflows.append(
            get_eval_dataflow(shard=idx, num_shards=self.num_predictor))
def offline_evaluate(pred_func, output_file):
    """Run COCO evaluation with one predictor and dump JSON results."""
    dataflow = get_eval_dataflow()

    def run_detection(img):
        return detect_one_image(img, pred_func)

    results = eval_coco(dataflow, run_detection)
    with open(output_file, 'w') as fout:
        json.dump(results, fout)
    print_evaluation_scores(output_file)
def offline_evaluate(pred_func, output_file):
    """Evaluate the eval dataflow with one predictor and write JSON results."""
    def run_detection(img):
        return detect_one_image(img, pred_func)

    results = eval_on_dataflow(get_eval_dataflow(), run_detection)
    with open(output_file, 'w') as handle:
        json.dump(results, handle)
    print_evaluation_scores(output_file)
def _setup_graph(self):
    """Set up eval predictors; under horovod only the first machine evaluates."""
    gpus = cfg.TRAIN.NUM_GPUS
    if cfg.TRAINER == 'replicated':
        # Two predictor threads per GPU give better throughput.
        self.num_predictor = 2 * gpus
        self.predictors = []
        self.dataflows = []
        for worker in range(self.num_predictor):
            self.predictors.append(self._build_coco_predictor(worker % gpus))
            self.dataflows.append(
                get_eval_dataflow(shard=worker, num_shards=self.num_predictor))
    else:
        # Only eval on the first machine. Alternatively, can eval on all ranks
        # and use allgather, but allgather sometimes hangs.
        self._horovod_run_eval = hvd.rank() == hvd.local_rank()
        if self._horovod_run_eval:
            self.predictor = self._build_coco_predictor(0)
            self.dataflow = get_eval_dataflow(
                shard=hvd.local_rank(), num_shards=hvd.local_size())
        # Every rank joins this allreduce so non-evaluating workers wait.
        self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
def _setup_graph(self):
    """Prepare predictors and dataflows for periodic COCO evaluation."""
    num_gpu = cfg.TRAIN.NUM_GPUS
    if cfg.TRAINER == 'replicated':
        # Use two predictor threads per GPU to get better throughput.
        count = num_gpu * 2
        self.num_predictor = count
        self.predictors = [
            self._build_coco_predictor(i % num_gpu) for i in range(count)]
        self.dataflows = [
            get_eval_dataflow(shard=i, num_shards=count) for i in range(count)]
        return
    # Horovod path: only eval on the first machine. Alternatively, can eval on
    # all ranks and use allgather, but allgather sometimes hangs.
    self._horovod_run_eval = hvd.rank() == hvd.local_rank()
    if self._horovod_run_eval:
        self.predictor = self._build_coco_predictor(0)
        self.dataflow = get_eval_dataflow(shard=hvd.local_rank(),
                                          num_shards=hvd.local_size())
    self.barrier = hvd.allreduce(tf.random_normal(shape=[1]))
def _setup_graph(self):
    """Create the predictor, eval dataflow, and the schedule of eval epochs."""
    self.pred = self.trainer.get_predictor(
        ['image'], ['fastrcnn_fg_probs', 'fastrcnn_fg_boxes'])
    self.df = PrefetchDataZMQ(get_eval_dataflow(), 1)
    EVAL_TIMES = 5  # eval 5 times during training
    max_epoch = self.trainer.config.max_epoch
    interval = max_epoch // (EVAL_TIMES + 1)
    # Evenly spaced intermediate epochs, plus always the final epoch.
    self.epochs_to_eval = {interval * k for k in range(1, EVAL_TIMES)}
    self.epochs_to_eval.add(max_epoch)
    get_tf_nms()  # just to make sure the nms part of graph is created
def do_evaluate(pred_config, output_file):
    """Evaluate every VAL dataset with multi-tower predictors and save results."""
    # At least one tower, even without GPUs (CPU predictor).
    towers = max(cfg.TRAIN.NUM_GPUS, 1)
    graph_funcs = MultiTowerOfflinePredictor(
        pred_config, list(range(towers))).get_predictors()
    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = []
        for shard_id in range(towers):
            dataflows.append(
                get_eval_dataflow(dataset, shard=shard_id, num_shards=towers))
        all_results = multithread_predict_dataflow(dataflows, graph_funcs)
        output = output_file + '-' + dataset
        DatasetRegistry.get(dataset).eval_inference_results(all_results, output)
def offline_evaluate(model_path, output_file):
    """Load a checkpoint, evaluate it offline, and dump JSON results."""
    config = PredictConfig(
        model=Model(),
        session_init=get_model_loader(model_path),
        input_names=['image'],
        output_names=[
            'fastrcnn_fg_probs',
            'fastrcnn_fg_boxes',
        ])
    predictor = OfflinePredictor(config)
    dataflow = PrefetchDataZMQ(get_eval_dataflow(), 1)

    def run_detection(img):
        return detect_one_image(img, predictor)

    results = eval_on_dataflow(dataflow, run_detection)
    with open(output_file, 'w') as handle:
        json.dump(results, handle)
    print_evaluation_scores(output_file)
def do_evaluate(pred_config, output_file, batch_size):
    """Multi-gpu evaluation, if available.

    Args:
        pred_config: a PredictConfig describing the inference graph.
        output_file: path for the ``.npz`` results archive.
        batch_size: batch size passed through to ``get_eval_dataflow``.

    Returns:
        Whatever ``print_evaluation_scores`` returns for the saved file.
    """
    # Use at least one tower even when no GPU is configured (CPU predictor).
    num_tower = max(cfg.TRAIN.NUM_GPUS, 1)
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_tower))).get_predictors()
    dataflows = [
        get_eval_dataflow(batch_size, shard=k, num_shards=num_tower)
        for k in range(num_tower)
    ]
    all_results = multithread_pred_dataflow(dataflows, graph_funcs)
    # (Removed dead commented-out single-GPU code path.)
    logger.info('Dumping evaluation results')
    np.savez(output_file, **all_results)
    return print_evaluation_scores(output_file)
def offline_evaluate(pred_config, output_file):
    """Evaluate COCO with one predictor per GPU and dump JSON results."""
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()
    # Bind each graph func at definition time (default argument) so every
    # lambda captures its own predictor rather than the loop variable.
    predictors = [
        (lambda img, pred=graph_funcs[k]: detect_one_image(img, pred))
        for k in range(num_gpu)
    ]
    dataflows = [
        get_eval_dataflow(shard=k, num_shards=num_gpu) for k in range(num_gpu)
    ]
    if num_gpu > 1:
        all_results = multithread_eval_coco(dataflows, predictors)
    else:
        all_results = eval_coco(dataflows[0], predictors[0])
    with open(output_file, 'w') as f:
        json.dump(all_results, f)
    print_coco_metrics(output_file)
def evaluate_rcnn(model_name, paper_arxiv_id, cfg_list, model_file):
    """Run inference on coco_val2017 and feed detections to a COCOEvaluator.

    Args:
        model_name: display name registered with the evaluator.
        paper_arxiv_id: arXiv id the results are attributed to.
        cfg_list: list of config overrides applied via update_config_from_args.
        model_file: checkpoint path loaded with SmartInit.
    """
    evaluator = COCOEvaluator(
        root=COCO_ROOT, model_name=model_name, paper_arxiv_id=paper_arxiv_id
    )
    # Invert the dataset's COCO-id -> contiguous-category mapping so model
    # class ids can be reported back as original COCO category ids.
    category_id_to_coco_id = {
        v: k for k, v in COCODetection.COCO_id_to_category_id.items()
    }
    cfg.update_config_from_args(cfg_list)  # TODO backup/restore config
    # False presumably means "not training" — confirm finalize_configs' contract.
    finalize_configs(False)
    MODEL = ResNetFPNModel() if cfg.MODE_FPN else ResNetC4Model()
    predcfg = PredictConfig(
        model=MODEL,
        session_init=SmartInit(model_file),
        input_names=MODEL.get_inference_tensor_names()[0],
        output_names=MODEL.get_inference_tensor_names()[1],
    )
    predictor = OfflinePredictor(predcfg)

    def xyxy_to_xywh(box):
        # Convert [x1, y1, x2, y2] to COCO's [x, y, w, h]; mutates and
        # returns the same list.
        box[2] -= box[0]
        box[3] -= box[1]
        return box

    df = get_eval_dataflow("coco_val2017")
    df.reset_state()
    for img, img_id in tqdm.tqdm(df, total=len(df)):
        results = predict_image(img, predictor)
        # One COCO-format detection dict per predicted box.
        res = [
            {
                "image_id": img_id,
                "category_id": category_id_to_coco_id.get(
                    int(r.class_id), int(r.class_id)
                ),
                "bbox": xyxy_to_xywh([round(float(x), 4) for x in r.box]),
                "score": round(float(r.score), 3),
            }
            for r in results
        ]
        evaluator.add(res)
        # Stop early once the evaluator reports a cached result for these
        # predictions (sotabench-style caching — confirm evaluator semantics).
        if evaluator.cache_exists:
            break
    evaluator.save()
def offline_evaluate(pred_config, output_file):
    """Evaluate each VAL dataset with per-GPU predictors and save metrics."""
    num_gpu = cfg.TRAIN.NUM_GPUS
    graph_funcs = MultiTowerOfflinePredictor(pred_config, list(
        range(num_gpu))).get_predictors()
    # Bind each predictor through a default argument to avoid the
    # late-binding-closure pitfall.
    predictors = [
        (lambda img, pred=graph_funcs[k]: detect_one_image(img, pred))
        for k in range(num_gpu)
    ]
    for dataset in cfg.DATA.VAL:
        logger.info("Evaluating {} ...".format(dataset))
        dataflows = []
        for shard in range(num_gpu):
            dataflows.append(
                get_eval_dataflow(dataset, shard=shard, num_shards=num_gpu))
        if num_gpu > 1:
            all_results = multithread_eval_coco(dataflows, predictors)
        else:
            all_results = eval_coco(dataflows[0], predictors[0])
        output = output_file + '-' + dataset
        with open(output, 'w') as f:
            json.dump(all_results, f)
        print_coco_metrics(dataset, output)
def _setup_graph(self):
    """Create three stage-specific predictors and the evaluation dataflow."""
    build = self.trainer.get_predictor
    self.pred_1st = build(self._in_names, self._out_names_1st)
    self.pred_2nd = build(self._in_names, self._out_names_2nd)
    self.pred_3rd = build(self._in_names, self._out_names_3rd)
    self.df = get_eval_dataflow()
def _setup_graph(self):
    """Build the image predictor and the evaluation dataflow."""
    inputs = ['image']
    outputs = get_model_output_names()
    self.pred = self.trainer.get_predictor(inputs, outputs)
    self.df = get_eval_dataflow()
def _setup_graph(self):
    """Create the predictor from the configured tensor names, plus eval data."""
    in_names, out_names = self._in_names, self._out_names
    self.pred = self.trainer.get_predictor(in_names, out_names)
    self.df = get_eval_dataflow()
def _setup_graph(self):
    """Instantiate the evaluation predictor and its input dataflow."""
    trainer = self.trainer
    self.pred = trainer.get_predictor(self._in_names, self._out_names)
    self.df = get_eval_dataflow()
def _setup_graph(self):
    """Build a predictor mapping images to final detections, plus eval data."""
    output_names = ['final_boxes', 'final_probs', 'final_labels']
    self.pred = self.trainer.get_predictor(['image'], output_names)
    self.df = get_eval_dataflow()
def offline_evaluate(pred_func, output_file):
    # NOTE(review): as visible here, this function only constructs the eval
    # dataflow and discards it — pred_func and output_file are never used.
    # It looks truncated or incomplete; confirm against the full source.
    df = get_eval_dataflow()
def _setup_graph(self):
    """Create the eval predictor and dataflow."""
    out_names = get_model_output_names()
    self.pred = self.trainer.get_predictor(['image'], out_names)
    self.df = get_eval_dataflow()
if args.visualize:
    do_visualize(MODEL, args.load)
else:
    # Build an inference config from the trained checkpoint for all the
    # non-visualization modes below.
    predcfg = PredictConfig(
        model=MODEL,
        session_init=get_model_loader(args.load),
        input_names=MODEL.get_inference_tensor_names()[0],
        output_names=MODEL.get_inference_tensor_names()[1])
    if args.compact:
        # Presumably exports a frozen compact graph — see ModelExporter docs.
        ModelExporter(predcfg).export_compact(args.compact, optimize=False)
    elif args.serving:
        # Presumably exports a SavedModel for serving — see ModelExporter docs.
        ModelExporter(predcfg).export_serving(args.serving, optimize=False)
    if args.predict:
        # Run single-image prediction on each given file.
        predictor = OfflinePredictor(predcfg)
        for image_file in args.predict:
            do_predict(predictor, image_file)
    elif args.evaluate:
        assert args.evaluate.endswith('.json'), args.evaluate
        do_evaluate(predcfg, args.evaluate)
    elif args.benchmark:
        # Benchmark raw inference speed on the first validation dataset.
        df = get_eval_dataflow(cfg.DATA.VAL[0])
        df.reset_state()
        predictor = OfflinePredictor(predcfg)
        for img in tqdm.tqdm(df, total=len(df)):
            # This include post-processing time, which is done on CPU and not optimized
            # To exclude it, modify `predict_image`.
            predict_image(img[0], predictor)
def _setup_graph(self):
    """Build the predictor and a prefetched eval dataflow; pre-create NMS ops."""
    self.pred = self.trainer.get_predictor(
        ['image'], ['fastrcnn_fg_probs', 'fastrcnn_fg_boxes'])
    eval_df = get_eval_dataflow()
    self.df = PrefetchDataZMQ(eval_df, 1)
    get_tf_nms()  # just to make sure the nms part of graph is created