from collections import OrderedDict
from logging import getLogger
from pathlib import Path

import torch
from onnxruntime import InferenceSession
from onnxruntime.transformers.optimizer import optimize_model
from tqdm import trange
from transformers import TensorType

LOGGER = getLogger(__name__)

# Scale factor converting the benchmark duration (seconds) into nanoseconds,
# the unit in which latencies are accumulated.
SEC_TO_NS_SCALE = 1_000_000_000


def _run_pytorch(self, config: BenchmarkConfig) -> Benchmark:
    """
    Benchmark the model with PyTorch eager execution.

    :param config: benchmark parameters (batch size, sequence length, device, ...).
    :return: a Benchmark instance holding the recorded per-call latencies.
    """
    LOGGER.info("Running PyTorch Eager benchmark")
    benchmark = Benchmark()

    # Leave room for the special tokens the tokenizer will add back
    dummy_inputs = self._get_dummy_inputs(
        batch_size=config.batch_size,
        seq_len=(config.sequence_length - self.tokenizer.num_special_tokens_to_add(pair=False)),
    )

    inputs = self.tokenizer(
        dummy_inputs,
        is_split_into_words=True,
        return_tensors=TensorType.PYTORCH,
    )
    inputs = inputs.to(config.device)
    self.model = self.model.to(config.device)

    # Warmup
    for _ in trange(config.warmup_runs, desc="Warming up"):
        self.model(**inputs)

    # Run benchmark: keep inferring until the time budget is exhausted
    benchmark_duration_ns = config.benchmark_duration * SEC_TO_NS_SCALE
    while sum(benchmark.latencies) < benchmark_duration_ns:
        with benchmark.track():
            self.model(**inputs)

    benchmark.finalize(benchmark_duration_ns)
    return benchmark
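# The Benchmark helper above is defined elsewhere in the repository. A minimal
# sketch consistent with how it is used here -- a `latencies` list of per-call
# durations in nanoseconds, a `track()` context manager around each forward
# pass, and a `finalize()` step -- could look like the following; the
# `throughput` field and the use of time.monotonic_ns() are assumptions, not
# the repository's actual implementation:

from contextlib import contextmanager
from time import monotonic_ns


class Benchmark:
    def __init__(self):
        self.latencies = []              # one entry per tracked inference, in ns
        self.throughput = float("-inf")  # assumed summary statistic

    @contextmanager
    def track(self):
        # Measure wall-clock time around a single inference call
        start = monotonic_ns()
        yield
        self.latencies.append(monotonic_ns() - start)

    def finalize(self, duration_ns: int):
        # Completed inferences per second of time budget
        self.throughput = len(self.latencies) / (duration_ns / SEC_TO_NS_SCALE)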
def execute(self, config: 'BenchmarkConfig') -> Benchmark:
    """
    Benchmark the model through ONNX Runtime, optimizing the graph first.

    :param config: benchmark parameters (batch size, sequence length, ...).
    :return: a Benchmark instance holding the recorded per-call latencies.
    """
    benchmark = Benchmark()

    # Try to run onnxruntime's BERT-specific graph optimizations offline and
    # benchmark the optimized graph; fall back to the original one on failure.
    try:
        model_opt_path = Path(self.onnx_path)
        opt_onnx_path = model_opt_path.with_suffix(".opt" + model_opt_path.suffix)

        model_opt = optimize_model(
            self.onnx_path,
            model_type="bert",
            opt_level=int(self.session_opts.graph_optimization_level),
        )
        model_opt.save_model_to_file(opt_onnx_path.absolute().as_posix())
        self.optimized_onnx_graph = opt_onnx_path.absolute().as_posix()
    except Exception as e:
        LOGGER.error(f"Unable to optimize ONNX BERT model: {e}")

    session = InferenceSession(self.optimized_onnx_graph or self.onnx_path, self.session_opts)

    # Leave room for the special tokens the tokenizer will add back
    dummy_inputs = self._get_dummy_inputs(
        batch_size=config.batch_size,
        seq_len=(config.sequence_length - self.tokenizer.num_special_tokens_to_add(pair=False)),
    )

    inputs = self.tokenizer(
        dummy_inputs,
        is_split_into_words=True,
        return_tensors=TensorType.NUMPY,
    )
    # Cast to int64 ("i8" = 8-byte integer), the dtype the exported graph expects
    inputs = {k: v.astype("i8") for k, v in inputs.items()}

    # Warmup
    for _ in trange(config.warmup_runs, desc="Warming up"):
        session.run(None, inputs)

    # Run benchmark: keep inferring until the time budget is exhausted
    benchmark_duration_ns = config.benchmark_duration * SEC_TO_NS_SCALE
    while sum(benchmark.latencies) < benchmark_duration_ns:
        with benchmark.track():
            session.run(None, inputs)

    benchmark.finalize(benchmark_duration_ns)
    return benchmark
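# BenchmarkConfig is only referenced by type above. A plausible shape, inferred
# from the fields the runners actually read (batch_size, sequence_length,
# warmup_runs, benchmark_duration, device); the defaults here are assumptions:

from dataclasses import dataclass


@dataclass
class BenchmarkConfig:
    batch_size: int = 1
    sequence_length: int = 128
    warmup_runs: int = 10          # forward passes executed before timing starts
    benchmark_duration: int = 60   # time budget in seconds (converted to ns)
    device: str = "cpu"            # unused by the ONNX Runtime backend above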
def _run_torchscript(self, config: BenchmarkConfig) -> Benchmark:
    """
    Benchmark the model traced with TorchScript.

    :param config: benchmark parameters (batch size, sequence length, device, ...).
    :return: a Benchmark instance holding the recorded per-call latencies.
    """
    LOGGER.info("Running TorchScript benchmark")
    benchmark = Benchmark()

    # Leave room for the special tokens the tokenizer will add back
    dummy_inputs = self._get_dummy_inputs(
        batch_size=config.batch_size,
        seq_len=(config.sequence_length - self.tokenizer.num_special_tokens_to_add(pair=False)),
    )

    inputs = self.tokenizer(
        dummy_inputs,
        is_split_into_words=True,
        return_tensors=TensorType.PYTORCH,
    )
    inputs = inputs.to(config.device)
    self.model = self.model.to(config.device)

    # Make sure the inputs are handed to the traced model in the order its
    # forward signature expects (tracing takes positional arguments)
    ordered_inputs = OrderedDict({
        "input_ids": inputs.input_ids,
        "attention_mask": inputs.attention_mask,
        "token_type_ids": inputs.token_type_ids,
    })

    LOGGER.debug("Calling torch JIT on model (optimize=True)")
    model_scripted = torch.jit.trace(self.model, tuple(ordered_inputs.values()))

    with torch.jit.optimized_execution(True):
        # Warmup
        for _ in trange(config.warmup_runs, desc="Warming up"):
            model_scripted(*ordered_inputs.values())

        # Run benchmark: keep inferring until the time budget is exhausted
        benchmark_duration_ns = config.benchmark_duration * SEC_TO_NS_SCALE
        while sum(benchmark.latencies) < benchmark_duration_ns:
            with benchmark.track():
                model_scripted(*ordered_inputs.values())

    benchmark.finalize(benchmark_duration_ns)
    return benchmark
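# _get_dummy_inputs is defined elsewhere on the runner. Since its output is fed
# to the tokenizer with is_split_into_words=True, it presumably returns one
# pre-split word list per sequence in the batch; a minimal sketch (using the
# tokenizer's unknown token as filler is an assumption):

from typing import List


def _get_dummy_inputs(self, batch_size: int, seq_len: int) -> List[List[str]]:
    # seq_len already excludes the special tokens, which the callers subtracted
    # via num_special_tokens_to_add(pair=False)
    return [[self.tokenizer.unk_token] * seq_len for _ in range(batch_size)]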