import torch
from typing import Callable, List, Tuple

# TRTModule and LowerPrecision come from the fx2trt / torch_tensorrt FX
# tooling; the exact import path varies between releases.


def run_test(self, mod, inputs, expected_ops, unexpected_ops, interpreter,
             rtol, atol):
    """Compile `mod` to a TensorRT engine via `interpreter`, run it, and
    compare the outputs against the eager-mode reference within rtol/atol."""
    with torch.no_grad():
        # Keep the original inputs for the eager reference run; the TRT
        # module needs its inputs on the GPU.
        cuda_inputs = []
        for i in inputs:
            cuda_inputs.append(i.cuda())

        mod.eval()
        if len(expected_ops):
            self.assert_has_op(mod, expected_ops)
        if unexpected_ops:
            self.assert_unexpected_op(mod, unexpected_ops)

        interpreter_result = interpreter.run(fp16_mode=False)
        trt_mod = TRTModule(
            interpreter_result.engine,
            interpreter_result.input_names,
            interpreter_result.output_names,
        )

        ref_outputs = mod(*inputs)
        outputs = trt_mod(*cuda_inputs)

        # Normalize single-tensor results so the comparison loop below
        # always iterates over lists.
        if isinstance(outputs, torch.Tensor):
            ref_outputs = [ref_outputs]
            outputs = [outputs]

        for out, ref in zip(outputs, ref_outputs):
            torch.testing.assert_allclose(out.cpu(), ref, rtol=rtol, atol=atol)
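# A minimal usage sketch for run_test above. The Relu module, the acc_ops
# target, and the make_interpreter helper are illustrative assumptions; in
# practice the interpreter is built by the surrounding test harness.
#
#     class Relu(torch.nn.Module):
#         def forward(self, x):
#             return torch.relu(x)
#
#     inputs = [torch.randn(1, 3, 4, 4)]
#     self.run_test(
#         Relu(), inputs,
#         expected_ops={acc_ops.relu},    # ops that must appear in the traced graph
#         unexpected_ops=None,
#         interpreter=make_interpreter(Relu(), inputs),  # hypothetical helper
#         rtol=1e-03, atol=1e-03,
#     )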
def run_test_custom_compare_results(
    self,
    mod,
    inputs,
    expected_ops,
    interpreter,
    comparators: List[Tuple[Callable, List]],
    fp16_mode=False,
):
    """
    Runs the test and compares the results using the provided comparators.
    The number of comparators must equal the number of outputs from 'mod'.

    mod          - a model to run.
    inputs       - a list of the model inputs.
    expected_ops - a list of ops that should be verified.
    interpreter  - used for converting the model to TRT.
    comparators  - a list of (func, args) pairs corresponding to each of
                   the module outputs. Usage: func(x, y, *args)
    """
    with torch.no_grad():
        cuda_inputs = []
        for i in inputs:
            cuda_inputs.append(i.cuda())

        mod.eval()
        if len(expected_ops):
            self.assert_has_op(mod, expected_ops)

        interpreter_result = interpreter.run(
            lower_precision=LowerPrecision.FP16
            if fp16_mode
            else LowerPrecision.FP32
        )
        trt_mod = TRTModule(
            interpreter_result.engine,
            interpreter_result.input_names,
            interpreter_result.output_names,
        )
        res_trt = trt_mod(*cuda_inputs).cpu()
        res_cpu = mod(*inputs)
        assert len(res_trt) == len(res_cpu)
        assert len(res_cpu) == len(comparators)
        # Each output pair is judged by its own (func, args) comparator,
        # called as func(output_trt, output_cpu, *args).
        for output_trt, output_cpu, comparator in zip(
                res_trt, res_cpu, comparators):
            comp_func = comparator[0]
            args = comparator[1]
            self.assertTrue(comp_func(output_trt, output_cpu, *args))
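# A minimal sketch of how the comparators argument can be used, e.g. a
# cosine-similarity check with a tunable threshold. The threshold value,
# acc_ops target, and `interp` are illustrative assumptions.
#
#     def cos_sim(trt_out, cpu_out, threshold):
#         sim = torch.nn.functional.cosine_similarity(
#             trt_out.flatten().float(), cpu_out.flatten().float(), dim=0
#         )
#         return sim.item() >= threshold
#
#     # One (func, args) pair per module output; called as func(x, y, *args).
#     self.run_test_custom_compare_results(
#         mod, inputs,
#         expected_ops={acc_ops.linear},
#         interpreter=interp,
#         comparators=[(cos_sim, [0.999])],
#         fp16_mode=True,
#     )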
def run_test(self, mod, inputs, expected_ops, unexpected_ops, interpreter,
             rtol, atol, precision=LowerPrecision.FP32):
    """Like run_test above, but lets the caller pick the engine precision
    and coerces non-tensor references so they can be compared."""
    with torch.no_grad():
        cuda_inputs = []
        for i in inputs:
            cuda_inputs.append(i.cuda())

        mod.eval()
        if len(expected_ops):
            self.assert_has_op(mod, expected_ops)
        if unexpected_ops:
            self.assert_unexpected_op(mod, unexpected_ops)

        interpreter_result = interpreter.run(lower_precision=precision)
        trt_mod = TRTModule(
            interpreter_result.engine,
            interpreter_result.input_names,
            interpreter_result.output_names,
        )

        ref_outputs = mod(*inputs)
        outputs = trt_mod(*cuda_inputs)

        if isinstance(outputs, torch.Tensor):
            ref_outputs = [ref_outputs]
            outputs = [outputs]

        for out, ref in zip(outputs, ref_outputs):
            # Scalar references (e.g. Python numbers) are wrapped so they
            # can go through assert_allclose.
            if not isinstance(ref, torch.Tensor):
                ref = torch.tensor([ref])
            ref = ref.cpu()  # to_dtype test has cases with gpu output
            torch.testing.assert_allclose(out.cpu(), ref, rtol=rtol, atol=atol)
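# A hedged sketch of exercising the precision parameter above, e.g. checking
# an FP16 engine against the FP32 eager reference. `mod`, `inputs`, `interp`,
# and the acc_ops target are placeholders supplied by the test harness.
#
#     self.run_test(
#         mod, inputs,
#         expected_ops={acc_ops.conv2d},
#         unexpected_ops=None,
#         interpreter=interp,
#         rtol=1e-02, atol=1e-02,   # FP16 typically needs wider tolerances
#         precision=LowerPrecision.FP16,
#     )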