def test_host_data_copied_to_device(self):
    """Check that a batch served by the calibrator is readable through a device view.

    Requests one batch for the input named "x" from a ``Calibrator`` fed by
    ``generate_data(1)``, wraps the returned pointer in a ``cuda.DeviceView``
    and verifies that the array obtained via ``.numpy()`` has shape
    (1, 1, 2, 2) and contains only ones.
    """
    expected_shape = (1, 1, 2, 2)

    with Calibrator(generate_data(1)) as calibrator:
        # Unpacking into a one-element tuple also enforces that exactly one
        # pointer is returned for the single requested name.
        (device_ptr,) = calibrator.get_batch(names=["x"])

        view = cuda.DeviceView(device_ptr, shape=expected_shape, dtype=np.float32)
        host_copy = view.numpy()

        assert host_copy.shape == expected_shape
        # NOTE(review): presumably generate_data yields all-ones arrays for
        # "x" -- confirm against its definition.
        assert np.all(host_copy == 1)
def test_device_view_dynamic_shapes(self, use_view):
    """Feed a device buffer to the "dynamic_identity" model and expect it echoed back.

    Args:
        use_view: When truthy, the input is passed to the runner as a
            ``cuda.DeviceView`` built from the device array's pointer, shape
            and dtype; otherwise the ``cuda.DeviceArray`` itself is passed.

    The test asserts that output "Y" equals the random input element-wise and
    keeps the (1, 2, 3, 3) input shape.
    """
    input_shape = (1, 2, 3, 3)
    onnx_model = ONNX_MODELS["dynamic_identity"]

    # NOTE(review): the three shapes are presumably the min/opt/max bounds for
    # "X" -- confirm against Profile.add.
    dynamic_profile = Profile().add("X", (1, 2, 1, 1), (1, 2, 2, 2), (1, 2, 4, 4))

    network_loader = NetworkFromOnnxBytes(onnx_model.loader)
    config_loader = CreateConfig(profiles=[dynamic_profile])
    runner = TrtRunner(EngineFromNetwork(network_loader, config_loader))

    with runner, cuda.DeviceArray(shape=input_shape, dtype=np.float32) as device_input:
        host_input = np.random.random_sample(size=input_shape).astype(np.float32)
        device_input.copy_from(host_input)

        if use_view:
            feed = cuda.DeviceView(device_input.ptr, device_input.shape, device_input.dtype)
        else:
            feed = device_input

        outputs = runner.infer({"X": feed})

        assert np.all(outputs["Y"] == host_input)
        assert outputs["Y"].shape == input_shape
def generate_dev_data(num_batches):
    """Yield ``num_batches`` feed dicts whose "X0" entry refers to device memory.

    A single one-element float32 ``cuda.DeviceArray`` is opened for the whole
    lifetime of the generator and refilled with ones before every batch.

    ``mode`` is not a parameter: it is read from an enclosing scope that is
    not visible in this block. It selects how the device buffer is exposed
    under "X0": "array" (the DeviceArray itself), "view" (a
    ``cuda.DeviceView`` over it) or "pointer" (its raw ``ptr``). Any other
    value raises ``KeyError``.

    Args:
        num_batches: Number of feed dicts to produce.

    Yields:
        dict: ``{"X0": <device data per mode>, "Y0": zeros((1,), float32)}``.
    """
    with cuda.DeviceArray(shape=(1, ), dtype=np.float32) as device_buffer:
        for _ in range(num_batches):
            device_buffer.copy_from(np.ones((1, ), dtype=np.float32))

            # All three representations are built each time; the lookup then
            # picks the one requested by `mode`.
            representations = {
                "array": device_buffer,
                "view": cuda.DeviceView(device_buffer.ptr, device_buffer.shape, device_buffer.dtype),
                "pointer": device_buffer.ptr,
            }
            x0_feed = representations[mode]

            batch = {
                "X0": x0_feed,
                "Y0": np.zeros((1, ), dtype=np.float32),
            }
            yield batch
def test_device_views(self, use_view):
    """Run the "reducable" model with a device-side "X0" and a host-side "Y0".

    Args:
        use_view: When truthy, "X0" is passed as a ``cuda.DeviceView`` built
            from the device array's pointer, shape and dtype; otherwise the
            ``cuda.DeviceArray`` itself is passed.

    Both inputs hold a single 1.0; the test asserts that the first element of
    outputs "identity_out_6" and "identity_out_8" equals 2.
    """
    onnx_model = ONNX_MODELS["reducable"]
    engine_loader = EngineFromNetwork(NetworkFromOnnxBytes(onnx_model.loader))

    with TrtRunner(engine_loader) as runner:
        with cuda.DeviceArray((1, ), dtype=np.float32) as device_x:
            device_x.copy_from(np.ones((1, ), dtype=np.float32))

            if use_view:
                x0_feed = cuda.DeviceView(device_x.ptr, device_x.shape, device_x.dtype)
            else:
                x0_feed = device_x

            feed_dict = {
                "X0": x0_feed,
                "Y0": np.ones((1, ), dtype=np.float32),
            }
            outputs = runner.infer(feed_dict)

            for output_name in ("identity_out_6", "identity_out_8"):
                assert outputs[output_name][0] == 2
def test_calibrator_data_and_ordering_correct(self):
    """Check that the calibrator returns pointers in the requested order with the right data.

    Every batch supplies three (4, 5) float32 inputs in three different
    forms: "x0" as a NumPy array of zeros, "x1" as a ``cuda.DeviceArray``
    holding ones, and "x2" as the raw ``ptr`` of a ``cuda.DeviceArray``
    holding twos. For each batch, the pointer at position ``i`` of
    ``get_batch(names=["x0", "x1", "x2"])`` must view data equal to ``i``.
    """
    batch_count = 2
    shape = (4, 5)

    def generate_multidata(num_batches):
        """Yield ``num_batches`` feed dicts mixing host arrays, device arrays and pointers."""
        for _ in range(num_batches):
            host_zeros = np.zeros(shape, dtype=np.float32)

            # NOTE(review): these DeviceArrays are never explicitly freed in
            # this test -- confirm whether that is intentional.
            device_ones = cuda.DeviceArray(dtype=np.float32).copy_from(
                np.ones(shape, dtype=np.float32))
            device_twos = cuda.DeviceArray(dtype=np.float32).copy_from(
                np.ones(shape, dtype=np.float32) * 2)

            yield {
                "x0": host_zeros,
                "x1": device_ones,
                "x2": device_twos.ptr,
            }

    with Calibrator(generate_multidata(batch_count)) as calibrator:
        for _ in range(batch_count):
            device_ptrs = calibrator.get_batch(names=["x0", "x1", "x2"])

            # Input "x<i>" was filled with the value i, so the position in the
            # returned list doubles as the expected content.
            for expected_value, device_ptr in enumerate(device_ptrs):
                view = cuda.DeviceView(device_ptr, shape=shape, dtype=np.float32)
                assert np.all(view.numpy() == expected_value)