def test_meta_schedule_tune_relay(
    model_name: str,
    input_shape: List[int],
    target: str,
):
    """Tune a Relay network with the evolutionary search strategy, then check
    that the tuned module produces the same output as an untuned build.

    The untuned reference is compiled at ``opt_level=0`` for the same target,
    so both modules execute on the same device and can share one input tensor.
    """
    dev = tvm.cpu() if str(target).startswith("llvm") else tvm.cuda()
    # BERT models consume integer token ids; every other model takes float32.
    if model_name.startswith("bert"):
        input_data = tvm.nd.array(np.random.randint(0, 30521, size=input_shape), dev)  # embedding size
    else:
        input_data = tvm.nd.array(np.random.randn(*input_shape).astype("float32"), dev)
    mod, params, (input_name, _, _) = get_network(name=model_name, input_shape=input_shape)
    target = Target(target)
    with tempfile.TemporaryDirectory() as work_dir:
        tuned_lib: tvm.runtime.Module = tune_relay(
            mod=mod,
            params=params,
            target=target,
            config=TuneConfig(
                strategy="evolutionary",
                num_trials_per_iter=32,
                max_trials_per_task=20000,
                max_trials_global=20000,
                search_strategy_config={
                    "genetic_num_iters": 10,
                },
            ),
            work_dir=work_dir,
            database=JSONDatabase(
                osp.join(work_dir, "workload.json"),
                osp.join(work_dir, "records.json"),
            ),
        )
        # Compile without meta-scheduler for correctness check
        with tvm.transform.PassContext(opt_level=0):
            untuned_lib = relay.build(mod, target=target, params=params)

        def run_module(tensor, lib):
            # Load the library on `dev`, feed the input, and fetch output 0.
            gmod = graph_executor.GraphModule(lib["default"](dev))
            gmod.set_input(input_name, tensor)
            gmod.run()
            return gmod.get_output(0).numpy()

        # Check correctness
        actual_output = run_module(input_data, tuned_lib)
        expected_output = run_module(input_data, untuned_lib)
        assert np.allclose(actual_output, expected_output, rtol=1e-4, atol=2e-4)
def test_meta_schedule_tune_relay(
    model_name: str,
    input_shape: List[int],
    target: str,
):
    """Tune a Relay network with ReplayTrace and compare its output against an
    untuned CPU (llvm) reference build.

    Fix: the reference module is built for ``Target("llvm")`` and its input is
    copied to ``tvm.cpu()``, but the module itself was previously loaded onto
    ``dev`` — a CUDA device whenever ``target`` is not llvm. ``get_output``
    now takes the device explicitly so each module runs on the device it was
    compiled for.
    """
    dev = tvm.cpu() if str(target).startswith("llvm") else tvm.cuda()
    # BERT models consume integer token ids; every other model takes float32.
    if model_name.startswith("bert"):
        data = tvm.nd.array(np.random.randint(0, 30521, size=input_shape), dev)  # embedding size
    else:
        data = tvm.nd.array(np.random.randn(*input_shape).astype("float32"), dev)
    mod, params, (input_name, _, _) = get_network(name=model_name, input_shape=input_shape)
    target = Target(target)
    with tempfile.TemporaryDirectory() as work_dir:
        database = DummyDatabase()
        rt_mod: tvm.runtime.Module = tune_relay(
            mod=mod,
            params=params,
            target=target,
            config=ReplayTraceConfig(
                num_trials_per_iter=32,
                num_trials_total=32,
            ),
            work_dir=work_dir,
            database=database,
        )
        # Compile without meta-scheduler for correctness check.
        # The reference is always built for CPU, so it must also run on CPU.
        with tvm.transform.PassContext(opt_level=0):
            rt_mod2 = relay.build(mod, target=Target("llvm"), params=params)

        def get_output(data, lib, device):
            # Run `lib` on `device` with `data` bound to the model input.
            module = graph_executor.GraphModule(lib["default"](device))
            module.set_input(input_name, data)
            module.run()
            return module.get_output(0).numpy()

        # Check correctness: tuned module on its own device vs CPU reference.
        actual_output = get_output(data, rt_mod, dev)
        expected_output = get_output(
            tvm.nd.array(data.numpy(), device=tvm.cpu()), rt_mod2, tvm.cpu()
        )
        assert np.allclose(actual_output, expected_output, rtol=1e-4, atol=2e-4)
def test_meta_schedule_tune_relay(model_name: str, batch_size: int, target: str):
    """Tune a torch-imported model with ReplayTrace and verify the tuned
    module matches an untuned ``opt_level=0`` build of the same target.

    Fixes: the final ``assert`` keyword was separated from its
    ``np.allclose(...)`` expression (leaving a dangling ``assert``); the
    ``rt_mod`` annotation used the deprecated ``tvm.module`` alias instead of
    ``tvm.runtime.Module``; ``cuda()`` is spelled ``tvm.cuda()`` for
    consistency with the sibling tests.
    """
    if model_name == "inception_v3" and batch_size == 1:
        pytest.skip("inception_v3 does not handle batch_size of 1")

    input_shape: Tuple[int, ...]
    input_name = "input0"
    dev = tvm.cpu() if str(target).startswith("llvm") else tvm.cuda()
    if MODEL_TYPES[model_name] == MODEL_TYPE.TEXT_CLASSIFICATION:
        seq_length = 128
        input_name = "input_ids"
        input_shape = (batch_size, seq_length)
        data = tvm.nd.array(np.random.randint(0, 30521, size=input_shape), dev)  # embedding size
    else:
        # Pick the canonical input shape for each vision model family.
        if MODEL_TYPES[model_name] == MODEL_TYPE.IMAGE_CLASSIFICATION:
            input_shape = (batch_size, 3, 299, 299)
        elif MODEL_TYPES[model_name] == MODEL_TYPE.SEGMENTATION:
            input_shape = (batch_size, 3, 299, 299)
        elif MODEL_TYPES[model_name] == MODEL_TYPE.OBJECT_DETECTION:
            input_shape = (1, 3, 300, 300)
        elif MODEL_TYPES[model_name] == MODEL_TYPE.VIDEO_CLASSIFICATION:
            input_shape = (batch_size, 3, 3, 299, 299)
        else:
            raise ValueError("Unsupported model: " + model_name)
        data = tvm.nd.array(np.random.randn(*input_shape).astype("float32"), dev)

    # NOTE(review): (batch_size, 1000) is a classification-head shape; it is
    # passed for all model types — confirm get_torch_model tolerates this for
    # segmentation/detection/video models.
    output_shape: Tuple[int, int] = (batch_size, 1000)
    mod, params = get_torch_model(
        model_name=model_name,
        input_shape=input_shape,
        output_shape=output_shape,
        dtype="float32",
    )
    with tempfile.TemporaryDirectory() as work_dir:
        target = Target(target)
        database = DummyDatabase()
        rt_mod: tvm.runtime.Module = tune_relay(
            mod=mod,
            params=params,
            target=target,
            config=ReplayTraceConfig(
                num_trials_per_iter=32,
                num_trials_total=32,
            ),
            work_dir=work_dir,
            database=database,
        )
        # Compile without meta-scheduler for correctness check
        with tvm.transform.PassContext(opt_level=0):
            rt_mod2 = relay.build(mod, target=target, params=params)

        def get_output(data, lib):
            # Load the library on `dev`, feed the input, and fetch output 0.
            module = graph_executor.GraphModule(lib["default"](dev))
            module.set_input(input_name, data)
            module.run()
            return module.get_output(0).numpy()

        # Check correctness
        actual_output = get_output(data, rt_mod)
        expected_output = get_output(data, rt_mod2)
        assert np.allclose(actual_output, expected_output, rtol=1e-4, atol=2e-4)