def test_byoc_microtvm(merge_compiler_regions):
    """
    This is a simple test to check BYOC capabilities of AOT
    with and without merging compiler regions to test for
    https://github.com/apache/tvm/issues/9036
    """
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    input_x = relay.var("x", shape=(10, 10))
    input_w0 = relay.var("w0", shape=(10, 10))
    input_w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    marked_input_x = compiler_begin(input_x, "ccompiler")
    marked_input_w0 = compiler_begin(input_w0, "ccompiler")
    add_x_and_w0 = relay.add(marked_input_x, marked_input_w0)
    end_inner_add = compiler_end(add_x_and_w0, "ccompiler")

    # z1 = z0 + w1
    marked_inner_add = compiler_begin(end_inner_add, "ccompiler")
    marked_w1 = compiler_begin(input_w1, "ccompiler")
    add_nested_and_w1 = relay.add(marked_inner_add, marked_w1)
    end_outer_add = compiler_end(add_nested_and_w1, "ccompiler")

    # z2 = z0 + z1
    final_add = relay.add(end_inner_add, end_outer_add)
    relay_func = relay.Function([input_x, input_w0, input_w1], final_add)

    mod = tvm.IRModule()
    mod["main"] = relay_func

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)

    compile_and_run(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def _construct_model(func, m1, m2):
    mod = tvm.IRModule()
    mod["main"] = func
    mod = transform.AnnotateTarget("coremlcompiler")(mod)
    mod = transform.PartitionGraph()(mod)

    fcompile = tvm._ffi.get_global_func("relay.ext.coremlcompiler")

    for var, func in mod.functions.items():
        if func.attrs and "Compiler" in func.attrs and func.attrs["Compiler"] == "coremlcompiler":
            fcompile(func)

def test_extern_ccompiler_default_ops():
    def expected():
        mod = tvm.IRModule()
        x = relay.var("x", shape=(8, 8))
        y = relay.var("y", shape=(8, 8))
        x0 = relay.var("x0", shape=(8, 8))
        y0 = relay.var("y0", shape=(8, 8))
        add = x0 + y0
        # Function that uses C compiler
        func = relay.Function([x0, y0], add)
        func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func = func.with_attr("Compiler", tvm.tir.StringImm("ccompiler"))
        func = func.with_attr("ExternalSymbol", tvm.tir.StringImm("ccompiler_0"))
        glb_0 = relay.GlobalVar("ccompiler_0")
        mod[glb_0] = func
        add_call = relay.Call(glb_0, [x, y])
        # Function that uses default compiler. Ops are fused in this function.
        p0 = relay.var("p0", shape=(8, 8))
        log = relay.log(p0)
        exp = relay.exp(p0)
        concat = relay.concatenate([log, exp], axis=0)
        fused_func = relay.Function([p0], concat)
        fused_func = fused_func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        fused_call = relay.Call(fused_func, [add_call])
        main = relay.Function([x, y], fused_call)
        mod["main"] = main
        return mod

    x = relay.var("x", shape=(8, 8))
    y = relay.var("y", shape=(8, 8))
    add = x + y
    log = relay.log(add)
    exp = relay.exp(add)
    concat = relay.concatenate([log, exp], axis=0)
    f = relay.Function([x, y], concat)
    mod = tvm.IRModule()
    mod["main"] = f
    mod = WhiteListAnnotator(["add", "subtract", "multiply"], "ccompiler")(mod)
    mod = transform.PartitionGraph()(mod)

    fused_mod = transform.FuseOps(2)(mod)
    expected_mod = expected()
    assert relay.alpha_equal(fused_mod, expected_mod)

    x_data = np.random.rand(8, 8).astype('float32')
    y_data = np.random.rand(8, 8).astype('float32')
    np_add = x_data + y_data
    res = np.concatenate([np.log(np_add), np.exp(np_add)])
    check_result(mod, {"x": x_data, "y": y_data}, (16, 8), res)

def test_constant():
    """Test the subgraph with (var, const, ...) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, 32, 3, 3)

    data = relay.var("data", shape=ishape, dtype=dtype)
    weight = relay.var("weight", shape=wshape, dtype=dtype)
    bn_gamma = relay.var("bn_gamma")
    bn_beta = relay.var("bn_beta")
    bn_mmean = relay.var("bn_mean")
    bn_mvar = relay.var("bn_var")

    layer = relay.nn.conv2d(data=data, weight=weight, kernel_size=(3, 3), padding=(1, 1))
    bn_output = relay.nn.batch_norm(layer, bn_gamma, bn_beta, bn_mmean, bn_mvar)
    out = bn_output[0]
    out = relay.nn.relu(out)

    func = relay.Function(relay.analysis.free_vars(out), out)
    ref_mod, params = tvm.relay.testing.create_workload(func)
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    remove_bn_pass = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
        ]
    )

    dnnl_patterns = get_pattern_table("dnnl")
    composite_partition = tvm.transform.Sequential(
        [
            transform.MergeComposite(dnnl_patterns),
            transform.AnnotateTarget("dnnl"),
            transform.PartitionGraph(),
        ]
    )

    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
        ref_mod = remove_bn_pass(ref_mod)
        mod = composite_partition(ref_mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    check_result(mod, ref_mod, {"data": i_data}, (1, 32, 14, 14), tol=1e-5)

def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT
    - with and without merging compiler regions to test for
    https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)
    f = relay.Function([x, w0, w1], z2)

    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)

    compile_and_run(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_partial_constant():
    """Test the subgraph with (const, var, const, var) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (10, 10)

    in_1 = relay.var("in_1", shape=ishape, dtype=dtype)
    in_2 = relay.var("in_2", shape=ishape, dtype=dtype)
    in_3 = relay.var("in_3", shape=ishape, dtype=dtype)
    in_4 = relay.var("in_4", shape=ishape, dtype=dtype)

    add1 = relay.add(in_1, in_2)
    add2 = relay.add(add1, in_3)
    add3 = relay.add(add2, in_3)
    add4 = relay.add(add3, in_3)
    func = relay.Function([in_1, in_2, in_3, in_4], add4)

    ref_mod = tvm.IRModule.from_expr(func)
    ref_mod = relay.transform.InferType()(ref_mod)

    data1 = np.random.uniform(0, 1, ishape).astype(dtype)
    data3 = np.random.uniform(0, 1, ishape).astype(dtype)

    params = {
        "in_1": tvm.nd.array(data1, ctx=tvm.cpu(0)),
        "in_3": tvm.nd.array(data3, ctx=tvm.cpu(0)),
    }
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    opt_pass = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
            transform.AnnotateTarget("dnnl"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
        mod = opt_pass(ref_mod)

    data2 = np.random.uniform(0, 1, ishape).astype(dtype)
    data4 = np.random.uniform(0, 1, ishape).astype(dtype)

    check_result(mod, ref_mod, {"in_2": data2, "in_4": data4}, (10, 10), tol=1e-5)

def test_extern_ccompiler():
    x = relay.var('x', shape=(2, 2))
    y = relay.var('y', shape=(2, 2))
    z = x + x
    p = y * y
    f = relay.Function([x, y], p - z)

    x_data = np.random.rand(2, 2).astype('float32')
    y_data = np.random.rand(2, 2).astype('float32')

    mod = tvm.IRModule()
    mod["main"] = f
    mod = WhiteListAnnotator(["add", "subtract", "multiply"], "ccompiler")(mod)
    mod = transform.PartitionGraph()(mod)

    check_result(mod, {"x": x_data, "y": y_data}, (2, 2), (y_data * y_data) - (x_data + x_data))

def partition_for_vitis_ai(mod, params=None, dpu=None, **opts):
    """Partition the Relay expression for offloading operators to Vitis AI DPU

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    dpu : str
        The DPU identifier (e.g. DPUCZDX8G-zcu104, DPUCADF8H)

    Returns
    -------
    ret : Module
    """
    if dpu is None:
        raise ValueError("Please pass Vitis AI DPU identifier to the partitioning function")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    desired_layouts_in_partition = {
        "nn.conv2d": ["NHWC", "default"],
        "nn.upsampling": ["NHWC"],
        "image.resize2d": ["NHWC"],
    }
    desired_layouts_in_main = {
        "nn.conv2d": ["NCHW", "default"],
        "nn.upsampling": ["NCHW"],
        "image.resize2d": ["NCHW"],
    }

    seq = tvm.transform.Sequential(
        [
            transform.RemoveUnusedFunctions(),
            transform.ConvertLayout(desired_layouts_in_partition),
            transform.FoldConstant(),
            transform.InferType(),
            VitisAIAnnotationPass("vitis_ai", dpu, params),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
            transform.RemoveUnusedFunctions(),
            transform.ConvertLayout(desired_layouts_in_main),
            transform.FoldConstant(),
        ]
    )

    with tvm.transform.PassContext(opt_level=3):
        return seq(mod)

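# A minimal, illustrative sketch of calling partition_for_vitis_ai above. The toy conv2d
# graph and the "DPUCZDX8G-zcu104" identifier are assumptions made for this example only,
# and actually compiling the partitioned module still requires the Vitis AI (PyXIR) stack.
import numpy as np
import tvm
from tvm import relay


def example_partition_for_vitis_ai():
    data = relay.var("data", shape=(1, 3, 224, 224), dtype="float32")
    weight = relay.const(np.random.rand(16, 3, 3, 3).astype("float32"))
    out = relay.nn.conv2d(data, weight, kernel_size=(3, 3), channels=16, padding=(1, 1))
    mod = tvm.IRModule.from_expr(relay.Function([data], out))
    # Supported operators are moved into "vitis_ai" functions; the rest stays in main.
    return partition_for_vitis_ai(mod, params={}, dpu="DPUCZDX8G-zcu104")
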
def test_mobilenet_dnnl():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")

    mod = transform.AnnotateTarget(["dnnl"])(mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)

def test_multi_node_compiler():
    x = relay.var('x', shape=(10, 10))
    w0 = relay.var('w0', shape=(10, 10))
    w1 = relay.var('w1', shape=(10, 10))
    w2 = relay.var('w2', shape=(10, 10))
    w3 = relay.var('w3', shape=(10, 10))
    w4 = relay.var('w4', shape=(10, 10))
    w5 = relay.var('w5', shape=(10, 10))
    w6 = relay.var('w6', shape=(10, 10))
    w7 = relay.var('w7', shape=(10, 10))

    # C compiler
    # FIXME: We generate two compilers for this case but they should be merged to one
    # due to the common input (x).
    z0 = relay.add(x, w0)
    p0 = relay.subtract(z0, w1)
    q0 = relay.multiply(p0, w2)

    z1 = relay.add(x, w3)
    p1 = relay.subtract(z1, w4)
    q1 = relay.multiply(p1, w5)

    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((q0, q1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    ann = CcompilerAnnotator()
    mod["main"] = ann.visit(f)
    mod = transform.PartitionGraph()(mod)
    mod = transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype('float32')
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype('float32'))

    map_inputs = {"w{}".format(i): w_data[i] for i in range(8)}
    map_inputs["x"] = x_data
    check_result(
        mod,
        map_inputs,
        (30, 10),
        np.concatenate(
            (
                ((x_data + w_data[0]) - w_data[1]) * w_data[2],
                ((x_data + w_data[3]) - w_data[4]) * w_data[5],
                x_data + w_data[6] - w_data[7],
            ),
            axis=0,
        ),
    )

def get_partitoned_mod(mod):
    remove_bn_pass = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
        ]
    )
    byoc_pass = tvm.transform.Sequential(
        [
            remove_bn_pass,
            transform.AnnotateTarget("dnnl"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    with tvm.transform.PassContext(opt_level=3, disabled_pass=["AlterOpLayout"]):
        return byoc_pass(mod)

def build_module(
    mod,
    target,
    dpu_target="DPUCADX8G",
    params=None,
    enable_vitis_ai=True,
    tvm_ops=0,
    vitis_ai_partitions=1,
):
    """Build module for Vitis-AI codegen."""
    if isinstance(mod, tvm.relay.expr.Call):
        mod = tvm.IRModule.from_expr(mod)
    if params is None:
        params = {}

    mod = relay.transform.InferType()(mod)
    temp = utils.tempdir()
    export_rt_mod_file = temp.relpath("vitis_ai.rtmod")
    with tvm.transform.PassContext(
        opt_level=3,
        config={
            "relay.ext.vitis_ai.options.target": dpu_target,
            "relay.ext.vitis_ai.options.export_runtime_module": export_rt_mod_file,
        },
    ):
        if enable_vitis_ai:
            mod["main"] = bind_params_by_name(mod["main"], params)
            mod = annotation(mod, params, dpu_target)
            mod = transform.MergeCompilerRegions()(mod)
            mod = transform.PartitionGraph()(mod)
            tvm_op_count = get_cpu_op_count(mod)
            assert tvm_op_count == tvm_ops, "Got {} TVM operators, expected {}".format(
                tvm_op_count, tvm_ops
            )
            partition_count = 0
            for global_var in mod.get_global_vars():
                if "vitis_ai" in global_var.name_hint:
                    partition_count += 1

            assert (
                vitis_ai_partitions == partition_count
            ), "Got {} Vitis-AI partitions, expected {}".format(
                partition_count, vitis_ai_partitions
            )

        relay.backend.compile_engine.get().clear()
        return relay.build(mod, target, params=params)

def test_run():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    ref_mod = annotated(dtype, ishape, w1shape)
    mod = annotated(dtype, ishape, w1shape)
    mod = transform.PartitionGraph()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, w1shape).astype(dtype)

    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu())
    ref_res = ref_ex.evaluate()(i_data, w1_data)

    check_result(
        mod, {"data": i_data, "weight1": w1_data}, (1, 32, 14, 14), ref_res.asnumpy(), tol=1e-5
    )

def offload(mod):
    """Offload ops based on the registered ops

    Parameters
    ----------
    mod : Module
        The input module.

    Returns
    -------
    mod : Module
        The output module with offloaded ops.
    """
    backend = "verilator"
    mod = transform.AnnotateTarget([backend])(mod)
    mod = transform.PartitionGraph()(mod)
    return mod

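# A hedged usage sketch for offload(): build a tiny add graph and run it through the
# annotate/partition pipeline above. Whether the add actually ends up in a "verilator"
# function depends on which operators are registered for that backend in your build.
import tvm
from tvm import relay


def example_offload():
    x = relay.var("x", shape=(8, 8), dtype="int32")
    y = relay.var("y", shape=(8, 8), dtype="int32")
    mod = tvm.IRModule.from_expr(relay.Function([x, y], relay.add(x, y)))
    return offload(mod)
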
def test_extern_dnnl_mobilenet():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 3, 224, 224)
    ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")
    mod = transform.AnnotateTarget(["dnnl"])(ref_mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)
    i_data = np.random.uniform(0, 1, ishape).astype(dtype)

    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0))
    ref_res = ref_ex.evaluate()(i_data, **params)
    compile_engine.get().clear()

    check_result(mod, {"data": i_data}, (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params)

def partition_for_ethosn(mod, params=None, **opts):
    """Partition the graph greedily offloading supported
    operators to Arm Ethos-N NPU.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    opts = opts or {}
    if "variant" not in opts:
        raise ValueError("Please specify a variant in the target string, e.g. -variant=n78.")

    # -variant=ethos-n78 deprecated in favour of -variant=n78
    if opts["variant"].lower() == "ethos-n78":
        warnings.warn(
            "Please use '-variant=n78' instead of the deprecated "
            "'-variant=ethos-n78', which will be removed in TVM v0.9.",
            DeprecationWarning,
        )
    elif opts["variant"] != "n78":
        raise ValueError("When targeting Ethos(TM)-N78, -variant=n78 should be set.")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.MergeComposite(pattern_table()),
            transform.AnnotateTarget("ethos-n"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    return seq(mod)

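# An illustrative call of partition_for_ethosn. The uint8 toy graph is an assumption for
# this sketch; the important part is that a variant option (here "n78") must be supplied,
# and running the result still needs the Ethos(TM)-N support library.
import tvm
from tvm import relay


def example_partition_for_ethosn():
    a = relay.var("a", shape=(1, 16, 16, 4), dtype="uint8")
    b = relay.var("b", shape=(1, 16, 16, 4), dtype="uint8")
    mod = tvm.IRModule.from_expr(relay.Function([a, b], relay.add(a, b)))
    return partition_for_ethosn(mod, variant="n78")
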
def get_partitoned_mod(mod, params, pattern_table):
    # This is required for constant folding
    mod["main"] = bind_params_by_name(mod["main"], params)

    remove_bn_pass = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
        ]
    )
    composite_partition = tvm.transform.Sequential(
        [
            remove_bn_pass,
            transform.MergeComposite(pattern_table),
            transform.AnnotateTarget("dnnl"),
            transform.PartitionGraph(),
        ]
    )

    with relay.build_config(opt_level=3, disabled_pass=["AlterOpLayout"]):
        return composite_partition(mod)

def test_mobilenet_dnnl():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")

    mod = transform.AnnotateTarget(["dnnl"])(mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    data = get_calibration_data(mod, {"data": i_data, **params})

    # Check the number and orders
    check_data_size(mod, data)

def test_constant_tuples():
    @tvm.ir.register_op_attr("qnn.concatenate", "target.const_tuples")
    def add(attrs, args):  # pylint: disable=unused-variable
        return True

    def create_graph():
        a = relay.var("a", shape=(10, 10), dtype="uint8")
        b = relay.var("b", shape=(10, 10), dtype="uint8")
        a1 = relay.abs(a)

        zeroi = relay.const(1, "int32")
        zerof = relay.const(0, "float32")
        con = relay.qnn.op.concatenate(
            (a1, b),
            input_scales=(zerof, zerof),
            input_zero_points=(zeroi, zeroi),
            output_scale=zerof,
            output_zero_point=zeroi,
            axis=1,
        )

        f = relay.Function([a, b], con)
        mod = tvm.IRModule.from_expr(f)
        mod = transform.InferType()(mod)
        return mod

    seq = tvm.transform.Sequential(
        [
            transform.AnnotateTarget("const_tuples"),
            transform.InferType(),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    partitioned = seq(create_graph())
    concat = partitioned["const_tuples_0"].body
    assert type(concat.args[1]) == relay.Tuple
    assert type(concat.args[2]) == relay.Tuple
    assert type(concat.args[3]) == relay.Constant
    assert type(concat.args[4]) == relay.Constant

def test_constant_propagation():
    ones = np.ones(shape=(8, 8), dtype="float32")

    def expected():
        mod = tvm.IRModule()
        x = relay.const(ones)
        y = relay.var("y", shape=(8, 8))
        x0 = relay.const(ones)
        y0 = relay.var("y0", shape=(8, 8))
        add = x0 + y0
        # Function that uses C compiler
        func = relay.Function([y0], add)
        func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func = func.with_attr("Compiler", tvm.tir.StringImm("ccompiler"))
        func = func.with_attr("ExternalSymbol", tvm.tir.StringImm("ccompiler_0"))
        glb_0 = relay.GlobalVar("ccompiler_0")
        mod[glb_0] = func
        add_call = relay.Call(glb_0, [y])
        log = relay.log(add_call)
        main = relay.Function([y], log)
        mod["main"] = main
        return mod

    x = relay.var("x", shape=(8, 8))
    y = relay.var("y", shape=(8, 8))
    add = x + y
    log = relay.log(add)
    f = relay.Function([x, y], log)
    f = relay.build_module.bind_params_by_name(f, {"x": tvm.nd.array(ones)})
    mod = tvm.IRModule()
    mod["main"] = f
    mod = WhiteListAnnotator(["add"], "ccompiler")(mod)
    mod = transform.PartitionGraph()(mod)

    expected_mod = expected()
    assert relay.alpha_equal(mod, expected_mod)

    y_data = np.random.rand(8, 8).astype('float32')
    np_add = ones + y_data
    check_result(mod, {"y": y_data}, (8, 8), np.log(np_add))

def test_extern_dnnl():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = 'float32'
    ishape = (1, 32, 14, 14)
    w1shape = (32, 1, 3, 3)
    data = relay.var('data', shape=(ishape), dtype=dtype)
    weight1 = relay.var('weight1', shape=(w1shape), dtype=dtype)
    depthwise_conv2d_1 = relay.nn.conv2d(
        data, weight1, kernel_size=(3, 3), padding=(1, 1), groups=32
    )
    depthwise_conv2d_2 = relay.nn.conv2d(
        depthwise_conv2d_1, weight1, kernel_size=(3, 3), padding=(1, 1), groups=32
    )
    out = relay.add(depthwise_conv2d_1, depthwise_conv2d_2)

    f = relay.Function([data, weight1], out)

    mod = relay.Module()
    mod['main'] = WholeGraphAnnotator('dnnl').visit(f)
    mod = transform.PartitionGraph()(mod)

    ref_mod = relay.Module()
    ref_mod['main'] = f

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, w1shape).astype(dtype)

    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu())
    ref_res = ref_ex.evaluate()(i_data, w1_data)

    check_result(
        mod,
        {"data": i_data, "weight1": w1_data},
        (1, 32, 14, 14),
        ref_res.asnumpy(),
        tol=1e-5,
    )

def test_extern_dnnl_mobilenet():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")
    mod["main"] = relay.build_module.bind_params_by_name(mod["main"], params)
    mod = transform.AnnotateTarget("dnnl")(mod)
    mod = transform.PartitionGraph()(mod)
    i_data = np.random.uniform(0, 1, ishape).astype(dtype)

    ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")
    ref_res = relay.create_executor("graph", mod=ref_mod, device=tvm.cpu(0)).evaluate()(
        i_data, **params
    )
    check_result(mod, {"data": i_data}, (1, 1000), ref_res.numpy(), tol=1e-5, params=params)

def test_constant_propagation():
    ones = np.ones(shape=(8, 8), dtype="float32")

    def expected():
        mod = tvm.IRModule()
        y = relay.var("y", shape=(8, 8))
        x0 = relay.const(ones)
        y0 = relay.var("y0", shape=(8, 8))
        add = x0 + y0
        # Function that uses C compiler
        func = relay.Function([y0], add)
        func = set_func_attr(func, "ccompiler", "ccompiler_0")
        glb_0 = relay.GlobalVar("ccompiler_0")
        mod[glb_0] = func
        mod = relay.transform.InferType()(mod)
        add_call = relay.Call(glb_0, [y])
        log = relay.log(add_call)
        main = relay.Function([y], log)
        mod["main"] = main
        mod = relay.transform.InferType()(mod)
        return mod

    x = relay.var("x", shape=(8, 8))
    y = relay.var("y", shape=(8, 8))
    add = x + y
    log = relay.log(add)
    f = relay.Function([x, y], log)
    f = bind_params_by_name(f, {"x": tvm.nd.array(ones)})
    mod = tvm.IRModule()
    mod["main"] = f
    mod = WhiteListAnnotator(["add"], "ccompiler")(mod)
    mod = transform.PartitionGraph()(mod)
    mod = relay.transform.InferType()(mod)

    expected_mod = expected()
    expected_mod = relay.transform.InferType()(expected_mod)
    assert tvm.ir.structural_equal(mod, expected_mod, map_free_vars=True)

    y_data = np.random.rand(8, 8).astype("float32")
    np_add = ones + y_data
    check_result(mod, {"y": y_data}, (8, 8), np.log(np_add))

def test_tuple_output_exec():
    """Test C codegen and runtime for a subgraph with a tuple output"""
    a = relay.var('a', shape=(10, 10), dtype='float32')
    b = relay.var('b', shape=(10, 10), dtype='float32')
    ba = relay.annotation.compiler_begin(a, 'ccompiler')
    bb = relay.annotation.compiler_begin(b, 'ccompiler')
    add = relay.add(ba, bb)
    sub = relay.subtract(ba, bb)
    out = relay.Tuple((add, sub))
    eout = relay.annotation.compiler_end(out, 'ccompiler')
    func = relay.Function([a, b], eout)

    mod = tvm.IRModule()
    mod["main"] = func
    mod = transform.PartitionGraph()(mod)

    a_data = np.random.rand(10, 10).astype('float32')
    b_data = np.random.rand(10, 10).astype('float32')

    check_result(
        mod,
        {'a': a_data, 'b': b_data},
        [(10, 10), (10, 10)],
        [(a_data + b_data), (a_data - b_data)],
    )

def test_load_params_with_constants_in_ext_codegen():
    # After binding params and partitioning, graph_module.get_params()
    # might contain parameters that are not a graph runtime input but,
    # for example, constants in an external function.
    y_in = np.ones((1,)).astype("float32")
    params = {"y": y_in}
    mod = tvm.IRModule()
    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1,))
    xcb = compiler_begin(x, "ccompiler")
    ycb = compiler_begin(y, "ccompiler")
    z = relay.add(xcb, ycb)
    zce = compiler_end(z, "ccompiler")
    mod["main"] = relay.Function([x, y], zce)
    mod["main"] = bind_params_by_name(mod["main"], params)
    mod = transform.PartitionGraph()(mod)

    graph_module = relay.build(mod, target="llvm", params=params)
    lib = update_lib(graph_module.get_lib())
    rt_mod = tvm.contrib.graph_runtime.create(graph_module.get_json(), lib, tvm.cpu(0))

    rt_mod.load_params(runtime.save_param_dict(graph_module.get_params()))

def partition(dpu_target):
    data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32"))
    weight = relay.var("weight", relay.TensorType((16, 3, 3, 3), "float32"))
    bn_gamma = relay.var("bn_gamma", relay.TensorType((16,), "float32"))
    bn_beta = relay.var("bn_beta", relay.TensorType((16,), "float32"))
    bn_mmean = relay.var("bn_mean", relay.TensorType((16,), "float32"))
    bn_mvar = relay.var("bn_var", relay.TensorType((16,), "float32"))

    conv = relay.nn.conv2d(
        data=data, weight=weight, kernel_size=(3, 3), channels=16, padding=(1, 1)
    )
    bn_output = relay.nn.batch_norm(conv, bn_gamma, bn_beta, bn_mmean, bn_mvar)

    func = relay.Function(
        [data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar], bn_output.astuple()
    )
    mod = tvm.IRModule()
    mod["main"] = func
    params = {}
    params["weight"] = np.random.rand(16, 3, 3, 3).astype("float32")
    params["bn_gamma"] = np.random.rand(16).astype("float32")
    params["bn_beta"] = np.random.rand(16).astype("float32")
    params["bn_mean"] = np.random.rand(16).astype("float32")
    params["bn_var"] = np.random.rand(16).astype("float32")
    mod = annotation(mod, params, dpu_target)

    opt_pass = tvm.transform.Sequential(
        [
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    with tvm.transform.PassContext(opt_level=3):
        mod = opt_pass(mod)

    return mod

def partition_for_bnns(mod, params=None):
    """Partition the graph greedily offloading supported
    operators to BNNS.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
            transform.DynamicToStatic(),
            transform.AlterOpLayout(),
            # TODO(apeskov): WA. The AlterOpLayout call leads to constant shape transformations.
            #   Some expand_dims ops may appear after constants, which breaks BNNS fusing,
            #   so FoldConstant has to be called right before the BNNS composite passes.
            transform.FoldConstant(),
            transform.MergeComposite(get_pattern_table("bnns")),
            transform.AnnotateTarget("bnns"),
            # If you do not need per-layer performance statistics you can
            # uncomment the next line
            # transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    return seq(mod)

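# A minimal sketch of partition_for_bnns, assuming a single dense layer whose weight is
# bound as a constant so the BNNS composite patterns can match. The shapes and names are
# illustrative, and offloading only happens in builds where BNNS codegen is enabled.
import numpy as np
import tvm
from tvm import relay


def example_partition_for_bnns():
    data = relay.var("data", shape=(1, 64), dtype="float32")
    weight = relay.var("weight", shape=(32, 64), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([data, weight], relay.nn.dense(data, weight)))
    params = {"weight": tvm.nd.array(np.random.rand(32, 64).astype("float32"))}
    return partition_for_bnns(mod, params=params)
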
def test_extern_dnnl_mobilenet():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = 'float32'
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype='float32')

    op_list = ["nn.conv2d", "nn.dense", "nn.relu", "add"]
    mod = WhiteListAnnotator(op_list, "dnnl")(mod)
    mod = transform.PartitionGraph()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)

    ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype='float32')
    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0))
    ref_res = ref_ex.evaluate()(i_data, **params)

    check_result(mod, {"data": i_data}, (1, 1000), ref_res.asnumpy(), tol=1e-5, params=params)

def partition_for_ethosn77(mod, params=None, **opts):
    """Partition the graph greedily offloading supported
    operators to Arm Ethos-N NPU.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    if opts:
        tops = opts.get("tops", None)
        ple_ratio = opts.get("ple_ratio", None)
        sram_size = opts.get("sram_size", None)
        if tops or ple_ratio or sram_size:
            raise ValueError(
                "Setting tops, ple_ratio or sram_size has no effect when targeting Ethos(TM)-N77"
            )

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.MergeComposite(pattern_table()),
            transform.AnnotateTarget("ethos-n"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    return seq(mod)

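# A short, hedged sketch for partition_for_ethosn77: unlike the N78 helper above, no extra
# options are required (tops, ple_ratio and sram_size must in fact be left unset). The toy
# graph is an assumption made only for this example.
import tvm
from tvm import relay


def example_partition_for_ethosn77():
    x = relay.var("x", shape=(1, 8, 8, 4), dtype="uint8")
    mod = tvm.IRModule.from_expr(relay.Function([x], relay.reshape(x, (1, 256))))
    return partition_for_ethosn77(mod)
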
def partition_for_arm_compute_lib(mod, params=None):
    """Partition the graph greedily offloading supported
    operators to Arm Compute Library.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    if params:
        mod['main'] = bind_params_by_name(mod['main'], params)

    seq = tvm.transform.Sequential(
        [
            transform.MergeComposite(arm_compute_lib_pattern_table()),
            transform.AnnotateTarget('arm_compute_lib'),
            transform.PartitionGraph(),
        ]
    )

    return seq(mod)

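# A minimal sketch of partition_for_arm_compute_lib, assuming an NHWC conv2d that the ACL
# pattern table can match. The shapes, layouts and the random constant weight are
# illustrative assumptions only; nothing here is required by the function's contract.
import numpy as np
import tvm
from tvm import relay


def example_partition_for_arm_compute_lib():
    data = relay.var("data", shape=(1, 14, 14, 32), dtype="float32")
    weight = relay.const(np.random.rand(3, 3, 32, 32).astype("float32"))
    out = relay.nn.conv2d(
        data,
        weight,
        kernel_size=(3, 3),
        padding=(1, 1),
        data_layout="NHWC",
        kernel_layout="HWIO",
    )
    mod = tvm.IRModule.from_expr(relay.Function([data], out))
    return partition_for_arm_compute_lib(mod)
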