# Imports these tests rely on
from nnvm import symbol as sym
from nnvm import graph
from nnvm.compiler import graph_attr
from nnvm.top import registry as reg


def test_alter_conv2d_layout():
    data = sym.Variable("data", shape=(1, 32, 512, 512))
    conv = sym.conv2d(data, name="conv", channels=16,
                      kernel_size=(3, 3), padding=(1, 1),
                      use_bias=False, layout="NCHW")
    relu = sym.relu(conv, name="relu")
    flatten = sym.flatten(relu, name="flatten")
    softmax = sym.softmax(flatten, name="softmax")
    g = graph.create(softmax)

    g = g.apply("CorrectLayout")
    g = graph_attr.set_dtype_inputs(g, "float32")
    g = g.apply(["InferShape", "InferType"])
    layouts_origin = get_layouts(g)

    @reg.register_alter_op_layout("conv2d")
    def alter_conv2d_layout(attrs, inputs, tinfos):
        new_attrs = {k: attrs[k] for k in attrs.keys()}
        new_attrs["layout"] = "NCHW16c"
        new_attrs["kernel_layout"] = "NCHW16c"
        new_attrs["name"] = "conv_alter"
        return sym.conv2d(inputs[0], inputs[1], **new_attrs)

    g = g.apply("AlterOpLayout")
    layouts = get_layouts(g)

    # check that layouts are copied over: unaltered nodes keep their
    # original layouts, and the altered conv inherits the original conv's
    for node in ["data", "relu", "flatten", "softmax", "conv_weight"]:
        assert layouts[node] == layouts_origin[node]
    assert layouts["conv_alter"] == layouts_origin["conv"]
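# The assertions above rely on a `get_layouts` helper that is not shown in
# this section. A minimal sketch, assuming NNVM's GraphIndex exposes `nodes`
# and `node_row_ptr` (via `entry_ptr`): it maps each node name to the slice
# of the flat "layout" attribute covering that node's output entries.
def get_layouts(g):
    ldict = {}
    vlayout = g.json_attr("layout")
    entry_ptr = g.index.entry_ptr
    for i, n in enumerate(g.index.nodes):
        begin, end = entry_ptr[i], entry_ptr[i + 1]
        ldict[n["name"]] = vlayout[begin:end]
    return ldict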
def test_consecutive_alter_layout():
    data = sym.Variable("data", shape=(1, 32, 512, 512))
    pool1 = sym.global_avg_pool2d(data, name="global_avg_pool2d_1", layout="NCHW")
    pool2 = sym.global_avg_pool2d(pool1, name="global_avg_pool2d_2", layout="NCHW")
    relu = sym.relu(pool2, name="relu")
    g = graph.create(relu)

    g = g.apply("CorrectLayout")
    g = graph_attr.set_dtype_inputs(g, "float32")
    g = g.apply(["InferShape", "InferType"])
    assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW']

    @reg.register_alter_op_layout("global_avg_pool2d", level=100)
    def alter_global_avg_pool2d_layout(attrs, inputs, tinfos):
        new_attrs = {k: attrs[k] for k in attrs.keys()}
        new_attrs["layout"] = "NCHW16c"
        return sym.global_avg_pool2d(inputs[0], **new_attrs)

    g = g.apply("AlterOpLayout")

    # pool1 gets replaced - the output layout of pool1 is not recorded
    # pool2 gets replaced - the input layout of pool2 is not recorded
    # Thus the second entry must be undefined - it can be recovered neither
    # from pool1's output nor from pool2's input.
    assert g.json_attr("layout") == ['NCHW', '__undef__', 'NCHW', 'NCHW']
def test_alter_func_return_none():
    data = sym.Variable("data", shape=(1, 32, 512, 512))
    pool1 = sym.global_max_pool2d(data, name="pool1", layout="NCHW")
    pool2 = sym.global_max_pool2d(pool1, name="pool2", layout="NCHW")
    relu = sym.relu(pool2, name="relu")
    g = graph.create(relu)

    g = g.apply("CorrectLayout")
    g = graph_attr.set_dtype_inputs(g, "float32")
    g = g.apply(["InferShape", "InferType"])
    assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW']

    @reg.register_alter_op_layout("global_max_pool2d", level=100)
    def alter_global_max_pool2d_layout(attrs, inputs, tinfos):
        return None

    g = g.apply("AlterOpLayout")

    # The alter function returns None, so nothing gets replaced
    # and the layouts remain the same.
    assert g.json_attr("layout") == ['NCHW', 'NCHW', 'NCHW', 'NCHW']
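# A runner in the usual convention for these test files (an assumption;
# not shown in the original snippet):
if __name__ == "__main__":
    test_alter_conv2d_layout()
    test_consecutive_alter_layout()
    test_alter_func_return_none()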
def generate_graph(graph_fn, params_fn, device="vta"):
    # Note: assumes `env` (from vta.get_env()) and an RPC session `remote`
    # were created earlier in the script.

    # Measure build start time
    build_start = time.time()

    # Derive the TVM target
    target = tvm.target.create("llvm -device={}".format(device))

    # Derive the LLVM compiler flags
    # When targeting the Pynq, cross-compile to the ARMv7 ISA
    if env.TARGET == "sim":
        target_host = "llvm"
    elif env.TARGET == "pynq":
        target_host = "llvm -mtriple=armv7-none-linux-gnueabihf -mcpu=cortex-a9 -mattr=+neon"

    # Load the ResNet-18 graph and parameters
    sym = nnvm.graph.load_json(open(graph_fn).read())
    params = nnvm.compiler.load_param_dict(open(params_fn, 'rb').read())

    # Populate the shape and data type dictionaries
    shape_dict = {"data": (1, 3, 224, 224)}
    dtype_dict = {"data": 'float32'}
    shape_dict.update({k: v.shape for k, v in params.items()})
    dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

    # Create the NNVM graph and run shape/type inference
    graph = nnvm.graph.create(sym)
    graph_attr.set_shape_inputs(graph, shape_dict)
    graph_attr.set_dtype_inputs(graph, dtype_dict)
    graph = graph.apply("InferShape").apply("InferType")

    # Apply NNVM graph optimization passes
    sym = vta.graph.clean_cast(sym)
    sym = vta.graph.clean_conv_fuse(sym)
    if target.device_name == "vta":
        assert env.BLOCK_IN == env.BLOCK_OUT
        sym = vta.graph.pack(sym, shape_dict, env.BATCH, env.BLOCK_OUT)

    # Compile NNVM graph
    with nnvm.compiler.build_config(opt_level=3):
        if target.device_name != "vta":
            graph, lib, params = nnvm.compiler.build(
                sym, target, shape_dict, dtype_dict,
                params=params, target_host=target_host)
        else:
            with vta.build_config():
                graph, lib, params = nnvm.compiler.build(
                    sym, target, shape_dict, dtype_dict,
                    params=params, target_host=target_host)

    # Save the compiled inference graph library
    assert tvm.module.enabled("rpc")
    temp = util.tempdir()
    lib.save(temp.relpath("graphlib.o"))

    # Send the inference library over to the remote RPC server
    remote.upload(temp.relpath("graphlib.o"))
    lib = remote.load_module("graphlib.o")

    # Measure build time
    build_time = time.time() - build_start
    print("ResNet-18 inference graph built in {0:.2f}s!".format(build_time))

    return graph, lib, params
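# A usage sketch for generate_graph (the file names are hypothetical;
# `env` and `remote` must be set up beforehand, as noted above):
#
#   graph, lib, params = generate_graph("resnet18_graph.json",
#                                       "resnet18_params.params",
#                                       device="vta")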
######################################################################
# Now compile the graph
import nnvm.compiler

np.random.seed(0)
sym = nnvm.graph.load_json(open(RESNET_GRAPH_FILE).read())
params = pickle.load(open(RESNET_PARAMS_FILE, 'rb'))

dt = time.time()

# Populate the shape and data type dictionaries
shape_dict = {"data": img.shape}
dtype_dict = {"data": 'float32'}
shape_dict.update({k: v.shape for k, v in params.items()})
dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

# Create the NNVM graph and run shape/type inference
graph = nnvm.graph.create(sym)
graph_attr.set_shape_inputs(graph, shape_dict)
graph_attr.set_dtype_inputs(graph, dtype_dict)
graph = graph.apply("InferShape").apply("InferType")

dtype = "float32"

# Apply the VTA-specific graph passes; `target` and the packing `factor`
# are defined earlier in the script
sym = vta.graph.remove_stochastic(sym)
sym = vta.graph.clean_cast(sym)
sym = vta.graph.clean_conv_fuse(sym)
if "vta" in target:
    sym = vta.graph.pack(sym, shape_dict, factor)

# Re-run shape/type inference on the transformed graph
graph_attr.set_shape_inputs(sym, shape_dict)
sym = sym.apply("InferShape")
graph_attr.set_dtype_inputs(sym, dtype_dict)
sym = sym.apply("InferType")

timers['execution_time_prepare_graph'] = time.time() - dt
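######################################################################
# A sketch of the compile step this preparation leads into (an assumption
# mirroring the generate_graph flow above, not part of the original
# snippet; `target` and `target_host` come from earlier in the script):
#
#   with nnvm.compiler.build_config(opt_level=3):
#       with vta.build_config():
#           graph, lib, params = nnvm.compiler.build(
#               sym, target, shape_dict, dtype_dict,
#               params=params, target_host=target_host)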