def get_conv2d(batch_size, IC, HW, OC, KHKW, Pad, Stride, layout="NCHW", dtype="float32"):
    """Build a single-conv2d Relay workload for profiling.

    Returns the (module, params) pair produced by ``create_workload``.
    The input is square (HW x HW) and the kernel is square (KHKW x KHKW).
    """
    from tvm.relay.testing.init import create_workload
    from tvm.relay.testing import layers

    # Kernel layout follows TVM's convention: OIHW for NCHW data, HWIO otherwise.
    kernel_layout = "OIHW" if layout == "NCHW" else "HWIO"
    inp = relay.var("data", shape=(batch_size, IC, HW, HW), dtype=dtype)
    body = layers.conv2d(
        data=inp,
        channels=OC,
        kernel_size=(KHKW, KHKW),
        strides=(Stride, Stride),
        padding=(Pad, Pad),
        name="conv2d_profile",
        data_layout=layout,
        kernel_layout=kernel_layout,
    )
    return create_workload(body)
def main():
    """Build the LeNet workload and print its module and parameter shapes."""
    mod, params = init.create_workload(lenet())
    print(f"Module: {mod}")
    for p, value in params.items():
        print(f"Key: {p}, shape: {value.shape}")
def test_cpu_get_graph_params_compare():
    """Verify params embedded in an exported library round-trip through disk."""
    from tvm.relay.testing.init import create_workload, Constant

    # Build a tiny one-conv network whose weight we can compare later.
    dtype = "float32"
    inp_shape = (1, 3, 24, 12)
    data = relay.var("data", shape=inp_shape, dtype=dtype)
    weight_shape = [inp_shape[1], inp_shape[1], 3, 3]
    conv = relay.nn.conv2d(
        data,
        relay.var("conv_weight", shape=weight_shape, dtype=dtype),
        padding=1,
        kernel_size=3,
    )
    func = relay.Function(relay.analysis.free_vars(conv), conv)
    mod, params = create_workload(func, initializer=Constant())

    with tvm.transform.PassContext(opt_level=3):
        compiled_lib = relay.build_module.build(mod, "llvm", params=params)

    # Export the compiled library, reload it, and fetch the embedded params.
    from tvm.contrib import utils

    temp = utils.tempdir()
    path_lib = temp.relpath("deploy_lib.so")
    compiled_lib.export_library(path_lib)
    loaded_lib = tvm.runtime.load_module(path_lib)
    loaded_params = loaded_lib["get_graph_params"]()

    tvm.testing.assert_allclose(
        params["conv_weight"].numpy(), loaded_params["p0"].numpy()[0][0], atol=1e-5
    )
def codegen_yolo():
    """Generate code for the tiny-YOLO network defined in this file."""
    mod, params = init.create_workload(yolo())
    gen_code(mod, params, data_shape=(1, 3, 416, 416), out_shape=(1, 125, 14, 14))
def test_yolo():
    """Benchmark execution of the tiny-YOLO network defined in this file."""
    mod, params = init.create_workload(yolo())
    benchmark_execution(
        mod, params, data_shape=(1, 3, 416, 416), out_shape=(1, 125, 14, 14)
    )
def get_workload(batch_size=1, image_shape=(3, 224, 224), dtype="float32", **kwargs):
    """Create the (module, params) workload for this network.

    Extra keyword arguments are forwarded to ``get_net`` unchanged.
    """
    net = get_net(batch_size=batch_size, image_shape=image_shape, dtype=dtype, **kwargs)
    return create_workload(net)
def get_workload(batch_size=1, num_classes=10, image_shape=(1, 28, 28), dtype="float32", **kwargs):
    """Create the (module, params) workload for this classifier network.

    Extra keyword arguments are forwarded to ``get_net`` unchanged.
    """
    net = get_net(
        batch_size=batch_size,
        num_classes=num_classes,
        image_shape=image_shape,
        dtype=dtype,
        **kwargs,
    )
    return create_workload(net)
def execute_graph(net, print_mod=True, print_params=True, benchmark=True):
    """Create a workload from *net*; optionally dump it and benchmark it."""
    mod, params = init.create_workload(net)
    if print_mod:
        print(f"Module: {mod}")
    if print_params:
        for p, value in params.items():
            print(f"Key: {p}, shape: {value.shape}")
    if benchmark:
        # Benchmark with a 1x3x224x224 input producing 1000 outputs.
        benchmark_execution(mod, params, data_shape=(1, 3, 224, 224), out_shape=(1, 1000))
def get_workload(batch_size, image_shape, out_channel, kernel_size=(3, 3), strides=(1, 1), padding=(1, 1), dtype="float32"):
    """Build a conv2d operator workload.

    Returns (sym, params, data_shape) where data_shape is the full NCHW
    input shape including the batch dimension.
    """
    data_shape = (batch_size, *image_shape)
    op = get_operator(data_shape, out_channel, kernel_size, strides, padding, dtype=dtype)
    sym, params = create_workload(op)
    return sym, params, data_shape
def get_conv2d_depthwise(conv_type, batch_size, IC, HW, OC, KHKW, Pad, Stride, layout="NCHW", dtype="float32"):
    """Build a conv2d or depthwise-conv2d Relay workload for profiling.

    Parameters
    ----------
    conv_type : str
        A string containing 'conv' for a regular convolution or 'depthwise'
        for a depthwise convolution (groups == channels).

    Returns
    -------
    The (module, params) pair produced by ``create_workload``.

    Raises
    ------
    ValueError
        If *conv_type* contains neither 'conv' nor 'depthwise'.
    """
    from tvm.relay.testing.init import create_workload
    from tvm.relay.testing import layers

    data_layout = layout
    # Kernel layout follows TVM's convention: OIHW for NCHW data, HWIO otherwise.
    kernel_layout = "OIHW" if layout == "NCHW" else "HWIO"
    data_shape = (batch_size, IC, HW, HW)
    data = relay.var("data", shape=data_shape, dtype=dtype)
    if 'conv' in conv_type:
        net = layers.conv2d(
            data=data,
            channels=OC,
            kernel_size=(KHKW, KHKW),
            strides=(Stride, Stride),
            padding=(Pad, Pad),
            name="conv2d_profile",
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
    elif 'depthwise' in conv_type:
        print("build depthwise net")
        # groups == channels makes this a depthwise convolution.
        net = layers.conv2d(
            data=data,
            channels=OC,
            groups=OC,
            kernel_size=(KHKW, KHKW),
            strides=(Stride, Stride),
            padding=(Pad, Pad),
            name="depthwise_profile",
            data_layout=data_layout,
            kernel_layout=kernel_layout,
        )
    else:
        # Previously this printed a message and then crashed with an
        # UnboundLocalError on `net`; fail fast with a clear error instead.
        raise ValueError("not correct conv type: {}".format(conv_type))
    return create_workload(net)
def lenet(num_classes=10, data_shape=(1, 1, 32, 32), dtype='float32', alpha=1.0, is_shallow=False):
    """Construct a LeNet-5 style network as a Relay workload.

    Note: the original docstring said "MobileNet", but the architecture built
    here (two 5x5 conv + avg-pool stages followed by three dense layers) is
    LeNet. The ``alpha`` and ``is_shallow`` parameters are accepted for
    interface compatibility but are currently unused.

    Returns the (module, params) pair produced by ``init.create_workload``.
    """
    data = relay.var("data", shape=data_shape, dtype=dtype)
    # Stage 1: conv -> relu -> avg pool
    conv1 = layers.conv2d(data=data, channels=6, kernel_size=(5, 5), name='conv1')
    conv1 = relay.nn.relu(conv1)
    pool2 = relay.nn.avg_pool2d(conv1, pool_size=(2, 2), strides=(2, 2))
    # Stage 2: conv -> relu -> avg pool
    conv3 = layers.conv2d(data=pool2, channels=16, kernel_size=(5, 5), name='conv3')
    conv3 = relay.nn.relu(conv3)
    pool4 = relay.nn.avg_pool2d(conv3, pool_size=(2, 2), strides=(2, 2))
    # Classifier: flatten -> dense(120) -> dense(84) -> dense(num_classes)
    flattened5 = relay.nn.batch_flatten(pool4)
    fcw5 = relay.var('fc5_weight')
    fc5 = relay.nn.dense(data=flattened5, weight=fcw5, units=120)
    fc5 = relay.nn.relu(fc5)
    fcw6 = relay.var('fc6_weight')
    fc6 = relay.nn.dense(data=fc5, weight=fcw6, units=84)
    fc6 = relay.nn.relu(fc6)
    fcw7 = relay.var('fc7_weight')
    fc7 = relay.nn.dense(data=fc6, weight=fcw7, units=num_classes)
    fc7 = relay.nn.relu(fc7)
    softmax = relay.nn.softmax(data=fc7)
    fn = relay.Function(relay.analysis.free_vars(softmax), softmax)
    return init.create_workload(fn)
def get_network(name, batch_size, dtype='float32', ir='nnvm'):
    """Get the symbol definition and random weight of a network

    Parameters
    ----------
    name: str
        The name of the network, can be 'resnet-18', 'resnet-50',
        'vgg-16', 'inception_v3', 'mobilenet', ...
    batch_size: int
        batch size
    dtype: str
        Data type
    ir: str
        Either 'relay' or 'nnvm'; selects which testing module supplies
        the workload constructors.

    Returns
    -------
    net: nnvm.symbol
        The NNVM symbol of network definition
    params: dict
        The random parameters for benchmark
    input_shape: tuple
        The shape of input tensor
    """
    if ir == 'relay':
        from tvm.relay import testing
    elif ir == 'nnvm':
        from nnvm import testing
    else:
        raise Exception(
            "ir must be `relay` or `nnvm`, but you used `{}`".format(ir))

    # Default ImageNet-style input; some branches below override it.
    input_shape = (batch_size, 3, 224, 224)
    if name == 'mobilenet':
        net, params = testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)
    elif name == 'mobilenet_v2':
        net, params = testing.mobilenet_v2.get_workload(batch_size=batch_size, dtype=dtype)
    elif name == 'inception_v3':
        input_shape = (batch_size, 3, 299, 299)
        net, params = testing.inception_v3.get_workload(batch_size=batch_size, dtype=dtype)
    elif "resnet" in name:
        n_layer = int(name.split('-')[1])
        net, params = testing.resnet.get_workload(
            num_layers=n_layer, batch_size=batch_size, dtype=dtype)
    elif "vgg" in name:
        n_layer = int(name.split('-')[1])
        net, params = testing.vgg.get_workload(
            num_layers=n_layer, batch_size=batch_size, dtype=dtype)
    elif "densenet" in name:
        n_layer = int(name.split('-')[1])
        net, params = testing.densenet.get_workload(
            num_layers=n_layer, batch_size=batch_size, dtype=dtype)
    elif "squeezenet" in name:
        version = name.split("_v")[1]
        net, params = testing.squeezenet.get_workload(
            batch_size=batch_size, version=version, dtype=dtype)
    elif name == 'custom':
        # an example for custom network
        # from tvm.relay.testing import init
        # net = relay.var('data')
        # net = relay.testing.layers.conv2d(net, channels=4, kernel_size=(3,3), padding=(1,1))
        # net = relay.nn.batch_flatten(net)
        # net = relay.testing.layers.dense_add_bias(net, units=1000)
        # net, params = init.create_workload(net, batch_size, (3, 224, 224))
        from tvm.relay.testing import init
        input_shape = (3, 224)
        net = relay.var('data', shape=input_shape)
        weight = relay.var('dense_weight', shape=(224, 224))
        net = relay.nn.dense(net, weight)
        # Fixed: relay.ir_pass was removed from TVM; relay.analysis.free_vars
        # is the current API and is what the rest of this file uses.
        net = relay.Function(relay.analysis.free_vars(net), net)
        # net = relay.testing.layers.dense_add_bias(net, name="dense")
        net, params = init.create_workload(net)
    # simple networks for experimenting
    elif name == 'mlp':
        image_shape = (1, 28, 28)
        input_shape = (batch_size, ) + image_shape
        net, params = testing.mlp.get_workload(batch_size=batch_size, image_shape=image_shape)
    elif name == 'nature-dqn':
        image_shape = (4, 84, 84)
        input_shape = (batch_size, ) + image_shape
        net, params = testing.dqn.get_workload(batch_size=batch_size, image_shape=image_shape)
    elif name == 'dcgan':
        random_len = 100
        input_shape = (batch_size, random_len)
        net, params = testing.dcgan.get_workload(batch_size, random_len=random_len)
    elif name == 'densenet':
        input_shape = (3, 64, 64)
        net, params = testing.densenet.get_workload(batch_size=batch_size)
    # elif name == 'mxnet':
    #     # an example for mxnet model
    #     from mxnet.gluon.model_zoo.vision import get_model
    #     block = get_model('resnet18_v1', pretrained=True)
    #     net, params = nnvm.frontend.from_mxnet(block)
    #     net = nnvm.sym.softmax(net)
    else:
        raise ValueError("Unsupported network: " + name)

    return net, params, input_shape
def get_workload(data_shape, weight_shape, dtype="float32"):
    """Build an operator workload and return (sym, params, data_shape)."""
    operator = get_operator(data_shape, weight_shape, dtype=dtype)
    sym, params = create_workload(operator)
    return sym, params, data_shape
def tvm_generic(N, H, W, C, kernel_size, K, stride=1, padding=0, dilation=1,
                groups=1, number=100, dev=0, timeout=4, target="llvm", trials=100):
    """Build a single conv2d, apply tuned autotvm history, and time its execution.

    Tuning itself is intentionally disabled (see the commented tune() call);
    a pre-existing log file matching the workload key is required.

    Returns
    -------
    numpy.ndarray
        Per-repeat execution times in milliseconds.

    Raises
    ------
    RuntimeError
        If the expected autotvm log file does not exist.
    """
    data_shape = (N, C, H, W)
    data = relay.var("data", shape=data_shape, dtype="float32")
    kernel_size = (kernel_size, kernel_size)
    stride = (stride, stride)
    padding = (padding, padding)
    body = layers.conv2d(data=data, channels=K, kernel_size=kernel_size,
                         strides=stride, padding=padding, name="conv2d")
    # Fixed: relay.ir_pass was removed from TVM; relay.analysis.free_vars is
    # the current API and matches usage elsewhere in this file.
    op = relay.Function(relay.analysis.free_vars(body), body)
    sym, params = create_workload(op)
    tasks = autotvm.task.extract_from_program(op, target=target, params=params,
                                              ops=(relay.op.nn.conv2d, ))
    tuning_option = {
        "log_filename": "tvm_baseline_{}.log".format(
            (N, C, H, W, K, kernel_size, stride, padding, dilation, groups)),
        "tuner": "xgb",
        "early_stopping": 30,
        "measure_option": autotvm.measure_option(
            builder=autotvm.LocalBuilder(timeout=timeout),
            runner=autotvm.LocalRunner(number=number, repeat=1,
                                       timeout=timeout, min_repeat_ms=150),
            # runner=autotvm.RPCRunner(
            #     '1080ti',  # change the device key to your key
            #     '0.0.0.0', 9190,
            #     number=20, repeat=3, timeout=4, min_repeat_ms=150)
        ),
    }
    log_filename = tuning_option["log_filename"]
    tuner = tuning_option["tuner"]
    early_stopping = tuning_option["early_stopping"]
    measure_option = tuning_option["measure_option"]

    # Only a single conv2d task is expected from this one-op program.
    assert len(tasks) == 1
    for i, task in enumerate(tasks):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
        # create tuner
        if tuner == 'xgb' or tuner == 'xgb-rank':
            tuner_obj = XGBTuner(task, loss_type='rank')
        elif tuner == 'ga':
            tuner_obj = GATuner(task, pop_size=100)
        elif tuner == 'random':
            tuner_obj = RandomTuner(task)
        elif tuner == 'gridsearch':
            tuner_obj = GridSearchTuner(task)
        else:
            raise ValueError("Invalid tuner: " + tuner)
        n_trial = trials
        length = len(task.config_space)
        print("config space length=", length)
        # Tuning is deliberately disabled here; we rely on an existing log:
        # tuner_obj.tune(n_trial=min(n_trial, length),
        #                early_stopping=early_stopping,
        #                measure_option=measure_option,
        #                callbacks=[
        #                    autotvm.callback.progress_bar(n_trial, prefix=prefix),
        #                    autotvm.callback.log_to_file(log_filename)])
    if not os.path.exists(log_filename):
        raise RuntimeError(
            "the log file {} doesn't exists".format(log_filename))
    with autotvm.apply_history_best(log_filename):
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(op, target=target, params=params)

    ctx = tvm.device(str(target), 0)
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=data_shape)).astype("float32"))
    module = runtime.create(graph, lib, ctx)
    module.set_input("data", data_tvm)
    module.set_input(**params)
    # evaluate
    ftimer = module.module.time_evaluator("run", ctx, number=number, repeat=1)
    prof_res = np.array(ftimer().results) * 1e3
    return prof_res
def yolo():
    """Construct a tiny-YOLO style detection network as a Relay workload.

    Input is (1, 3, 416, 416); the head produces 125 channels at 14x14
    (matching the out_shape used by codegen_yolo / test_yolo).
    """

    def _block(x, idx, channels, kernel=(3, 3), padding=(1, 1)):
        # conv -> bias add -> batch-norm (inference) -> leaky ReLU, reusing the
        # exact parameter names of the original hand-unrolled version.
        c = layers.conv2d(name="conv%d" % idx, data=x, channels=channels,
                          kernel_size=kernel, strides=(1, 1), padding=padding)
        b = relay.nn.bias_add(c, relay.var("b%d_bias" % idx))
        bn = layers.batch_norm_infer(b, name="bn%d" % idx)
        return relay.nn.leaky_relu(bn, alpha=0.1)

    x = relay.var("data", shape=(1, 3, 416, 416))
    # Five conv blocks, each followed by a stride-2 max pool halving H/W:
    # 416 -> 208 -> 104 -> 52 -> 26 -> 13.
    for idx, channels in zip((0, 3, 6, 9, 12), (16, 32, 64, 128, 256)):
        x = _block(x, idx, channels)
        x = relay.nn.max_pool2d(x, pool_size=(2, 2), strides=(2, 2))
    # Sixth block uses a stride-1 pool (13 -> 12).
    x = _block(x, 15, 512)
    x = relay.nn.max_pool2d(x, pool_size=(2, 2), strides=(1, 1), padding=(0, 0))
    x = _block(x, 18, 1024)
    x = _block(x, 20, 1024)
    # Detection head: 1x1 conv with padding 1 (12 -> 14) plus bias, then a
    # constant add. No batch norm or activation on the head.
    conv22 = layers.conv2d(name="conv22", data=x, channels=125,
                           kernel_size=(1, 1), strides=(1, 1), padding=(1, 1))
    bias22 = relay.nn.bias_add(conv22, relay.var("b22_bias"))
    final = relay.op.add(bias22, relay.const(1.0))
    fn = relay.Function(relay.analysis.free_vars(final), final)
    return init.create_workload(fn)