Example #1
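A round-trip unit test: save_param_dict serializes a dict of arrays to a bytearray, and load_param_dict recovers a dict whose contents match the originals.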
def test_save_load():
    x = np.ones((10, 2)).astype("float32")
    y = np.ones((1, 2, 3)).astype("float32")
    params = {"x": x, "y": y}
    param_bytes = relay.save_param_dict(params)
    assert isinstance(param_bytes, bytearray)
    param2 = relay.load_param_dict(param_bytes)
    assert len(param2) == 2
    np.testing.assert_equal(param2["x"].asnumpy(), x)
    np.testing.assert_equal(param2["y"].asnumpy(), y)
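The returned blob can also be written to disk and read back, as several later examples do. A minimal sketch, assuming the params dict above (the params.bin name is illustrative):

param_bytes = relay.save_param_dict(params)
with open("params.bin", "wb") as f:  # illustrative file name
    f.write(param_bytes)
with open("params.bin", "rb") as f:
    reloaded = relay.load_param_dict(bytearray(f.read()))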
Example #2
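Serializes a dict in which two keys alias the same NDArray; after the round trip only value equality is checked, so the test does not rely on the aliasing being preserved.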
def test_ndarray_reflection():
    # Make two `NDArrayWrapper`s that point to the same underlying array.
    np_array = np.random.uniform(size=(10, 2)).astype("float32")
    tvm_array = tvm.nd.array(np_array)
    param_dict = {'x': tvm_array, 'y': tvm_array}
    assert param_dict['x'].same_as(param_dict['y'])
    # Serialize then deserialize `param_dict`.
    deser_param_dict = relay.load_param_dict(relay.save_param_dict(param_dict))
    # Make sure the data matches the original array.
    np.testing.assert_equal(deser_param_dict['x'].asnumpy(), tvm_array.asnumpy())
    # Make sure `x` and `y` contain the same data.
    np.testing.assert_equal(deser_param_dict['x'].asnumpy(), deser_param_dict['y'].asnumpy())
Example #3
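Builds a one-operator Relay function (x + 1), uploads the compiled library to an RPC remote, and feeds save_param_dict(params) to the remote graph runtime's load_params.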
    def verify_graph_runtime(remote, target, shape, dtype):
        x = relay.var('x')
        y = relay.const(1)
        z = relay.add(x, y)
        func = relay.Function([x], z)

        x_in = np.ones(shape).astype(dtype)
        params = {'x': x_in}
        graph, lib, params = relay.build(func, target=target, params=params)

        temp = util.tempdir()
        path_dso = temp.relpath("dev_lib.o")
        lib.save(path_dso)
        remote.upload(path_dso)
        lib = remote.load_module("dev_lib.o")
        ctx = remote.cpu(0)
        mod = graph_runtime.create(graph, lib, ctx)
        mod.load_params(relay.save_param_dict(params))
        mod.run()
        out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, ctx=ctx))
        tvm.testing.assert_allclose(x_in + 1, out.asnumpy())
Example #4
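Drives the low-level BuildModule API: a dense/relu/add function is built with b and c bound as params, and the returned params are reloaded into the graph runtime via load_params.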
def test_build():
    m_bld = BuildModule()
    tgt_name = "llvm"
    tgt = "llvm"
    ctx = tvm.cpu()
    # func
    a = relay.var("a", dtype="float32", shape=(16, 8))
    b = relay.var("b", dtype="float32", shape=(8, 8))
    c = relay.var("c", dtype="float32", shape=(16, 8))
    x = relay.nn.dense(a, b)
    y = relay.nn.relu(x)
    z = y + c
    func = relay.Function([a, b, c], z)
    A = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), ctx=ctx)
    B = tvm.nd.array(np.random.uniform(-1, 1, (8, 8)).astype("float32"), ctx=ctx)
    C = tvm.nd.array(np.random.uniform(-1, 1, (16, 8)).astype("float32"), ctx=ctx)
    params = {
        "b" : B,
        "c" : C
    }
    # build
    targets = {
        tgt: tgt
    }
    m_bld.set_opt_level(3)
    m_bld.build(func, targets, "llvm", params=params)
    g_json = m_bld.get_json()
    mmod = m_bld.get_module()
    params = m_bld.get_params()
   
    # test
    rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
    rt.set_input("a", A)
    rt.load_params(relay.save_param_dict(params))
    rt.run()
    out = rt.get_output(0)
   
    np.testing.assert_allclose(out.asnumpy(),
        np.maximum(np.dot(A.asnumpy(), B.asnumpy().T), 0) + C.asnumpy(), atol=1e-5, rtol=1e-5)
Example #5
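Converts a pretrained MXNet mobilenet0.25 to Relay, appends a softmax, builds for llvm --system-lib, and writes model.o, graph.json, and params.bin.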
def build_module(opts):
    dshape = (1, 3, 224, 224)
    from mxnet.gluon.model_zoo.vision import get_model
    block = get_model('mobilenet0.25', pretrained=True)
    shape_dict = {'data': dshape}
    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    func = mod["main"]
    func = relay.Function(func.params, relay.nn.softmax(func.body), None,
                          func.type_params, func.attrs)

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func,
                                         'llvm --system-lib',
                                         params=params)

    build_dir = os.path.abspath(opts.out_dir)
    if not os.path.isdir(build_dir):
        os.makedirs(build_dir)

    lib.save(os.path.join(build_dir, 'model.o'))
    with open(os.path.join(build_dir, 'graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(os.path.join(build_dir, 'params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
Example #6
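A variant of the previous export that loops over several runtimes, disables TIR vectorization, and derives output file names from a per-runtime format string.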
def build_module(opts):
    dshape = (1, 3, 224, 224)
    from mxnet.gluon.model_zoo.vision import get_model

    block = get_model("mobilenet0.25", pretrained=True)
    shape_dict = {"data": dshape}
    mod, params = relay.frontend.from_mxnet(block, shape_dict)
    func = mod["main"]
    func = relay.Function(func.params, relay.nn.softmax(func.body), None,
                          func.type_params, func.attrs)

    for runtime_name, file_format_str in RUNTIMES.items():
        with tvm.transform.PassContext(opt_level=3,
                                       config={"tir.disable_vectorize": True}):
            graph, lib, params = relay.build(
                func,
                f"llvm --runtime={runtime_name} --system-lib",
                params=params)

        build_dir = os.path.abspath(opts.out_dir)
        if not os.path.isdir(build_dir):
            os.makedirs(build_dir)

        lib.save(
            os.path.join(build_dir,
                         file_format_str.format(name="model", ext="o")))
        with open(
                os.path.join(build_dir,
                             file_format_str.format(name="graph", ext="json")),
                "w") as f_graph_json:
            f_graph_json.write(graph)
        with open(
                os.path.join(build_dir,
                             file_format_str.format(name="params", ext="bin")),
                "wb") as f_params:
            f_params.write(relay.save_param_dict(params))
Example #7
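Builds a resnet-18 workload (the earlier mlp workload is immediately overwritten) for llvm --system-lib and saves the object file, graph JSON, and serialized params.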
def main():
    dshape = (1, 28, 28)
    net, params = relay.testing.mlp.get_workload(batch_size=dshape[0],
                                                 dtype="float32")

    dshape = (1, 3, 224, 224)
    net, params = relay.testing.resnet.get_workload(layers=18,
                                                    batch_size=dshape[0],
                                                    image_shape=dshape[1:])

    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(net,
                                         "llvm --system-lib",
                                         params=params)

    build_dir = osp.abspath(sys.argv[1])
    if not osp.isdir(build_dir):
        os.makedirs(build_dir, exist_ok=True)

    lib.save(osp.join(build_dir, "model.o"))
    with open(osp.join(build_dir, "graph.json"), "w") as f_graph_json:
        f_graph_json.write(graph)
        with open(osp.join(build_dir, "params.bin"), "wb") as f_params:
            f_params.write(relay.save_param_dict(params))
Example #8
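Converts an ONNX model with three sequence inputs (input_ids, input_mask, segment_ids) to Relay, then writes the module with tvm.ir.save_json and the params with save_param_dict.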
def convert_tvm(config, torch_model, x):
    opt = config['opt']
    import onnx
    import tvm
    from tvm import relay

    onnx_model = onnx.load(opt.onnx_path)
    logger.info("[ONNX model loaded]")
    batch_size = x[0].shape[0]
    seq_len = x[0].shape[1]
    shape_dict = {
        'input_ids': (batch_size, seq_len),
        'input_mask': (batch_size, seq_len),
        'segment_ids': (batch_size, seq_len),
    }
    model, params = relay.frontend.from_onnx(onnx_model,
                                             shape_dict,
                                             opset=opt.onnx_opset)
    logger.info("[Converting to TVM done]")

    with open(os.path.join(opt.tvm_dir, 'model.json'), 'w') as fo:
        fo.write(tvm.ir.save_json(model))
    with open(os.path.join(opt.tvm_dir, 'model.params'), 'wb') as fo:
        fo.write(relay.save_param_dict(params))
Example #9
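Compiles resnet18 (pretrained via MXNet, or with random weights from relay.testing), creates a shared library, and writes the deploy graph and params; it returns early if deploy_lib.o already exists.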
def build(target_dir):
    """ Compiles resnet18 with TVM"""
    deploy_lib = osp.join(target_dir, 'deploy_lib.o')
    if osp.exists(deploy_lib):
        return

    if args.pretrained:
        # needs mxnet installed
        from mxnet.gluon.model_zoo.vision import get_model

        # if `--pretrained` is enabled, download a pretrained resnet18
        # trained on the imagenet1k dataset for image classification
        block = get_model('resnet18_v1', pretrained=True)
        net, params = relay.frontend.from_mxnet(block, {"data": data_shape})
        # we want a probability so add a softmax operator
        net = relay.Function(net.params, relay.nn.softmax(net.body),
            None, net.type_params, net.attrs)
    else:
        # use random weights from relay.testing
        net, params = relay.testing.resnet.get_workload(
            num_layers=18, batch_size=batch_size, image_shape=image_shape)

    # compile the model
    with tvm.transform.PassContext(opt_level=opt_level):
        graph, lib, params = relay.build_module.build(net, target, params=params)

    # save the model artifacts
    lib.save(deploy_lib)
    cc.create_shared(osp.join(target_dir, "deploy_lib.so"),
                    [osp.join(target_dir, "deploy_lib.o")])

    with open(osp.join(target_dir, "deploy_graph.json"), "w") as fo:
        fo.write(graph)

    with open(osp.join(target_dir,"deploy_param.params"), "wb") as fo:
        fo.write(relay.save_param_dict(params))
Example #10
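A small CLI that builds resnet-18 for llvm --system-lib and saves LLVM bitcode (model.bc), graph.json, and params.bin.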
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-o', '--out-dir', default='.')
    opts = parser.parse_args()

    dshape = (1, 3, 224, 224)
    net, params = relay.testing.resnet.get_workload(layers=18,
                                                    batch_size=dshape[0],
                                                    image_shape=dshape[1:])

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net,
                                         'llvm --system-lib',
                                         params=params)

    build_dir = osp.abspath(opts.out_dir)
    if not osp.isdir(build_dir):
        os.makedirs(build_dir, exist_ok=True)

    lib.save(osp.join(build_dir, 'model.bc'))
    with open(osp.join(build_dir, 'graph.json'), 'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(osp.join(build_dir, 'params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
Example #11
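An fp16 element-wise add on CUDA: both inputs are bound as params, reloaded through load_params, and the output is compared against NumPy; the test skips when CUDA or fp16 support is missing.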
def test_fp16_build():
    dtype = "float16"

    if not tvm.runtime.enabled("cuda") or not tvm.gpu(0).exist:
        print("skip because cuda is not enabled.")
        return

    ctx = tvm.gpu(0)
    if dtype == "float16" and not have_fp16(ctx.compute_version):
        print("skip because gpu does not support fp16")
        return

    x = relay.var("x", dtype=dtype, shape=(4, 4))
    y = relay.var("y", dtype=dtype, shape=(4, 4))
    z = x + y
    func = relay.Function([x, y], z)
    X = tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), ctx=ctx)
    Y = tvm.nd.array(np.random.uniform(-1, 1, (4, 4)).astype(dtype), ctx=ctx)
    params = {
        "x": X,
        "y": Y,
    }

    # build
    g_json, mmod, params = relay.build(func, "cuda", params=params)

    # test
    rt = tvm.contrib.graph_runtime.create(g_json, mmod, ctx)
    rt.load_params(relay.save_param_dict(params))
    rt.run()
    out = rt.get_output(0)

    np.testing.assert_allclose(out.asnumpy(),
                               X.asnumpy() + Y.asnumpy(),
                               atol=1e-5,
                               rtol=1e-5)
Example #12
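Pipeline executor export: each module's library, graph JSON, and params go to separate files, and their paths are recorded in load and pipeline configuration JSON files.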
    def export_library(self, directory_path):
        """Export the pipeline executor into disk files.

        Parameters
        ----------
        directory_path : str
            Export the files to this directory.
        """
        if not self.pipeline_mods:
            raise RuntimeError(
                "The pipeline executor has not been initialized.")

        # Check if the directory_path exists.
        if not os.path.exists(directory_path):
            raise RuntimeError(
                "The directory {directory_path} does not exist.")
        # Create an load configuration.
        load_config_file_name = "{}/load_config".format(directory_path)
        pipeline_config_file_name = "{}/pipeline_config".format(directory_path)
        config = {}
        config["load_config"] = load_config_file_name
        config["pipeline_config"] = pipeline_config_file_name
        load_config = []
        # Export the library, JSON, and parameters into files, then record
        # these file paths in a configuration file.
        for lib_index in self.pipeline_mods:
            mconfig = {}
            mconfig["mod_idx"] = lib_index
            mconfig["lib_name"] = "{}/lib{}.so".format(directory_path,
                                                       lib_index)
            mconfig["json_name"] = "{}/json{}".format(directory_path,
                                                      lib_index)
            mconfig["params_name"] = "{}/params{}".format(
                directory_path, lib_index)
            mconfig["dev"] = "{},{}".format(
                self.pipeline_mods[lib_index]["dev"].device_type,
                self.pipeline_mods[lib_index]["dev"].device_id,
            )

            # Get the graph, lib, and parameters from GraphExecutorFactoryModule.
            lib = self.pipeline_mods[lib_index]["lib"]
            # Export the lib, graph, and parameters to disk.
            lib.export_library(mconfig["lib_name"])
            with open(mconfig["json_name"], "w") as file_handle:
                file_handle.write(lib.graph_json)
            with open(mconfig["params_name"], "wb") as file_handle:
                file_handle.write(relay.save_param_dict(lib.params))

            load_config.append(mconfig)

        with open(load_config_file_name, "w") as file_handle:
            json.dump(load_config, file_handle)

        with open(pipeline_config_file_name, "w") as file_handle:
            json.dump(self.mods_config, file_handle)

        config_file_name = "{}/config".format(directory_path)
        with open(config_file_name, "w") as file_handle:
            json.dump(config, file_handle)

        return config_file_name
Example #13
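Builds a small conv/batch-norm/dense network, round-trips the module and params through files, then verifies the layer types reported by the external xf_relay importer.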
    def test_simple_network(self):
        data = relay.var("data", relay.TensorType((-1, 3, 224, 224),
                                                  "float32"))
        weight = relay.var("weight")
        bn_gamma = relay.var("bn_gamma")
        bn_beta = relay.var("bn_beta")
        bn_mmean = relay.var("bn_mean")
        bn_mvar = relay.var("bn_var")

        simple_net = relay.nn.pad(data, ((0, 0), (0, 0), (1, 1), (1, 1)))
        simple_net = relay.nn.conv2d(data=simple_net,
                                     weight=weight,
                                     kernel_size=(3, 3),
                                     channels=16,
                                     padding=(0, 0))
        simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta,
                                         bn_mmean, bn_mvar)[0]
        simple_net = relay.nn.relu(simple_net)
        simple_net = relay.op.reduce.mean(simple_net, axis=(2, 3))
        simple_net = relay.op.transform.squeeze(simple_net)

        dense_weight = relay.var("dense_weight")
        dense_bias = relay.var('dense_bias')
        simple_net = relay.nn.dense(simple_net, weight=dense_weight, units=10)
        simple_net = relay.nn.bias_add(simple_net, dense_bias, axis=1)

        simple_net = relay.nn.softmax(simple_net, axis=1)
        simple_net = relay.op.transform.reshape(simple_net, newshape=(-1, 10))

        simple_net = relay.Function(relay.analysis.free_vars(simple_net),
                                    simple_net)

        mod, params = testing.create_workload(simple_net)

        json_file = os.path.join(FILE_DIR, "relay_mod_test.json")
        with open(json_file, 'w') as f:
            json.dump(tvm.ir.save_json(mod), f)

        params_file = os.path.join(FILE_DIR, "relay_params_test.params")
        with open(params_file, "wb") as fo:
            fo.write(relay.save_param_dict(params))

        mod_read, params_read = load_model_from_file('Relay', 'Relay')(
            model_path=json_file,
            shapes={
                'data': [-1, 3, 224, 224]
            },
            opt_model_path=params_file)

        xgraph = xf_relay.from_relay(mod_read, params_read)

        layers = xgraph.get_layers()

        assert layers[0].type[0] == 'Input'
        assert layers[1].type[0] == 'Pad'
        assert layers[2].type[0] == 'Convolution'
        assert layers[3].type[0] == 'BatchNorm'
        assert layers[4].type[0] == 'ReLU'
        assert layers[5].type[0] == 'Mean'
        assert layers[6].type[0] == 'Squeeze'
        assert layers[7].type[0] == 'Dense'
        assert layers[8].type[0] == 'BiasAdd'
        assert layers[9].type[0] == 'Softmax'
        assert layers[10].type[0] == 'Reshape'

        os.remove(json_file)
        os.remove(params_file)
Example #14
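A tutorial fragment: save graph, lib, and params into a tempdir, then load all three back and run the module in the graph runtime.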
# -----------------------------
# We can also save the graph, lib, and parameters into files and load them
# back in the deploy environment.

####################################################

# save the graph, lib and params into separate files
from tvm.contrib import util

temp = util.tempdir()
path_lib = temp.relpath("deploy_lib.tar")
lib.export_library(path_lib)
with open(temp.relpath("deploy_graph.json"), "w") as fo:
    fo.write(graph)
with open(temp.relpath("deploy_param.params"), "wb") as fo:
    fo.write(relay.save_param_dict(params))
print(temp.listdir())

####################################################

# load the module back.
loaded_json = open(temp.relpath("deploy_graph.json")).read()
loaded_lib = tvm.module.load(path_lib)
loaded_params = bytearray(open(temp.relpath("deploy_param.params"), "rb").read())
input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))

module = graph_runtime.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
module.run(data=input_data)
out_deploy = module.get_output(0).asnumpy()
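The deserialized dict can also be bound with set_input instead of load_params, as Examples #17 and #18 do. A sketch reusing the objects from this fragment:

loaded = relay.load_param_dict(loaded_params)  # dict of name -> NDArray
module.set_input(**loaded)  # equivalent to module.load_params(loaded_params)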
Example #15
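A TensorRT compile helper that returns the params already serialized by save_param_dict.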
def compile_graph(mod, params):
    with tvm.transform.PassContext(
            opt_level=3, config={"relay.ext.tensorrt.options": config}):
        graph, lib, params = relay.build(mod, params=params, target="cuda")
        params = relay.save_param_dict(params)
    return graph, lib, params
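Example #16
Cross-compiles a model for several Android ABIs (x86_64, x86, arm64-v8a, armeabi-v7a) with NDK toolchains, then exports model.so, model.json, and model.params.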
def tvm_compile(func, params, arch, dlr_model_name):
    ###arch x86_64
    if arch == 'x86_64':
        target = "llvm -model=N3350 -target=x86_64-linux-android -mattr=+ssse3,+sse4.2"
        sysroot = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/sysroot"
        toolchain = "/opt/android-ndk/toolchains/x86_64-4.9/prebuilt/linux-x86_64"
        os.environ[
            'TVM_NDK_CC'] = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/x86_64-linux-android28-clang++"
    ###arch x86 i686
    elif arch == 'x86':
        target = "llvm -model=x5-Z8350 -target=i686-linux-android -mattr=+ssse3"
        sysroot = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/sysroot"
        toolchain = "/opt/android-ndk/toolchains/x86-4.9/prebuilt/linux-x86_64"
        os.environ[
            'TVM_NDK_CC'] = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/i686-linux-android21-clang++"

    ###arch arm64 aarch64
    elif arch == 'arm64-v8a':
        target = "llvm -device=arm_cpu -model=SM8150 -target=aarch64-linux-android"
        sysroot = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/sysroot"
        toolchain = "/opt/android-ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64"
        os.environ[
            'TVM_NDK_CC'] = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android28-clang++"

    ###arch armv7
    ## More info on armv7 hard/soft abi for Android https://android.googlesource.com/platform/ndk/+/master/docs/HardFloatAbi.md
    elif arch == 'armeabi-v7a':
        target = "llvm -device=arm_cpu -model=MSM8940 -target=armv7a-linux-androideabi -mfloat-abi=soft -mattr=+neon,+thumb-mode"
        sysroot = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/sysroot"
        toolchain = "/opt/android-ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64"
        os.environ[
            'TVM_NDK_CC'] = "/opt/android-ndk/toolchains/llvm/prebuilt/linux-x86_64/bin/armv7a-linux-androideabi21-clang++"
    else:
        print("Valid arch: arm64-v8a, armeabi-v7a, x86_64, x86")
        return

    print('target:', target)
    print("Compiling...")

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)

    print("Compilation done")
    print("lib type_key: ", lib.type_key)

    print("Saving files")
    out_folder = arch + "/" + dlr_model_name + "/"
    os.makedirs(out_folder, exist_ok=True)
    # save the graph, lib and params into separate files
    path_lib = out_folder + "model.so"
    options = [
        "-shared", "-fPIC", "--sysroot", sysroot,
        "--gcc-toolchain=" + toolchain, "-static-libstdc++"
    ]
    lib.export_library(path_lib, ndk.create_shared, options=options)

    print("export_library done")

    with open(out_folder + "model.json", "w") as fo:
        fo.write(graph)
    with open(out_folder + "model.params", "wb") as fo:
        fo.write(relay.save_param_dict(params))

    print("Files saved to", out_folder)
Example #17
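An autotvm flow: extract conv2d tasks, tune them, compile with the best history, export the library, graph, and params, upload to a remote device over RPC, and time inference.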
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(mod["main"],
                                              target=target,
                                              target_host=target_host,
                                              params=params,
                                              ops=(relay.op.nn.conv2d, ))

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params, target_host=target_host)
        # export library
        tmp = tempdir()
        if use_android:
            from tvm.contrib import ndk
            filename = "{}.so".format(module_export_prefix)
            lib.export_library(tmp.relpath(filename), ndk.create_shared)
        else:
            filename = "{}.tar".format(module_export_prefix)
            lib.export_library(tmp.relpath(filename))

        lib.imported_modules[0].save(
            "{}-cuda.ptx".format(module_export_prefix))
        lib.export_library("{}-lib.tar".format(module_export_prefix))

        with open("{}-graph.json".format(module_export_prefix), "w") as fo:
            fo.write(graph)

        with open("{}-params.params".format(module_export_prefix), "wb") as fo:
            fo.write(relay.save_param_dict(params))

        # upload module to device
        print("Upload...")
        remote = autotvm.measure.request_remote(device_key,
                                                tracker_host,
                                                tracker_port,
                                                timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)

        # upload parameters to device
        ctx = remote.context(str(target), 0)
        module = runtime.create(graph, rlib, ctx)
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input('data', data_tvm)
        module.set_input(**params)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=30)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example #18
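Builds a keyword-spotting model, optionally with tuned schedules and a params file read back through load_param_dict, then saves the model artifacts plus reference input and output binaries.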
def build_keyword_model(opts):
    from model.kws.kws import get_module, prepare_input

    model_input_name = 'Mfcc'
    shape_dict = {model_input_name: (1, 49, 10)}

    mod = get_module(opts.module)
    print(mod)

    params_data = None
    if opts.params:
        with open(opts.params, 'rb') as f_param:
            params_data = relay.load_param_dict(f_param.read())

    print("Compile...")
    if opts.tuned:
        history_file = opts.tuned
        print(f'INFO: Tuning model with history file {history_file}!')
        with autotvm.apply_history_best(history_file):
            with relay.build_config(opt_level=3):
                graph, lib, out_params = relay.build_module.build(
                    mod, target=TARGET, params=params_data)
    else:
        print("INFO: No Tuning!")
        with relay.build_config(opt_level=3):
            graph, lib, out_params = relay.build_module.build(
                mod, target=TARGET, params=params_data)

    #save model, graph, params
    model_name = 'keyword'
    lib.save(os.path.join(build_dir, f'{model_name}_model.o'))
    print(f'INFO: {model_name}_model.o saved!')
    with open(os.path.join(build_dir, f'{model_name}_graph.bin'),
              'wb') as f_graph:
        f_graph.write(bytes(graph, 'utf-8'))
        print(f'INFO: {model_name}_graph.bin saved!')
    with open(os.path.join(build_dir, f'{model_name}_graph.json'),
              'w') as f_graph_json:
        f_graph_json.write(graph)
        print(f'INFO: {model_name}_graph.json saved!')
    with open(os.path.join(build_dir, f'{model_name}_params.bin'),
              'wb') as f_params:
        f_params.write(relay.save_param_dict(out_params))
        print(f'INFO: {model_name}_params.bin saved!')

    #create input and result
    local_target = 'llvm --system-lib'
    with relay.build_config(opt_level=3):
        graph_test, lib_test, params_test = relay.build_module.build(
            mod, target=local_target)

    with open('build/graph.log', 'w') as f:
        f.write(str(graph))

    sample_file = 'python/model/kws/samples/silence.wav'
    input_data = prepare_input(sample_file)
    ctx = tvm.context(local_target, 0)
    m = tvm.contrib.graph_runtime.create(graph_test, lib_test, ctx)
    m.set_input('Mfcc', input_data)
    m.set_input(**params_test)
    m.run()
    predictions = m.get_output(0, tvm.nd.empty(((1, 12)), 'float32')).asnumpy()
    predictions = predictions[0]

    print(f'INFO: sample audio file used: {sample_file}')
    # save data and output
    with open(os.path.join(build_dir, f'{model_name}_data.bin'), "wb") as fp:
        fp.write(input_data.astype(np.float32).tobytes())
        print(f'INFO: {model_name}_data.bin saved!')
    with open(os.path.join(build_dir, f'{model_name}_output.bin'), "wb") as fp:
        fp.write(predictions.astype(np.float32).tobytes())
        print(f'INFO: {model_name}_output.bin saved!')

    generate_id()
Example #19
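Exports a built module as compiled.so, compiled.json, and compiled.params, then optionally runs a sanity check in the TVM graph runtime.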
    if args.use_gpu:
        target = "cuda"
        ctx = tvm.gpu(0)
    else:
        target = "llvm"
        ctx = tvm.cpu()
    target_host = "llvm"
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod,
                          target=target,
                          target_host=target_host,
                          params=params)
    export_graph, export_lib, export_params = lib
    export_lib.export_library('compiled.so')
    with open('compiled.json', 'w') as f:
        f.write(export_graph)
    with open('compiled.params', 'wb') as f:
        f.write(relay.save_param_dict(export_params))
    print('export complete')

    if args.verbose:
        print('Running sanity check in tvm runtime')
        from tvm.contrib import graph_runtime
        dtype = "float32"
        m = graph_runtime.GraphModule(lib["default"](ctx))
        # Set inputs
        m.set_input(input_name,
                    tvm.nd.array(images.cpu().detach().numpy().astype(dtype)))
        m.run()
        tvm_result = m.get_output(0)
Example #20
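Converts an MXNet model with three inputs to Relay, optionally autotunes it, and exports the deploy library, graph, and params under compiled_models/.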
def compile_via_tvm(sym, arg_params, aux_params, symbol_file, data_shape, tune):

    input_shape = [1] + list(data_shape)
    input_dict = {'data': input_shape}
    input_name = 'data'

    batch = 1
    seq_length = 128
    input_dict = {
        'data0': (batch, seq_length),
        'data1': (batch, seq_length),
        'data2': (batch,)
    }
    mod, params = relay.frontend.from_mxnet(sym,
                                            dtype={},
                                            shape=input_dict,
                                            arg_params=arg_params,
                                            aux_params=aux_params)

    model_name = symbol_file.split('/')[-1].replace('.json','')
    log_dir = os.getcwd() + "/tuned_logs_c5"
    pathlib.Path(log_dir).mkdir(parents=True, exist_ok=True)
    log_file = log_dir + "/" + "%s.log" % model_name
    graph_opt_sch_file = log_dir + "/" + "%s_graph_opt.log" % model_name

    Path(log_file).touch()
    Path(graph_opt_sch_file).touch()

    if tune:
        tuning_option = {
            'log_filename': log_file,
            'tuner': 'random',
            'early_stopping': None,

            'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(),
                runner=autotvm.LocalRunner(number=10, repeat=1,
                                           min_repeat_ms=1000),
            ),
        }

        tune_and_evaluate(tuning_option, mod, params, input_shape, log_file,
                graph_opt_sch_file, input_name)

    # with autotvm.apply_graph_best(graph_opt_sch_file):
    with autotvm.apply_history_best(log_file):
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(
                mod, target=target, params=params)

            base_dir = os.getcwd() + "/compiled_models"
            pathlib.Path(base_dir).mkdir(parents=True, exist_ok=True)

            base = base_dir + '/tvm_' + symbol_file.split('/')[-1].replace('.json','')

            path_lib = base + '_deploy_lib.tar'
            path_graph =  base + '_deploy_graph.json'
            path_params = base + '_deploy_params.params'

            lib.export_library(path_lib)
            with open(path_graph, 'w') as fo:
                fo.write(graph)
            with open(path_params, 'wb') as fo:
                fo.write(relay.save_param_dict(params))
Example #21
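TensorFlow keyword-spotting graph surgery: the audio front-end ops are replaced with an Mfcc placeholder, the module is optionally quantized, and the module (pickled) plus params.bin are saved.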
def export_module(opts):
    # Target settings
    layout = "NCHW"

    # Download required files
    from tvm.contrib.download import download_testdata
    model_path = download_testdata(model_url, model_file_name, module=['tf', 'keyword_spotting'])
    label_path = download_testdata(label_url, label_name, module=['data'])

    # Import model
    with tf_compat_v1.gfile.GFile(model_path, 'rb') as f:
        graph_def = tf_compat_v1.GraphDef()
        graph_def.ParseFromString(f.read())
        graph = tf.import_graph_def(graph_def, name='')
        graph_def = tf_testing.ProcessGraphDefParam(graph_def)
        with tf_compat_v1.Session() as sess:
            graph_def = tf_testing.AddShapesToGraphDef(sess, 'labels_softmax')

    build_dir = opts.out_dir
    if not os.path.exists(build_dir):
        os.makedirs(build_dir)

    ##save original TF graph
    if DEBUG_LOG:
        with open(os.path.join(build_dir, f'{model_name}_graph_original.log'), 'w') as orig_file:
            orig_file.write(str(graph_def))

    ##remove pre-processing nodes and fix the beginning
    nodes = []
    ##add first op
    input_dim0 = 1
    input_dim1 = 49
    input_dim2 = 10
    new_input = graph_def.node.add()
    new_input.op = 'Placeholder'
    new_input.name = 'Mfcc'
    new_input.attr["dtype"].CopyFrom(attr_value_pb2.AttrValue(
            type=dtypes.float32.as_datatype_enum))
                
    nodes.append(new_input)

    removed_count = 0
    for ii, node in enumerate(graph_def.node, start=0):
        if node.op == 'DecodeWav' \
        or node.op == 'AudioSpectrogram' \
        or node.op == 'Mfcc' \
        or node.op == 'Placeholder' \
        or node.op == 'wav_data':
            removed_count += 1
            pass
        else:
            nodes.append(node) 
    print(f'NUM of layers removed: {removed_count}')

    new_graph = tf_compat_v1.GraphDef()
    new_graph.node.extend(nodes)
    ##log new graph
    if DEBUG_LOG:
        with open(os.path.join(build_dir, f'{model_name}_graph_new.log'), 'w') as new_graph_log:
            new_graph_log.write(str(new_graph))

    ##get mod and params with new graph
    shape_dict = {'Mfcc': (1, 49, 10)}
    mod, params = relay.frontend.from_tensorflow(new_graph,
                                                layout=layout,
                                                shape=shape_dict)

    if DEBUG_LOG:
        with open(os.path.join(build_dir, f'{model_name}_mod.log'), 'w') as mod_file:
            mod_file.write(str(mod))
        with open(os.path.join(build_dir, f'{model_name}_param.log'), 'w') as param_log:
            param_log.write(str(params))

    #quantization
    if opts.quantize:
        if not opts.global_scale:
            raise RuntimeError('Global Scale is not valid!')
        global_scale = float(opts.global_scale)
        print('INFO: Quantizing...')
        print(f'INFO: Global Scale: {global_scale}')
        with relay.quantize.qconfig(calibrate_mode='global_scale', 
                                    global_scale=global_scale,
                                    skip_conv_layers=[0]):
            mod = relay.quantize.quantize(mod, params)

        if DEBUG_LOG:
            with open(os.path.join(build_dir, f'{model_name}_mod_quantized.log'), 'w') as mod_log:
                mod_log.write(str(mod))

    #save module
    if opts.quantize:
        file_path = f'{build_dir}/module_gs_{global_scale}.pickle'
        with open(file_path, 'wb') as h1:
            pickle.dump(mod, h1, protocol=pickle.HIGHEST_PROTOCOL)
            print(f'INFO: {file_path} saved!')
        with open(f'{build_dir}/module_gs_{global_scale}.txt', 'w') as f:
            f.write(mod.astext())
    else:
        file_path = f'{build_dir}/module.pickle'
        with open(file_path, 'wb') as h1:
            pickle.dump(mod, h1, protocol=pickle.HIGHEST_PROTOCOL)
            print(f'INFO: {file_path} saved!')
        param_path = f'{build_dir}/params.bin'
        with open(param_path, 'wb') as f_params:
            f_params.write(relay.save_param_dict(params))
            print(f'INFO: {param_path} saved!')
        with open(f'{build_dir}/module.txt', 'w') as f:
            f.write(mod.astext())
    return mod, params
Example #22
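A GluonCV helper that wraps a HybridBlock with an optional preprocess block, optionally autotunes on x86, and exports the library, graph JSON, and an epoch-numbered params file.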
def export_tvm(path,
               block,
               data_shape,
               epoch=0,
               preprocess=True,
               layout='HWC',
               ctx=mx.cpu(),
               target='llvm',
               opt_level=3,
               use_autotvm=False):
    """Helper function to export a HybridBlock to TVM executable. Note that tvm package needs
    to be installed(https://tvm.ai/).

    Parameters
    ----------
    path : str
        Path to save model.
        Three files, path_deploy_lib.tar, path_deploy_graph.json, and
        path_deploy_xxxx.params, will be created, where xxxx is the 4-digit epoch number.
    block : mxnet.gluon.HybridBlock
        The hybridizable block. Note that normal gluon.Block is not supported.
    data_shape : tuple of int, required
        Unlike `export_block`, `data_shape` is required here for the purpose of optimization.
        If dynamic shape is required, you can use the shape that most fits the inference tasks,
        but the optimization won't accommodate all situations.
    epoch : int
        Epoch number of saved model.
    preprocess : mxnet.gluon.HybridBlock, default is True.
        Preprocess block prior to the network.
        By default (True), it will subtract mean [123.675, 116.28, 103.53], divide
        std [58.395, 57.12, 57.375], and convert original image (B, H, W, C and range [0, 255]) to
        tensor (B, C, H, W) as network input. This is the default preprocess behavior of all GluonCV
        pre-trained models.
        You can use a custom preprocess hybrid block or disable it by setting ``preprocess=None``.
    layout : str, default is 'HWC'
        The layout for raw input data. The default is HWC. Supports 'HWC' and 'CHW'.
        Note that image channel order is always RGB.
    ctx: mx.Context, default mx.cpu()
        Network context.
    target : str, default is 'llvm'
        Runtime type for code generation, can be ('llvm', 'cuda', 'opencl', 'metal'...)
    opt_level : int, default is 3
        TVM optimization level. If supported, a higher `opt_level` may generate a more
        efficient runtime library; however, some operators may not support high-level
        optimization and will fall back to a lower `opt_level`.
    use_autotvm : bool, default is False
        Use autotvm for performance tuning. Note that this can take a very long time,
        since it is a search- and model-based tuning process.

    Returns
    -------
    None

    """
    try:
        import tvm
        from tvm import autotvm
        from tvm import relay
        from tvm.relay import testing
        from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
        import tvm.contrib.graph_runtime as runtime
    except ImportError:
        print(
            "TVM package required, please refer https://tvm.ai/ for installation guide."
        )
        raise

    # add preprocess block if necessary
    if preprocess:
        # add preprocess block
        if preprocess is True:
            preprocess = _DefaultPreprocess()
        else:
            if not isinstance(preprocess, HybridBlock):
                raise TypeError(
                    "preprocess must be HybridBlock, given {}".format(
                        type(preprocess)))
        wrapper_block = nn.HybridSequential()
        preprocess.initialize(ctx=ctx)
        wrapper_block.add(preprocess)
        wrapper_block.add(block)
    else:
        wrapper_block = block
    wrapper_block.collect_params().reset_ctx(ctx)

    # convert to relay graph
    sym, params = relay.frontend.from_mxnet(wrapper_block,
                                            shape={"data": data_shape})

    if use_autotvm:

        def tune_kernels(tasks,
                         measure_option,
                         tuner='gridsearch',
                         early_stopping=None,
                         log_filename='tuning.log'):
            for i, tsk in enumerate(tasks):
                prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

                # converting conv2d tasks to conv2d_NCHWc tasks
                op_name = tsk.workload[0]
                if op_name == 'conv2d':
                    func_create = 'topi_x86_conv2d_NCHWc'
                elif op_name == 'depthwise_conv2d_nchw':
                    func_create = 'topi_x86_depthwise_conv2d_NCHWc_from_nchw'
                else:
                    raise ValueError(
                        "Tuning {} is not supported on x86".format(op_name))

                task = autotvm.task.create(func_create,
                                           args=tsk.args,
                                           target=target,
                                           template_key='direct')
                task.workload = tsk.workload

                # create tuner
                if tuner in ('xgb', 'xgb-rank'):
                    tuner_obj = XGBTuner(task, loss_type='rank')
                elif tuner == 'ga':
                    tuner_obj = GATuner(task, pop_size=50)
                elif tuner == 'random':
                    tuner_obj = RandomTuner(task)
                elif tuner == 'gridsearch':
                    tuner_obj = GridSearchTuner(task)
                else:
                    raise ValueError("Invalid tuner: " + tuner)

                # do tuning
                n_trial = len(task.config_space)
                tuner_obj.tune(n_trial=n_trial,
                               early_stopping=early_stopping,
                               measure_option=measure_option,
                               callbacks=[
                                   autotvm.callback.progress_bar(
                                       n_trial, prefix=prefix),
                                   autotvm.callback.log_to_file(log_filename)
                               ])

        #
        tasks = autotvm.task.extract_from_program(sym,
                                                  target=target,
                                                  params=params,
                                                  ops=(relay.op.nn.conv2d, ))
        logging.warning('Start tuning, this can be slow...')
        tuning_option = {
            'log_filename': 'tune.log',
            'tuner': 'random',
            'early_stopping': None,
            'measure_option': autotvm.measure_option(
                builder=autotvm.LocalBuilder(),
                runner=autotvm.LocalRunner(number=10,
                                           repeat=1,
                                           min_repeat_ms=1000),
            ),
        }
        tune_kernels(tasks, **tuning_option)

        with autotvm.apply_history_best(tuning_option['log_filename']):
            with relay.build_config(opt_level=opt_level):
                graph, lib, params = relay.build_module.build(sym,
                                                              target=target,
                                                              params=params)

    else:
        with relay.build_config(opt_level=opt_level):
            graph, lib, params = relay.build_module.build(sym,
                                                          target,
                                                          params=params)

    # export library, json graph and parameters
    lib.export_library(path + '_deploy_lib.so')
    with open(path + '_deploy_graph.json', 'w') as fo:
        fo.write(graph)
    with open(path + '_deploy_{:04n}.params'.format(epoch), 'wb') as fo:
        try:
            fo.write(relay.compiler.save_param_dict(params))
        except AttributeError:
            fo.write(relay.save_param_dict(params))
Example #23
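Compiles an ONNX or TensorFlow model, optionally cross-compiling for aarch64 and converting conv2d layouts to NCHW for CUDA, then writes the library, graph, and params.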
def compile(info):
    if info['model_path'].endswith('.onnx'):
        is_onnx = True
    elif info['model_path'].endswith('.pb'):
        is_onnx = False
    else:
        raise Exception('Model file format not supported')

    # Load model
    if is_onnx:
        onnx_model = onnx.load(info['model_path'])
        mod, params = relay.frontend.from_onnx(onnx_model, info['input_dict'])
        optimization_level = 3
    else:
        with tf.compat.v1.Session() as sess:
            with tf.io.gfile.GFile(info['model_path'], 'rb') as f:
                graph_def = tf.compat.v1.GraphDef()
                graph_def.ParseFromString(f.read())
                input_map = {}
                for index, (name,
                            shape) in enumerate(info['input_dict'].items()):
                    tf_new_image = tf.compat.v1.placeholder(
                        shape=[1 if x == -1 else x for x in shape],
                        dtype=info['input_data_type'],
                        name=name)
                    input_map["input:" + str(index)] = tf_new_image
                tf.import_graph_def(graph_def, name='', input_map=input_map)
                graph_def = sess.graph.as_graph_def()
                graph_def = tf_testing.ProcessGraphDefParam(graph_def)
        input_shape_dict = {'DecodeJpeg/contents': info['input_list']}
        mod, params = relay.frontend.from_tensorflow(
            graph_def, shape=input_shape_dict, outputs=info['output_names'])
        optimization_level = 2

    # Set compilation params
    if info['cross_compile']:
        if info['target'] == 'cuda':
            raise Exception('cuda cross-compilation not supported yet')
        info['target'] += ' -target=aarch64-linux-gnu'

    # Transform data layout to what is expected by CUDA hardware, i.e. NCHW
    if info['target'] == 'cuda':
        desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
        seq = tvm.transform.Sequential([
            relay.transform.RemoveUnusedFunctions(),
            relay.transform.ConvertLayout(desired_layouts)
        ])
        with tvm.transform.PassContext(opt_level=3):
            mod = seq(mod)

    # Compile model
    # Note opt_level cannot be higher than 2 because of a bug:
    # https://discuss.tvm.ai/t/tvm-0-6-1-compile-yolo-v2-tiny-fail-worked-in-v0-5-2/7244
    with autotvm.apply_history_best(info['autotvm_log']):
        with relay.build_config(opt_level=optimization_level):
            graph, lib, params = relay.build(mod,
                                             target=info['target'],
                                             params=params)

    # Write the compiled model to files
    output_model_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_MODULE_FILENAME)
    output_graph_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_GRAPH_FILENAME)
    output_param_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_PARAM_FILENAME)

    print('Writing library to', output_model_path)
    if info['cross_compile']:
        lib.export_library(
            output_model_path,
            cc.build_create_shared_func(options=[
                '--target=aarch64-linux-gnu', '-march=armv8-a', '-mfpu=NEON'
            ],
                                        compile_cmd='/usr/bin/clang'))
    else:
        lib.export_library(output_model_path)

    print('Writing graph to', output_graph_path)
    with open(output_graph_path, 'w') as graph_file:
        graph_file.write(graph)

    print('Writing weights to', output_param_path)
    with open(output_param_path, 'wb') as param_file:
        param_file.write(relay.save_param_dict(params))
Example #24
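A compile/export/load/run round trip driven by an option dict; the tuning-log contexts are left commented out.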
    print("Tuning graph...")
    # tune_graph(mod["main"], data_shape, option['log_file'], option['graph_best_file'])

    print("Compile...")
    # if tune_tasks was used:
    # with autotvm.apply_history_best(option['log_best_file']):
    # if tune_graph was used:
    # with autotvm.apply_graph_best(option['graph_best_file']):
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build_module.build(mod, target=option['target'], params=params_)

    print('Exporting library...')
    lib.export_library(option['path_lib'])
    with open(option['path_graph'], "w") as fo:
        fo.write(graph)
    with open(option['path_params'], "wb") as fo:
        fo.write(relay.save_param_dict(params))

    print('Loading library...')
    loaded_lib = tvm.module.load(option['path_lib'])
    loaded_graph = open(option['path_graph']).read()
    loaded_params = bytearray(open(option['path_params'], 'rb').read())

    print('Running...')
    ctx = tvm.context(option['target'], 0)
    data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype('float32'))
    m = tvm.contrib.graph_runtime.create(loaded_graph, loaded_lib, ctx)
    m.load_params(loaded_params)
    m.run(**{input_name:data_tvm}) #or m.set_input(input_name, data_tvm); m.run()
    out = m.get_output(0)
    print(out.asnumpy().argmax())
Example #25
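Builds a single conv2d with the weight bound as a param, saves the artifacts and input data, and checks the local TVM result against a NumPy conv2d reference.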
def build_conv2d_module(opts):
    batch = 1
    in_channel = 3
    out_channel = 16
    in_size = 8
    kernel = 3
    pad = 1
    stride = 1

    A = relay.var('A', shape=(batch, in_channel, in_size, in_size))
    W = relay.var('W', shape=(out_channel, in_channel, kernel, kernel))
    B = relay.op.nn.nn.conv2d(A,
                              W,
                              strides=(stride, stride),
                              padding=(pad, pad),
                              kernel_size=kernel,
                              data_layout='NCHW',
                              kernel_layout='OIHW',
                              out_layout='',
                              out_dtype='')

    a_data = np.random.uniform(size=(batch, in_channel, in_size,
                                     in_size)).astype('float32')
    w_data = np.random.uniform(size=(out_channel, in_channel, kernel,
                                     kernel)).astype('float32')
    func = relay.Function([A, W], B)
    params = {"W": w_data}
    graph, lib, params = relay.build_module.build(tvm.IRModule.from_expr(func),
                                                  target=TARGET,
                                                  params=params)

    build_dir = os.path.abspath(opts.out_dir)
    if not os.path.isdir(build_dir):
        os.makedirs(build_dir)

    lib.save(os.path.join(build_dir, 'conv2d_model.o'))
    with open(os.path.join(build_dir, 'conv2d_graph.json'),
              'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(os.path.join(build_dir, 'conv2d_params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))
    with open(os.path.join(build_dir, "conv2d_data.bin"), "wb") as fp:
        fp.write(a_data.astype(np.float32).tobytes())

    ## get TVM result on local machine
    params = {"W": w_data}
    local_target = 'llvm --system-lib'
    graph, lib, params = relay.build_module.build(tvm.IRModule.from_expr(func),
                                                  target=local_target,
                                                  params=params)
    tvm_out = run_conv2d_module(a_data,
                                graph,
                                lib,
                                params,
                                target=local_target)
    b_np = conv2d_nchw_python(a_data, w_data, (stride, stride), (pad, pad))
    print("TVM Output: " + str(tvm_out.shape))
    print("Numpy Output: " + str(b_np.shape))
    np.testing.assert_allclose(b_np, tvm_out, rtol=1e-2)
    with open(os.path.join(build_dir, "conv2d_output.bin"), "wb") as fp:
        fp.write(tvm_out.astype(np.float32).tobytes())
Example #26
File: model.py  Project: NathanTP/tvm
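TVMC packaging: the compiled library, graph JSON, and params from a GraphExecutorFactoryModule are written to a temp dir and bundled into a .tar.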
    def export_classic_format(
        self,
        executor_factory: GraphExecutorFactoryModule,
        package_path: Optional[str] = None,
        cross: Optional[Union[str, Callable]] = None,
        cross_options: Optional[str] = None,
        lib_format: str = "so",
    ):
        """Save this TVMCModel to file.
        Parameters
        ----------
        executor_factory : GraphExecutorFactoryModule
            The factory containing the compiled artifacts needed to run this model.
        package_path : str, None
            Where the model should be saved. Note that it will be packaged as a .tar file.
            If not provided, the package will be saved to a generically named file in tmp.
        cross : str or callable object, optional
            Function that performs the actual compilation.
        cross_options : str, optional
            Command line options to be passed to the cross compiler.
        lib_format : str
            How to export the modules function library. Must be one of "so" or "tar".

        Returns
        -------
        package_path : str
            The path that the package was saved to.
        """
        lib_name = "mod." + lib_format
        graph_name = "mod.json"
        param_name = "mod.params"

        temp = self._tmp_dir
        if package_path is None:
            package_path = self.default_package_path()
        path_lib = temp.relpath(lib_name)

        if not cross:
            executor_factory.get_lib().export_library(path_lib)
        else:
            if not cross_options:
                executor_factory.get_lib().export_library(
                    path_lib, tvm.contrib.cc.cross_compiler(cross))
            else:
                executor_factory.get_lib().export_library(
                    path_lib,
                    tvm.contrib.cc.cross_compiler(
                        cross, options=cross_options.split(" ")))
        self.lib_path = path_lib

        with open(temp.relpath(graph_name), "w") as graph_file:
            graph_file.write(executor_factory.get_graph_json())

        with open(temp.relpath(param_name), "wb") as params_file:
            params_file.write(
                relay.save_param_dict(executor_factory.get_params()))

        # Package up all the temp files into a tar file.
        with tarfile.open(package_path, "w") as tar:
            tar.add(path_lib, lib_name)
            tar.add(temp.relpath(graph_name), graph_name)
            tar.add(temp.relpath(param_name), param_name)

        return package_path
Example #27
        lib_name = "main.so"
    elif platform.system() == "Windows":
        lib_name = "main.dll"
    else:
        raise Exception("unknown system " + platform.system())

    print("export_library main lib")
    lib.export_library(lib_name)

    # or save object file for deploy usage
    # lib.save(os.path.join(work_root, binary_dir, 'model.o'))

    print("load main lib")
    sysLib = tvm.runtime.load_module(lib_name)

    ctx = tvm.cpu(0)

    input_data = np.random.random(dshape).astype(np.float32)

    for fk in ret_mods:
        mg = ret_mods[fk].get_json()
        mp = ret_mods[fk].get_params()
        print("test " + fk + "   ------------------------------------")
        module = graph_runtime.create(mg, sysLib, ctx)
        module.load_params(relay.save_param_dict(mp))
        module.set_input("data", tvm.nd.array(input_data))
        module.run()
        num_output = module.get_num_outputs()
        for idx in range(num_output):
            print(module.get_output(idx).shape)
Example #28
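Like Example #23 but without tuning or layout conversion: load an ONNX or TensorFlow model, build for llvm (optionally cross-compiled for aarch64), and write the artifacts.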
def compile(info):
    if info['model_path'].endswith('.onnx'):
        is_onnx = True
    elif info['model_path'].endswith('.pb'):
        is_onnx = False
    else:
        raise Exception('Model file format not supported')

    # Load model
    if is_onnx:
        onnx_model = onnx.load(info['model_path'])
        mod, params = relay.frontend.from_onnx(onnx_model, info['input_dict'])
        optimization_level = 3
    else:
        with tf.compat.v1.Session() as sess:
            with tf.io.gfile.GFile(info['model_path'], 'rb') as f:
                graph_def = tf.compat.v1.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def, name='')
                graph_def = sess.graph.as_graph_def()
                graph_def = tf_testing.ProcessGraphDefParam(graph_def)

        input_shape_dict = {'DecodeJpeg/contents': info['input_list']}
        mod, params = relay.frontend.from_tensorflow(
            graph_def, shape=input_shape_dict, outputs=info['output_names'])
        optimization_level = 2

    # Set compilation params
    target = 'llvm'
    if info['cross_compile']:
        target += ' -target=aarch64-linux-gnu'

    # Compile model
    # Note opt_level cannot be higher than 2 because of a bug:
    # https://discuss.tvm.ai/t/tvm-0-6-1-compile-yolo-v2-tiny-fail-worked-in-v0-5-2/7244
    with relay.build_config(opt_level=optimization_level):
        graph, lib, params = relay.build(mod, target=target, params=params)

    # Write the compiled model to files
    output_model_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_MODULE_FILENAME)
    output_graph_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_GRAPH_FILENAME)
    output_param_path = path.join(info['output_path'],
                                  OUTPUT_NETWORK_PARAM_FILENAME)

    print('Writing library to', output_model_path)
    if info['cross_compile']:
        lib.export_library(
            output_model_path,
            cc.build_create_shared_func(options=[
                '--target=aarch64-linux-gnu', '-march=armv8-a', '-mfpu=NEON'
            ],
                                        compile_cmd='/usr/bin/clang'))
    else:
        lib.export_library(output_model_path)

    print('Writing graph to', output_graph_path)
    with open(output_graph_path, 'w') as graph_file:
        graph_file.write(graph)

    print('Writing weights to', output_param_path)
    with open(output_param_path, 'wb') as param_file:
        param_file.write(relay.save_param_dict(params))
Example #29
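Builds a CIFAR-10 model from Keras or a hand-written Relay definition, optionally tuned and quantized, saves the artifacts, and compares against Keras locally (the snippet is truncated).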
def build_cifar(opts, model_name):
    from tuning.model.cifar10_relay import get_cifar_relay
    from tuning.model.cifar10_arm import get_cifar_keras, gen_custom_cifar_keras

    if model_name == 'cifar-10':
        # cifar_path = 'tuning/model/saved_models/cifar10_ch8_best.h5'
        # model_input_name = 'conv2d_1_input'
        model_input_name = 'cifar10_arm_input'
        # cifar_path = 'tuning/model/saved_models/cifar10_arm_best.h5'
        shape_dict = {model_input_name: (1, 3, 32, 32)}
        # model = get_cifar_keras(cifar_path, shape_dict)

        model = gen_custom_cifar_keras(shape_dict)
        mod, params = tvm.relay.frontend.from_keras(model, shape_dict)

        print("Compile...")
        if opts.tuned:
            print("INFO: Tuned model!")
            with autotvm.apply_history_best(
                    os.path.join('tuning', 'cifar_arm_footprint_min.txt')):
                if opts.quantize:
                    with relay.quantize.qconfig(calibrate_mode='global_scale',
                                                global_scale=8.0):
                        mod = relay.quantize.quantize(mod, params)
                        print('INFO: Quantized!')

                with relay.build_config(opt_level=3):
                    graph, lib, params = relay.build_module.build(
                        mod, target=TARGET, params=params)
        else:
            print("INFO: No Tuning!")
            with relay.build_config(opt_level=3):
                graph, lib, params = relay.build_module.build(mod,
                                                              target=TARGET,
                                                              params=params)

    elif model_name == 'cifar-10-relay':
        mod, params = get_cifar_relay()
        print('module type:', type(mod))
        shape_dict = {'data': (1, 3, 32, 32)}

        print("Compile...")
        if opts.tuned:
            print("INFO: Tuned model!")
            with autotvm.apply_history_best(
                    os.path.join('tuning', 'cifar_relay_footprint_min.txt')):
                if opts.quantize:
                    with relay.quantize.qconfig(calibrate_mode='global_scale',
                                                global_scale=8.0):
                        mod = relay.quantize.quantize(mod, params)
                        print('INFO: Quantized!')

                with relay.build_config(opt_level=3):
                    graph, lib, params = relay.build_module.build(
                        mod, target=TARGET, params=params)
        else:
            print("INFO: No Tuning!")
            with relay.build_config(opt_level=3):
                graph, lib, params = relay.build_module.build(mod,
                                                              target=TARGET,
                                                              params=params)
    else:
        raise ValueError('Wrong model name!')

    # Save the model object, graph, and params.
    lib.save(os.path.join(build_dir, 'cifar_model.o'))
    with open(os.path.join(build_dir, 'cifar_graph.bin'), 'wb') as f_graph:
        f_graph.write(bytes(graph, 'utf-8'))
    with open(os.path.join(build_dir, 'cifar_graph.json'),
              'w') as f_graph_json:
        f_graph_json.write(graph)
    with open(os.path.join(build_dir, 'cifar_params.bin'), 'wb') as f_params:
        f_params.write(relay.save_param_dict(params))

    # Create a sample input and reference output.
    if model_name == 'cifar-10':
        import keras
        from keras.datasets import cifar10
        # from keras.models import load_model

        num_classes = 10
        # model = load_model(cifar_path)
        (_, _), (x_test, y_test) = cifar10.load_data()

        x_test = x_test.astype('float32')
        x_test /= 255
        y_test = keras.utils.to_categorical(y_test, num_classes)

        test_x_sample = x_test[0:1, :, :, :]
        test_y_sample = y_test[0:1, :]
        print('x_test_sample shape:', test_x_sample.shape)
        print('y_test_sample shape:', test_y_sample.shape)
        scores = model.evaluate(test_x_sample, test_y_sample, verbose=1)
        keras_predict = model.predict(test_x_sample)
        print(keras_predict)

        ## get TVM result on local machine
        mod, params = relay.frontend.from_keras(model, shape_dict)
        local_target = 'llvm --system-lib'

        if opts.quantize:
            with relay.quantize.qconfig(calibrate_mode='global_scale',
                                        global_scale=8.0):
                mod = relay.quantize.quantize(mod, params)

        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(mod,
                                                          target=local_target,
                                                          params=params)

        ctx = tvm.context(local_target, 0)
        ## create module
        module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
        tvm_sample = test_x_sample.transpose([0, 3, 1, 2])
        # print("tvm_sample shape: ", tvm_sample.shape)
        module.set_input(model_input_name, tvm_sample)
        module.set_input(**params)
        ## run
        module.run()
        # get output
        tvm_out = module.get_output(0).asnumpy()

        print("TVM Output: " + str(tvm_out.shape))
        print("Keras Output: " + str(keras_predict.shape))
        if not opts.quantize:
            np.testing.assert_allclose(tvm_out, keras_predict, rtol=1e-2)
    elif model_name == 'cifar-10-relay':
        tvm_sample = np.array([1])
        tvm_out = np.array([1])
    else:
        raise ValueError('Wrong model name!')

    # save data and output
    with open(os.path.join(build_dir, "cifar_data.bin"), "wb") as fp:
        fp.write(tvm_sample.astype(np.float32).tobytes())
    with open(os.path.join(build_dir, "cifar_output.bin"), "wb") as fp:
        fp.write(tvm_out.astype(np.float32).tobytes())

    generate_id()
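On the consuming side, the artifacts written above can be loaded back with the graph runtime. A minimal sketch, assuming TARGET was a host-compatible llvm target, the file names used above, and a TVM version whose tvm.module.load accepts a saved .o:

# A minimal deployment sketch; TARGET compatibility is assumed.
loaded_lib = tvm.module.load(os.path.join(build_dir, 'cifar_model.o'))
loaded_graph = open(os.path.join(build_dir, 'cifar_graph.json')).read()
loaded_params = bytearray(
    open(os.path.join(build_dir, 'cifar_params.bin'), 'rb').read())
module = tvm.contrib.graph_runtime.create(loaded_graph, loaded_lib, tvm.cpu(0))
module.load_params(loaded_params)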
Example #30
0
# -----------------------------
# We can also save the graph, lib and parameters into files and load them
# back in a deployment environment.

####################################################

# save the graph, lib and params into separate files
from tvm.contrib import util

temp = util.tempdir()
path_lib = temp.relpath("deploy_lib.tar")
lib.export_library(path_lib)
with open(temp.relpath("deploy_graph.json"), "w") as fo:
    fo.write(graph)
with open(temp.relpath("deploy_param.params"), "wb") as fo:
    fo.write(relay.save_param_dict(params))
print(temp.listdir())

####################################################

# load the module back.
loaded_json = open(temp.relpath("deploy_graph.json")).read()
loaded_lib = tvm.runtime.load_module(path_lib)
loaded_params = bytearray(
    open(temp.relpath("deploy_param.params"), "rb").read())
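# `data_shape`, `ctx`, and `graph_runtime` are assumed to be defined earlier
# in the original tutorial this snippet comes from.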
input_data = tvm.nd.array(np.random.uniform(size=data_shape).astype("float32"))

module = graph_runtime.create(loaded_json, loaded_lib, ctx)
module.load_params(loaded_params)
module.run(data=input_data)
out_deploy = module.get_output(0).asnumpy()
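A natural last step is to check the round-trip; a minimal sketch, assuming the pre-serialization output is still available as a NumPy array named `out`:

# Sanity check: the reloaded module should reproduce the original output.
tvm.testing.assert_allclose(out_deploy, out, atol=1e-3)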
Example #31
0
def quantize_model(args):
    """Build with relay."""
    import tvm
    from tvm import relay
    from tvm.relay import quantize as qtz
    img_size = 224
    data_shape = (args.batch_size, 3, img_size, img_size)
    mx_sym, mx_args, mx_auxs = mx.model.load_checkpoint(args.model, 0)
    net, params = relay.frontend.from_mxnet(mx_sym, {"data": data_shape},
                                            arg_params=mx_args,
                                            aux_params=mx_auxs)
    target = args.target

    if args.original:
        # run original model
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(net, target, params=params)
        ctx = tvm.nd.context(target, 0)
        return graph, lib, params, ctx

    # Constant folding and scale folding.
    # (Uncomment `print(qgraph.astext(show_meta_data=False))` after each pass
    # to inspect the IR.)
    with relay.build_config(opt_level=3):
        qgraph = relay.optimize(net, target, params)

    with qtz.qconfig(skip_k_conv=0,
                     nbit_input=args.nbit_input,
                     nbit_weight=args.nbit_input,
                     global_scale=args.global_scale,
                     dtype_input=args.dtype_input,
                     dtype_weight=args.dtype_input,
                     dtype_activation=args.dtype_output,
                     store_lowbit_output=False,
                     debug_enabled_ops=None):
        print(qtz.current_qconfig())
        # Quantization pipeline: annotate -> calibrate -> (optionally) realize.
        qgraph = qtz.annotate(qgraph)
        qgraph = qtz.calibrate(qgraph)
        if not args.simulated:
            qgraph = qtz.realize(qgraph)
            qgraph = relay.ir_pass.infer_type(qgraph)

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(qgraph, target)

    ## Save/load the graph, lib, and params in separate files.
    # Save:
    lib.export_library(os.path.join(thisdir, "deploy_lib.so"))
    with open(os.path.join(thisdir, "deploy_graph.json"), "w") as fo:
        fo.write(graph)
    with open(os.path.join(thisdir, "deploy_param.params"), "wb") as fo:
        fo.write(relay.save_param_dict(params))
    # load
    graph = open(os.path.join(thisdir, "deploy_graph.json")).read()
    lib = tvm.module.load(os.path.join(thisdir, "deploy_lib.so"))
    params = bytearray(
        open(os.path.join(thisdir, "deploy_param.params"), "rb").read())

    ctx = tvm.nd.context(target, 0)
    return graph, lib, params, ctx
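For completeness, a hypothetical driver for quantize_model() might look as follows; note that after the save/load round-trip above, `graph` is a JSON string and `params` is a serialized bytearray, so the parameters go through load_params(). The `args` object is assumed to come from argparse:

# Hypothetical driver; `args` fields (model, target, batch_size, ...) are assumptions.
graph, lib, params, ctx = quantize_model(args)
module = tvm.contrib.graph_runtime.create(graph, lib, ctx)
module.load_params(params)  # `params` is a serialized bytearray here
data = np.random.uniform(size=(args.batch_size, 3, 224, 224)).astype('float32')
module.set_input('data', data)
module.run()
print(module.get_output(0).asnumpy().shape)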