예제 #1
0
def create_exec(f, target="llvm", params=None):
    """Compile a Relay expression or module into a VM executable.

    A bare ``relay.Expr`` is wrapped into a fresh module as its
    ``"main"`` function; a ``relay.Module`` is compiled as-is.
    """
    if isinstance(f, relay.Expr):
        module = relay.Module()
        module["main"] = f
    else:
        assert isinstance(f, relay.Module), "expected mod as relay.Module"
        module = f
    # Single compile path regardless of the input form.
    return _vm.compile(module, target=target, params=params)
예제 #2
0
def create_vm(f, ctx=None, target="llvm", params=None):
    """Compile a Relay expression or module and return an initialized VM.

    Parameters
    ----------
    f : relay.Expr or relay.Module
        A bare expression (wrapped into a module as ``"main"``) or a module.
    ctx : TVMContext, optional
        Execution context; defaults to ``tvm.cpu()``.
    target : str
        Compilation target.
    params : dict, optional
        Parameters to bind at compile time.
    """
    # Bug fix: the original default ``ctx=tvm.cpu()`` was evaluated once at
    # function-definition (import) time, creating the device eagerly and
    # sharing one object across all calls. Use a None sentinel instead.
    if ctx is None:
        ctx = tvm.cpu()
    if isinstance(f, relay.Expr):
        mod = relay.Module()
        mod["main"] = f
    else:
        assert isinstance(f, relay.Module), "expected mod as relay.Module"
        mod = f
    vm = _vm.compile(mod, target=target, params=params)
    vm.init(ctx)
    return vm
예제 #3
0
    def get_vm_output(mod,
                      data,
                      params,
                      target,
                      ctx,
                      dtype='float32',
                      number=2,
                      repeat=20):
        """Compile *mod* for the Relay VM, run it on *data*, and return
        the result as a NumPy array cast to *dtype*.

        When the enclosing scope's ``measure`` flag is set, also times
        inference with ``time_evaluator`` and prints mean/std in ms
        (``model`` is read from the enclosing scope for the report).
        """
        with tvm.transform.PassContext(opt_level=3):
            executable = vm.compile(mod, target, params=params)
            machine = vm_rt.VirtualMachine(executable)
            machine.init(ctx)
            output = machine.run(data)

        if measure:
            print("Evaluate vm inference cost of {} on {}".format(
                model, repr(ctx)))
            timer = machine.mod.time_evaluator("invoke",
                                               ctx,
                                               number=number,
                                               repeat=repeat)
            # Measure in millisecond.
            times_ms = np.array(timer("main", data).results) * 1000
            print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
                  (np.mean(times_ms), np.std(times_ms)))

        return output.asnumpy().astype(dtype)
예제 #4
0
def test_vm_onnx_process():
    """End-to-end Relay VM check for an ONNX model: import, compile for
    CUDA, serialize/deserialize the executable, run once, and benchmark.
    """
    import onnx
    onnx_model_path = "/data00/cuiqing.li/onnx_models/sr_dy.onnx"
    onnx_model = onnx.load(onnx_model_path)
    shape_dict = {"input.1": (1, 1, 640, 360)}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

    target = tvm.target.cuda()
    ctx = tvm.context(str(target), 0)

    with tvm.transform.PassContext(opt_level=3,
                                   disabled_pass=["FoldScaleAxis"]):
        exe = vm.compile(mod, target, params=params)
        code, lib = exe.save()
        saved_dir = "tmp"
        # os.makedirs is portable and race-free, unlike shelling out to
        # ``mkdir`` behind an ``== False`` existence check.
        os.makedirs(saved_dir, exist_ok=True)

        path_lib = os.path.join(saved_dir, "lib.so")
        lib.export_library(path_lib)

        code_path = os.path.join(saved_dir, "code.ro")
        with open(code_path, "wb") as fo:
            fo.write(code)

        loaded_lib = tvm.runtime.load_module(path_lib)
        # Close the bytecode file deterministically instead of leaking
        # the handle from a bare open(...).read().
        with open(code_path, "rb") as fi:
            loaded_code = bytearray(fi.read())

        # deserialize.
        des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib)
        des_vm = _vm.VirtualMachine(des_exec, ctx)

        input_shape = [1, 1, 640, 360]
        dtype = "float32"
        # NOTE(review): the input array is created without ``ctx`` even
        # though the target is CUDA (the TF example below passes ctx) —
        # confirm whether the VM copies it implicitly.
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=input_shape)).astype(dtype))
        data = (data_tvm,)
        res = des_vm.run(*data)

        print("Evaluate vm inference cost of {} on {}".format(
            "your testing model", repr(ctx)))
        ftimer_warmup = des_vm.module.time_evaluator("invoke",
                                                     ctx,
                                                     number=1,
                                                     repeat=50)
        # Bug fix: the warm-up evaluator was created but never invoked,
        # so no warm-up actually ran before the timed measurement.
        ftimer_warmup("main", *data)
        # Measure in millisecond.
        print("finished warming up and start testing vm compile performance")
        ftimer = des_vm.module.time_evaluator("invoke",
                                              ctx,
                                              number=1,
                                              repeat=600)
        # Measure in millisecond.
        prof_res = np.array(ftimer("main", *data).results) * 1000
        print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
예제 #5
0
 def get_tvm_executor(irmod, executor, target, params):
     """Build *irmod* with the requested TVM executor.

     Returns the compiled artifact for the ``"vm"`` or ``"graph"``
     executor, or ``None`` (after logging an error) for anything else.
     """
     if executor == "vm":
         log.info("Build TVM virtual machine")
         # Deep-copy the module so VM compilation cannot mutate the
         # caller's IRModule.
         return vm.compile(
             copy.deepcopy(irmod),
             target,
             params=params,
         )
     if executor == "graph":
         log.info("Build TVM graph executor")
         return relay.build(irmod, target=target, params=params)
     log.error("Executor type {} is unsupported. ".format(executor) +
               "Only \"vm\" and \"graph\" types are supported")
     return None
예제 #6
0
def vm_tensorflow_model_process():
    """End-to-end Relay VM run for a TensorFlow checkpoint: import,
    layout conversion, CUDA compilation, serialize/deserialize the
    executable, run once, and benchmark.
    """
    def normalize_node_name(nodes):
        # Strip the ":<output index>" suffix from TF tensor names, for
        # either a single name or a list of names.
        from tensorflow.compat import as_text
        if isinstance(nodes, list):
            ret = [as_text(node.split(':', 1)[0], 'ascii') for node in nodes]
        else:
            ret = as_text(nodes.split(':', 1)[0], 'ascii')

        return ret

    import tensorflow as tf
    from tvm.relay.frontend.tensorflow_parser import TFParser
    TF_pb_path = "/home/tiger/cuiqing.li/models/TF_checkpoint/latest"
    graph_def = TFParser(TF_pb_path).parse()
    input_names = ["input_ids_1:0", "input_mask_1:0", "segment_ids_1:0"]
    output_names = ["loss/Softmax:0"]
    input_shapes = [[1, 256], [1, 256], [1, 256]]

    input_names = [normalize_node_name(i) for i in input_names]
    output_names = [normalize_node_name(i) for i in output_names]
    mod, params = relay.frontend.from_tensorflow(
        graph_def,
        # dict(zip(...)) replaces the redundant dict comprehension.
        shape=dict(zip(input_names, input_shapes)),
        layout=None,
        outputs=output_names)

    # Convert conv2d layouts to NCHW before building for CUDA.
    desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
    seq = tvm.transform.Sequential([
        relay.transform.RemoveUnusedFunctions(),
        relay.transform.ConvertLayout(desired_layouts)
    ])
    with tvm.ir.transform.PassContext(opt_level=3):
        mod = seq(mod)

    target = tvm.target.cuda()
    ctx = tvm.context(str(target), 0)

    with tvm.transform.PassContext(opt_level=3,
                                   disabled_pass=["FoldScaleAxis"]):
        exe = vm.compile(mod, target, params=params)
        code, lib = exe.save()
        saved_dir = "tmp"
        # os.makedirs is portable and race-free, unlike shelling out to
        # ``mkdir`` behind an ``== False`` existence check.
        os.makedirs(saved_dir, exist_ok=True)

        path_lib = os.path.join(saved_dir, "lib.so")
        lib.export_library(path_lib)

        code_path = os.path.join(saved_dir, "code.ro")
        with open(code_path, "wb") as fo:
            fo.write(code)

        loaded_lib = tvm.runtime.load_module(path_lib)
        # Close the bytecode file deterministically instead of leaking
        # the handle from a bare open(...).read().
        with open(code_path, "rb") as fi:
            loaded_code = bytearray(fi.read())

        # deserialize.
        des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib)
        des_vm = _vm.VirtualMachine(des_exec, ctx)

        # Random int32 inputs placed on the target device. The original
        # loop's ``idx`` counter was never used and has been dropped.
        dtype = "int32"
        data = tuple(
            tvm.nd.array(
                (np.random.uniform(size=input_shape)).astype(dtype), ctx)
            for input_shape in input_shapes)
        res = des_vm.run(*data)

        print("Evaluate vm inference cost of {} on {}".format(
            "your testing model", repr(ctx)))
        ftimer_warmup = des_vm.module.time_evaluator("invoke",
                                                     ctx,
                                                     number=1,
                                                     repeat=50)
        # Bug fix: the warm-up evaluator was created but never invoked,
        # so no warm-up actually ran before the timed measurement.
        ftimer_warmup("main", *data)
        # Measure in millisecond.
        print("finished warming up and start testing vm compile performance")
        ftimer = des_vm.module.time_evaluator("invoke",
                                              ctx,
                                              number=1,
                                              repeat=100)
        # Measure in millisecond.
        prof_res = np.array(ftimer("main", *data).results) * 1000
        print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))