def test_adt_list():
    mod = relay.Module()
    p = Prelude(mod)

    l1 = p.cons(relay.const(1), p.nil())
    l21 = p.cons(relay.const(2), l1)
    l321 = p.cons(relay.const(3), l21)

    f = relay.Function([], l321)
    mod["main"] = f

    exe = create_exec(mod)
    code, lib = exe.save()
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec)
    des_vm.init(tvm.cpu())

    result = veval(des_vm)
    assert len(result) == 2
    assert len(result[1]) == 2
    assert len(result[1][1]) == 2

    res = []
    res.append(result[0].asnumpy().tolist())
    res.append(result[1][0].asnumpy().tolist())
    res.append(result[1][1][0].asnumpy().tolist())
    tvm.testing.assert_allclose(res, np.array([3, 2, 1]))
def test_loop():
    mod = relay.module.Module({})
    sum_up = relay.GlobalVar('sum_up')
    i = relay.var('i', shape=[], dtype='int32')
    accum = relay.var('accum', shape=[], dtype='int32')
    sb = ScopeBuilder()
    with sb.if_scope(relay.equal(i, relay.const(0, 'int32'))):
        sb.ret(accum)
    with sb.else_scope():
        one_less = relay.subtract(i, relay.const(1, 'int32'))
        new_accum = relay.add(accum, i)
        sb.ret(relay.Call(sum_up, [one_less, new_accum]))
    func = relay.Function([i, accum], sb.get())
    mod[sum_up] = func

    loop_bound = 0
    i_data = np.array(loop_bound, dtype='int32')
    accum_data = np.array(0, dtype='int32')
    iarg = relay.var('i', shape=[], dtype='int32')
    aarg = relay.var('accum', shape=[], dtype='int32')
    mod["main"] = relay.Function([iarg, aarg], sum_up(iarg, aarg))

    exe = create_exec(mod)
    code, lib = exe.save()
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec)
    des_vm.init(tvm.cpu())

    result = veval(des_vm, i_data, accum_data)
    tvm.testing.assert_allclose(result.asnumpy(), sum(range(1, loop_bound + 1)))
def get_serialized_output(mod, *data, params=None, target="llvm", ctx=tvm.cpu()):
    exe = create_exec(mod, target, params=params)
    code, lib = exe.save()
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec, ctx)
    result = des_vm.run(*data)
    return result
def get_vm_output(mod, data, params, target, ctx, dtype='float32',
                  number=2, repeat=20, measure=False, model="model"):
    with tvm.transform.PassContext(opt_level=3):
        exe = vm.compile(mod, target, params=params)
        rly_vm = vm_rt.VirtualMachine(exe)
        rly_vm.init(ctx)
        result = rly_vm.run(data)

    # When measure=True, time the "main" entry of `model` with the VM's
    # time evaluator.
    if measure:
        print("Evaluate vm inference cost of {} on {}".format(model, repr(ctx)))
        ftimer = rly_vm.mod.time_evaluator("invoke", ctx, number=number, repeat=repeat)
        # Measure in milliseconds.
        prof_res = np.array(ftimer("main", data).results) * 1000
        print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))

    return result.asnumpy().astype(dtype)
def test_save_load():
    x = relay.var("x", shape=(10, 10))
    f = relay.Function([x], x + x)
    x_data = np.random.rand(10, 10).astype("float32")

    # serialize.
    exe = create_exec(f)
    code, lib = exe.save()
    assert isinstance(code, bytearray)

    # save and load the code and lib file.
    tmp = utils.tempdir()
    path_lib = tmp.relpath("lib.so")
    lib.export_library(path_lib)
    with open(tmp.relpath("code.ro"), "wb") as fo:
        fo.write(code)

    loaded_lib = tvm.runtime.load_module(path_lib)
    with open(tmp.relpath("code.ro"), "rb") as fi:
        loaded_code = bytearray(fi.read())

    # deserialize.
    des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib)
    des_vm = _vm.VirtualMachine(des_exec, tvm.cpu())

    res = des_vm.run(x_data)
    tvm.testing.assert_allclose(res.asnumpy(), x_data + x_data)
def __init__(self, mod, ctx, target):
    if mod is None:
        raise RuntimeError("Must provide module to get VM executor.")
    self.mod = mod
    self.ctx = ctx
    self.target = target
    self.executable = compile(mod, target)
    self.vm = vm_rt.VirtualMachine(self.executable, ctx)
def get_serialized_output(mod, data, params, target, ctx, dtype='float32'):
    exe = create_exec(mod, target, params=params)
    code, lib = exe.save()
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec)
    des_vm.init(ctx)
    result = des_vm.run(data)
    return result.asnumpy().astype(dtype)
def test_vm_onnx_process():
    import onnx

    onnx_model_path = "/data00/cuiqing.li/onnx_models/sr_dy.onnx"
    onnx_model = onnx.load(onnx_model_path)
    shape_dict = {"input.1": (1, 1, 640, 360)}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

    target = tvm.target.cuda()
    ctx = tvm.context(str(target), 0)
    with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
        exe = vm.compile(mod, target, params=params)

    code, lib = exe.save()
    saved_dir = "tmp"
    os.makedirs(saved_dir, exist_ok=True)
    path_lib = os.path.join(saved_dir, "lib.so")
    lib.export_library(path_lib)
    code_path = os.path.join(saved_dir, "code.ro")
    with open(code_path, "wb") as fo:
        fo.write(code)

    loaded_lib = tvm.runtime.load_module(path_lib)
    with open(code_path, "rb") as fi:
        loaded_code = bytearray(fi.read())

    # deserialize.
    des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib)
    des_vm = _vm.VirtualMachine(des_exec, ctx)

    input_shape = [1, 1, 640, 360]
    dtype = "float32"
    data = (tvm.nd.array(np.random.uniform(size=input_shape).astype(dtype)),)
    res = des_vm.run(*data)

    print("Evaluate vm inference cost of {} on {}".format(
        "your testing model", repr(ctx)))
    # Warm up: time_evaluator only builds a callable, so it must be invoked
    # for the warmup runs to actually happen.
    ftimer_warmup = des_vm.module.time_evaluator("invoke", ctx, number=1, repeat=50)
    ftimer_warmup("main", *data)
    print("finished warming up and start testing vm compile performance")
    ftimer = des_vm.module.time_evaluator("invoke", ctx, number=1, repeat=600)
    # Measure in milliseconds.
    prof_res = np.array(ftimer("main", *data).results) * 1000
    print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))
def _make_executor(self, expr=None):
    if expr:
        self.mod["main"] = expr
    self.executable = compile(self.mod, self.target)
    self.vm = vm_rt.VirtualMachine(self.executable, self.device)

    def _vm_wrapper(*args, **kwargs):
        args = self._convert_args(self.mod["main"], args, kwargs)
        return self.vm.run(*args)

    return _vm_wrapper
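# Usage sketch (assumption: a recent TVM where relay.create_executor takes a
# `device=` argument): create_executor("vm") constructs the VM executor above,
# and evaluate() returns the _vm_wrapper closure built by _make_executor.
def _example_vm_executor_usage():
    import numpy as np
    import tvm
    from tvm import relay

    x = relay.var("x", shape=(2, 2), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([x], x + x))

    run = relay.create_executor("vm", mod=mod, device=tvm.cpu(), target="llvm").evaluate()
    out = run(np.ones((2, 2), dtype="float32"))
    print(out.numpy())  # [[2. 2.] [2. 2.]]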
def test_const():
    c = relay.const(1.0, "float32")
    x = relay.var('x', shape=(10, 10), dtype='float32')
    f = relay.Function([x], x + c)

    exe = create_exec(f)
    code, lib = exe.save()
    assert isinstance(code, bytearray)
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec)
    des_vm.init(tvm.cpu())

    x_data = np.random.rand(10, 10).astype('float32')
    res = veval(des_vm, x_data)
    tvm.testing.assert_allclose(res.asnumpy(), x_data + 1)
def test_tuple():
    ttype = relay.TupleType([relay.TensorType((1,)), relay.TensorType((10,))])
    tup = relay.var('tup', type_annotation=ttype)
    f = relay.Function([tup], relay.TupleGetItem(tup, 1))

    # Input shapes match the declared tuple type: (1,) and (10,).
    i_data = np.random.rand(1).astype('float32')
    j_data = np.random.rand(10).astype('float32')

    exe = create_exec(f)
    code, lib = exe.save()
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec)
    des_vm.init(tvm.cpu())

    result = veval(des_vm, (i_data, j_data))
    tvm.testing.assert_allclose(result.asnumpy(), j_data)
def test_closure():
    x = relay.var('x', shape=())
    y = relay.var('y', shape=())
    f = relay.Function([x], x + y)
    ff = relay.Function([y], f)
    clo = ff(relay.const(1.0))
    main = clo(relay.const(2.0))

    exe = create_exec(main)
    code, lib = exe.save()
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec)
    des_vm.init(tvm.cpu())

    res = veval(des_vm)
    tvm.testing.assert_allclose(res.asnumpy(), 3.0)
def test_adt_compose():
    mod = relay.Module()
    p = Prelude(mod)
    compose = p.compose

    # add_one = fun x -> x + 1
    sb = relay.ScopeBuilder()
    x = relay.var('x', 'float32')
    x1 = sb.let('x1', x)
    xplusone = x1 + relay.const(1.0, 'float32')
    sb.ret(xplusone)
    body = sb.get()
    add_one = relay.GlobalVar("add_one")
    add_one_func = relay.Function([x], body)

    # add_two = compose(add_one, add_one)
    sb = relay.ScopeBuilder()
    y = relay.var('y', 'float32')
    add_two_func = sb.let('add_two', compose(add_one_func, add_one_func))
    add_two_res = add_two_func(y)
    sb.ret(add_two_res)
    add_two_body = sb.get()

    mod[add_one] = add_one_func
    f = relay.Function([y], add_two_body)
    mod["main"] = f

    exe = create_exec(mod)
    code, lib = exe.save()
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec)
    des_vm.init(tvm.cpu())

    x_data = np.array(np.random.rand()).astype('float32')
    result = veval(des_vm, x_data)
    tvm.testing.assert_allclose(result.asnumpy(), x_data + 2.0)
def main():
    # `config` is expected to be a module-level dict; see the example below.
    params_dict = config
    saved_dir = params_dict['saved_dir']
    # target = "cuda -libs=cudnn,cublas"
    target = params_dict['target']
    ctx = tvm.context(str(target), 0)

    path_lib = os.path.join(saved_dir, "lib.so")
    code_path = os.path.join(saved_dir, "code.ro")
    loaded_lib = tvm.runtime.load_module(path_lib)
    with open(code_path, "rb") as fi:
        loaded_code = bytearray(fi.read())

    # deserialize.
    des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib)
    des_vm = _vm.VirtualMachine(des_exec, ctx)

    dtype = params_dict['dtype']
    data = tuple(
        tvm.nd.array(np.random.uniform(size=input_shape).astype(dtype))
        for input_shape in params_dict['inference_input_shapes'])
    res = des_vm.run(*data)

    print("Evaluate vm inference cost of {} on {}".format(
        "your testing model", repr(ctx)))
    # Warm up: time_evaluator only builds a callable, so it must be invoked
    # for the warmup runs to actually happen.
    ftimer_warmup = des_vm.module.time_evaluator("invoke", ctx, number=1, repeat=50)
    ftimer_warmup("main", *data)
    print("finished warming up and start testing vm compile performance")
    ftimer = des_vm.module.time_evaluator("invoke", ctx, number=1, repeat=600)
    # Measure in milliseconds.
    prof_res = np.array(ftimer("main", *data).results) * 1000
    print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))
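# A hypothetical `config` satisfying the keys main() reads; the key names come
# from main() above, the values are illustrative only.
config = {
    "saved_dir": "tmp",                            # directory holding lib.so / code.ro
    "target": "cuda",                              # any TVM target string
    "dtype": "float32",
    "inference_input_shapes": [[1, 3, 224, 224]],  # one shape per model input
}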
def test_if():
    x = relay.var('x', shape=(10, 10))
    y = relay.var('y', shape=(10, 10))
    equal = relay.op.equal(x, y)
    equal = relay.op.nn.batch_flatten(equal)
    f = relay.Function([x, y], relay.If(relay.op.min(equal, axis=[0, 1]), x, y))

    x_data = np.random.rand(10, 10).astype('float32')
    y_data = np.random.rand(10, 10).astype('float32')

    exe = create_exec(f)
    code, lib = exe.save()
    des_exec = _vm.Executable.load_exec(code, lib)
    des_vm = _vm.VirtualMachine(des_exec)
    des_vm.init(tvm.cpu())

    # same
    res = veval(des_vm, x_data, x_data)
    tvm.testing.assert_allclose(res.asnumpy(), x_data)

    # diff
    res = veval(des_vm, x_data, y_data)
    tvm.testing.assert_allclose(res.asnumpy(), y_data)
def run_module(
    tvmc_package: TVMCPackage,
    device: str,
    hostname: Optional[str] = None,
    port: Union[int, str] = 9090,
    rpc_key: Optional[str] = None,
    inputs: Optional[Dict[str, np.ndarray]] = None,
    fill_mode: str = "random",
    repeat: int = 10,
    number: int = 10,
    profile: bool = False,
    end_to_end: bool = False,
    options: dict = None,
):
    """Run a compiled graph executor module locally or remotely with
    optional input values.

    If input tensors are not specified explicitly, they can be filled
    with zeroes, ones or random data.

    Parameters
    ----------
    tvmc_package : TVMCPackage
        The compiled model package object that will be run.
    device : str
        The device (e.g. "cpu" or "cuda") to be targeted by the RPC
        session, local or remote.
    hostname : str, optional
        The hostname of the target device on which to run.
    port : int, optional
        The port of the target device on which to run.
    rpc_key : str, optional
        The tracker key of the target device. If this is set, it will be
        assumed that remote points to a tracker.
    inputs : dict, optional
        A dictionary that maps input names to numpy values. If not provided,
        inputs will be generated using the fill_mode argument.
    fill_mode : str, optional
        The fill-mode to use when generating data for input tensors.
        Valid options are "zeros", "ones" and "random". Defaults to "random".
    repeat : int, optional
        How many times to repeat the run.
    number : int, optional
        The number of runs to measure within each repeat.
    profile : bool
        Whether to profile the run with the debug executor.
    end_to_end : bool
        Whether to measure the time of memory copies as well as model
        execution. Turning this on can provide a more realistic estimate
        of how long running the model in production would take.

    Returns
    -------
    outputs : dict
        a dictionary with output tensors, generated by the module
    times : list of str
        execution times generated by the time evaluator
    """
    if not isinstance(tvmc_package, TVMCPackage):
        raise TVMCException(
            "This model doesn't seem to have been compiled yet. "
            "Try calling tvmc.compile on the model before running it."
        )

    with ExitStack() as stack:
        # Currently only two package formats are supported: "classic" and
        # "mlf". The latter can only be used for micro targets, i.e. with microTVM.
        if device == "micro":
            if tvmc_package.type != "mlf":
                raise TVMCException(
                    f"Model {tvmc_package.package_path} is not a MLF archive.")

            project_dir = get_project_dir(tvmc_package.project_dir)

            # This is guaranteed to work since project_dir was already checked when
            # building the dynamic parser to accommodate the project options, so no
            # checks are in place when calling GeneratedProject.
            project_ = project.GeneratedProject.from_directory(project_dir, options)
        else:
            if tvmc_package.type == "mlf":
                raise TVMCException(
                    "You're trying to run a model saved using the Model Library Format (MLF). "
                    "MLF can only be used to run micro devices ('--device micro')."
                )

        if hostname:
            if isinstance(port, str):
                port = int(port)
            # Remote RPC
            if rpc_key:
                logger.debug("Running on remote RPC tracker with key %s.", rpc_key)
                session = request_remote(rpc_key, hostname, port, timeout=1000)
            else:
                logger.debug("Running on remote RPC with no key.")
                session = rpc.connect(hostname, port)
        elif device == "micro":
            # Remote RPC (running on a micro target)
            logger.debug("Running on remote RPC (micro target).")
            try:
                session = tvm.micro.Session(project_.transport())
                stack.enter_context(session)
            except Exception:
                raise TVMCException("Could not open a session with the micro target.")
        else:
            # Local
            logger.debug("Running a local session.")
            session = rpc.LocalSession()

        # Micro targets don't support uploading a model. The model to be run
        # must already be flashed into the micro target before one tries
        # to run it. Hence skip model upload for micro targets.
        if device != "micro":
            session.upload(tvmc_package.lib_path)
            lib = session.load_module(tvmc_package.lib_name)

        # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
        logger.debug("Device is %s.", device)
        if device == "cuda":
            dev = session.cuda()
        elif device == "cl":
            dev = session.cl()
        elif device == "metal":
            dev = session.metal()
        elif device == "vulkan":
            dev = session.vulkan()
        elif device == "rocm":
            dev = session.rocm()
        elif device == "micro":
            dev = session.device
            lib = session.get_system_lib()
        else:
            assert device == "cpu"
            dev = session.cpu()

        if tvmc_package.type == "vm":
            assert inputs is not None, "vm runner requires inputs to be provided as a dict"

            input_tensor = {}
            for e, i in inputs.items():
                input_tensor[e] = tvm.nd.array(i, dev)

            if profile:
                logger.debug("Creating vm with profile enabled.")
                exe = profiler_vm.VirtualMachineProfiler(lib, dev)
                res = exe.profile(**input_tensor, func_name="main")
                # This print is intentional
                print(res)
            else:
                exe = vm.VirtualMachine(lib, dev)

            exe_outputs = exe.invoke("main", **input_tensor)
            times = exe.benchmark(
                dev,
                **input_tensor,
                func_name="main",
                repeat=repeat,
                number=number,
                end_to_end=end_to_end,
            )

            # Special handling if the output only has a single value
            if not isinstance(exe_outputs, list):
                exe_outputs = [exe_outputs]

            outputs = {}
            for i, val in enumerate(exe_outputs):
                output_name = "output_{}".format(i)
                outputs[output_name] = val.numpy()
        else:
            # TODO(gromero): Adjust for micro targets.
            if profile:
                logger.debug("Creating runtime with profiling enabled.")
                module = debug_executor.create(tvmc_package.graph, lib, dev, dump_root="./prof")
            else:
                if device == "micro":
                    logger.debug("Creating runtime (micro) with profiling disabled.")
                    module = tvm.micro.create_local_graph_executor(tvmc_package.graph, lib, dev)
                else:
                    logger.debug("Creating runtime with profiling disabled.")
                    module = executor.create(tvmc_package.graph, lib, dev)

            logger.debug("Loading params into the runtime module.")
            module.load_params(tvmc_package.params)

            logger.debug("Collecting graph input shape and type:")
            shape_dict, dtype_dict = module.get_input_info()
            logger.debug("Graph input shape: %s", shape_dict)
            logger.debug("Graph input type: %s", dtype_dict)

            inputs_dict = make_inputs_dict(shape_dict, dtype_dict, inputs, fill_mode)

            logger.debug("Setting inputs to the module.")
            module.set_input(**inputs_dict)

            # Run must be called explicitly if profiling
            if profile:
                logger.info("Running the module with profiling enabled.")
                report = module.profile()
                # This print is intentional
                print(report)

            if device == "micro":
                # TODO(gromero): Fix time_evaluator() for micro targets. Once it's
                # fixed, module.benchmark() can be used instead and this if/else
                # can be removed.
                module.run()
                times = []
            else:
                # Call the benchmarking function of the executor.
                # Optionally measure e2e data transfers from the
                # CPU to device memory overheads (e.g. PCIE
                # overheads if the device is a discrete GPU).
                if end_to_end:
                    dev = session.cpu()
                times = module.benchmark(dev, number=number, repeat=repeat, end_to_end=end_to_end)

            logger.debug("Collecting the output tensors.")
            num_outputs = module.get_num_outputs()
            outputs = {}
            for i in range(num_outputs):
                output_name = "output_{}".format(i)
                outputs[output_name] = module.get_output(i).numpy()

        return TVMCResult(outputs, times)
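# A minimal local-run sketch for run_module via the public tvmc driver
# (tvm.driver.tvmc); the model file name is hypothetical, and tvmc.run is
# assumed to be the thin wrapper the driver exposes over run_module.
def _example_run_module_usage():
    from tvm.driver import tvmc

    model = tvmc.load("my_model.onnx")            # hypothetical model file
    package = tvmc.compile(model, target="llvm")
    result = tvmc.run(package, device="cpu", repeat=3, number=5)

    print(result.times)                 # benchmark timings from the time evaluator
    print(result.outputs["output_0"])   # first output tensor as a numpy array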
def vm_tensorflow_model_process():
    def normalize_node_name(nodes):
        from tensorflow.compat import as_text
        if isinstance(nodes, list):
            ret = [as_text(node.split(':', 1)[0], 'ascii') for node in nodes]
        else:
            ret = as_text(nodes.split(':', 1)[0], 'ascii')
        return ret

    import tensorflow as tf
    from tvm.relay.frontend.tensorflow_parser import TFParser

    TF_pb_path = "/home/tiger/cuiqing.li/models/TF_checkpoint/latest"
    graph_def = TFParser(TF_pb_path).parse()
    input_names = ["input_ids_1:0", "input_mask_1:0", "segment_ids_1:0"]
    output_names = ["loss/Softmax:0"]
    input_shapes = [[1, 256], [1, 256], [1, 256]]
    input_names = [normalize_node_name(i) for i in input_names]
    output_names = [normalize_node_name(i) for i in output_names]

    mod, params = relay.frontend.from_tensorflow(
        graph_def,
        shape={k: v for k, v in zip(input_names, input_shapes)},
        layout=None,
        outputs=output_names)

    desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
    seq = tvm.transform.Sequential([
        relay.transform.RemoveUnusedFunctions(),
        relay.transform.ConvertLayout(desired_layouts)
    ])
    with tvm.ir.transform.PassContext(opt_level=3):
        mod = seq(mod)

    target = tvm.target.cuda()
    ctx = tvm.context(str(target), 0)
    with tvm.transform.PassContext(opt_level=3, disabled_pass=["FoldScaleAxis"]):
        exe = vm.compile(mod, target, params=params)

    code, lib = exe.save()
    saved_dir = "tmp"
    os.makedirs(saved_dir, exist_ok=True)
    path_lib = os.path.join(saved_dir, "lib.so")
    lib.export_library(path_lib)
    code_path = os.path.join(saved_dir, "code.ro")
    with open(code_path, "wb") as fo:
        fo.write(code)

    loaded_lib = tvm.runtime.load_module(path_lib)
    with open(code_path, "rb") as fi:
        loaded_code = bytearray(fi.read())

    # deserialize.
    des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib)
    des_vm = _vm.VirtualMachine(des_exec, ctx)

    dtype = "int32"
    data = tuple(
        tvm.nd.array(np.random.uniform(size=input_shape).astype(dtype), ctx)
        for input_shape in input_shapes)
    res = des_vm.run(*data)

    print("Evaluate vm inference cost of {} on {}".format(
        "your testing model", repr(ctx)))
    # Warm up: time_evaluator only builds a callable, so it must be invoked
    # for the warmup runs to actually happen.
    ftimer_warmup = des_vm.module.time_evaluator("invoke", ctx, number=1, repeat=50)
    ftimer_warmup("main", *data)
    print("finished warming up and start testing vm compile performance")
    ftimer = des_vm.module.time_evaluator("invoke", ctx, number=1, repeat=100)
    # Measure in milliseconds.
    prof_res = np.array(ftimer("main", *data).results) * 1000
    print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))