Example #1
def test_env_scope():
    env = vta.get_env()
    cfg = env.pkg_config().cfg_dict
    cfg["TARGET"] = "xyz"
    with vta.Environment(cfg):
        assert vta.get_env().TARGET == "xyz"
    assert vta.get_env().TARGET == env.TARGET
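
The test above relies on vta.Environment acting as a scoped override. A hedged sketch of the same pattern in application code, temporarily switching to the simulator target ("sim" is an assumption; any valid TARGET works):

import vta

# Copy the current configuration and override one field for a scoped run.
env = vta.get_env()
cfg = env.pkg_config().cfg_dict
cfg["TARGET"] = "sim"
with vta.Environment(cfg):
    print(vta.get_env().TARGET)   # "sim" inside the scope
print(vta.get_env().TARGET)       # original target restored outside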
Example #2
def run_single(model, target_name, device, config):
    """Run the experiment on a single target."""
    with init_vta_env(target_name):
        vta_env = vta.get_env()

        target = vta_env.target if device == 'vta' else vta_env.target_vta_cpu

        # Make sure TVM was compiled with RPC support.
        assert tvm.module.enabled('rpc')
        remote = init_remote(vta_env, config)
        # Get execution context from remote
        ctx = remote.ext_dev(0) if device == 'vta' else remote.cpu(0)

        graph_module = build_model(model, remote, target, ctx, vta_env)
        mean_time = run_model(graph_module, remote, ctx, vta_env, config)

        return mean_time
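
A hedged driver for run_single; the model name, targets, and config contents are illustrative assumptions, and init_vta_env, init_remote, build_model, and run_model are the project helpers this function already depends on:

# Hypothetical invocation; 'resnet18_v1' and the config keys are assumptions.
config = {'host': '192.168.2.99', 'port': 9091}
mean_time = run_single('resnet18_v1', 'sim', 'vta', config)
print('Mean inference time:', mean_time)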
Example #3
import torch

import tvm
from tvm import relay, rpc
import vta
from vta.top import graph_pack


def convert_to_vta(model_path, image_channel, image_size):
    device = torch.device('cpu')
    model = torch.load(model_path, map_location=device)
    model = model.eval()

    input_shape = [1, image_channel, image_size, image_size]
    input_data = torch.randn(input_shape)
    scripted_model = torch.jit.trace(model, input_data).eval()

    input_name = 'input0'  # any label works; it names the graph input for the Relay frontend
    shape_list = [(input_name, input_shape)]
    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)
    print(mod["main"])

    remote = rpc.LocalSession()
    ctx = remote.ext_dev(0)

    target = 'vta'
    target_host = 'vta'
    env = vta.get_env()
    pack_dict = {
        "yolov3-tiny": ["nn.max_pool2d", "cast", 8, 237],
    }
    MODEL_NAME = 'yolov3-tiny'
    with tvm.transform.PassContext(opt_level=2):
        with relay.quantize.qconfig(global_scale=33.0,
                                    skip_conv_layers=[0],
                                    store_lowbit_output=True,
                                    round_for_shift=True):
            mod = relay.quantize.quantize(mod, params=params)
        print(mod["main"])
        mod = graph_pack(mod["main"],
                         env.BATCH,
                         env.BLOCK_OUT,
                         env.WGT_WIDTH,
                         start_name=pack_dict[MODEL_NAME][0],
                         stop_name=pack_dict[MODEL_NAME][1],
                         start_name_idx=pack_dict[MODEL_NAME][2],
                         stop_name_idx=pack_dict[MODEL_NAME][3])
    return mod
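
For reference, a hedged call into convert_to_vta; the checkpoint path and the 3-channel 416x416 input geometry are illustrative assumptions consistent with the yolov3-tiny entry above:

# Hypothetical invocation; the path and input size are assumptions.
mod = convert_to_vta('yolov3_tiny.pt', image_channel=3, image_size=416)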
Example #4
import argparse
import json

import numpy as np
import mxnet as mx
from mxnet import gluon
from mxnet.gluon.model_zoo import vision

import tvm
import vta

parser = argparse.ArgumentParser()
parser.add_argument('--nonsplit', action='store_true')
args = parser.parse_args()

ctx = mx.cpu()
# get dense layer
if args.nonsplit:
    dense = vision.resnet18_v1(pretrained=True, ctx=ctx).output
else:
    dense = gluon.nn.Dense(1000)
    dense.load_parameters('params/dense-1.params', ctx=ctx)

# Get categories for ImageNet
categories = np.array(json.load(open('image_net_labels.json', 'r')))

assert tvm.module.enabled('rpc')
# Load VTA parameters from the vta/config/vta_config.json file
env = vta.get_env()

# device, `vta` or `cpu`
device = 'vta'
target = env.target if device == 'vta' else env.target_vta_cpu

start_pack = 'nn.max_pool2d'
stop_pack = 'nn.global_avg_pool2d'

# perform inference and gather execution statistics
num = 1  # number of times we run module for a single measurement
rep = 1  # number of measurements (we derive std dev from this)

# IP addresses of Pynq boards, hardcoded for the demo
if args.nonsplit:
    pynqs = ['192.168.2.5']
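
A hedged sketch of how the pynqs list might be consumed, following the rpc.connect pattern shown in the next example; the port number is an assumption:

from tvm import rpc

# Hypothetical: open one RPC session per board; port 9091 is an assumption.
remotes = [rpc.connect(ip, 9091) for ip in pynqs]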
Example #5
# We start by programming the Pynq's FPGA and building its RPC runtime
# as we did in the VTA introductory tutorial.

from __future__ import absolute_import, print_function

import os
import tvm
import vta
import numpy as np
from tvm import rpc
from tvm.contrib import util
from vta.testing import simulator

# Load VTA parameters from the vta/config/vta_config.json file
env = vta.get_env()

# We read the Pynq RPC host IP address and port number from the OS environment
host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99")
port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091"))

# We configure both the bitstream and the runtime system on the Pynq
# to match the VTA configuration specified by the vta_config.json file.
if env.TARGET == "pynq":

    # Make sure that TVM was compiled with RPC=1
    assert tvm.module.enabled("rpc")
    remote = rpc.connect(host, port)

    # Reconfigure the JIT runtime
    vta.reconfig_runtime(remote)
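    # The comment above also promises FPGA programming; a hedged sketch of
    # that step, following the VTA introductory tutorial's pattern.
    # Assumption: bitstream=None fetches the default pre-compiled bitstream
    # matching vta_config.json.
    vta.program_fpga(remote, bitstream=None)

elif env.TARGET in ("sim", "tsim"):
    # In simulation, execute locally through a loopback RPC session.
    remote = rpc.LocalSession()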
Example #6
def test_env():
    env = vta.get_env()
    mock = env.mock
    assert mock.alu == "skip_alu"
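
A hedged follow-on to the test: the mock attributes are "skip" pragma strings that can stand in for the real hardware pragmas, e.g. when profiling a schedule with one stage stubbed out. Attribute names beyond alu are assumptions based on the same naming pattern:

import vta

# Minimal, runnable sketch; prints the mock pragma string for each stage.
env = vta.get_env()
mock = env.mock
for name in ("alu", "gemm", "dma_copy"):
    print(name, "->", getattr(mock, name))  # e.g. alu -> skip_alu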
Example #7
import os
from os.path import exists
import numpy as np
from mxnet.gluon.model_zoo import vision

import tvm
from tvm import autotvm, relay
from tvm.relay import op, transform

import vta
from vta.top import graph_pack
from vta.top.graphpack import run_opt_pass

# Load VTA parameters from the vta/config/vta_config.json file
ENV = vta.get_env()
assert ENV.target.device_name == "vta"
# Dictionary lookup for when to start/end bit packing
PACK_DICT = {
    "resnet18_v1": ["nn.max_pool2d", "nn.global_avg_pool2d", None, None],
}

# Name of Gluon model to compile
MODEL = "resnet18_v1"
assert MODEL in PACK_DICT


def merge_transform_to_mxnet_model(mod):
    """Add image transform logic into the model."""
    # Per-channel ImageNet mean used by the transform.
    svalue = np.array([123., 117., 104.])
    sub_data = relay.Constant(tvm.nd.array(svalue)).astype("float32")
Example #8
import time

import numpy as np

import tvm
from tvm import autotvm, relay, rpc
from tvm.contrib import graph_runtime, util

import vta
from vta.testing import simulator
from vta.top import graph_pack


def main(model,
         start_pack,
         stop_pack,
         data_shape=(1, 3, 224, 224),
         dtype='float32'):
    # Make sure that TVM was compiled with RPC=1
    assert tvm.module.enabled("rpc")

    ######################################################################
    # Define the platform and model targets
    # -------------------------------------
    # Execute on CPU vs. VTA, and define the model.

    # Load VTA parameters from the vta/config/vta_config.json file
    env = vta.get_env()

    # Set ``device=arm_cpu`` to run inference on the CPU
    # or ``device=vta`` to run inference on the FPGA.
    device = "vta"
    target = env.target if device == "vta" else env.target_vta_cpu

    # Name of Gluon model to compile
    # The ``start_pack`` and ``stop_pack`` labels indicate where
    # to start and end the graph packing relay pass: in other words
    # where to start and finish offloading to VTA.

    ######################################################################
    # Obtain an execution remote
    # ---------------------------------
    # On the simulator targets ('sim' or 'tsim') we execute locally
    # through a loopback RPC session; any other target is rejected.
    print(f"Target is {env.TARGET}")
    if env.TARGET in ["sim", "tsim"]:
        remote = rpc.LocalSession()
    else:
        raise ValueError(f"Error, incorrect target for benchmarking: {env.TARGET}")

    # Get execution context from remote
    ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)

    ######################################################################
    # Build the inference graph runtime
    # ---------------------------------
    # Grab ResNet-18 model from Gluon model zoo and compile with Relay.
    # The compilation steps are:
    #    1) Front end translation from MxNet into Relay module.
    #    2) Apply 8-bit quantization: here we skip the first conv layer,
    #       and dense layer which will both be executed in fp32 on the CPU.
    #    3) Perform graph packing to alter the data layout for tensorization.
    #    4) Perform constant folding to reduce number of operators (e.g. eliminate
    #       batch norm multiply).
    #    5) Perform relay build to object file.
    #    6) Load the object file onto remote (FPGA device).
    #    7) Generate graph runtime, `m`.

    # Load pre-configured AutoTVM schedules
    with autotvm.tophub.context(target):

        # Populate the shape and data type dictionary for ResNet input
        dtype_dict = {"data": dtype}
        shape_dict = {"data": data_shape}

        # Measure build start time
        build_start = time.time()

        # Start front end compilation
        if model == 'resnet':
            mod, params = test_resnet_mxnet(env)
        elif model == 'yolo':
            mod, params = test_yolo_darknet()
        elif model == 'lenet':
            mod, params = lenet()
        elif model == 'mobilenet':
            mod, params = mobilenet()
        else:
            raise ValueError(f"Error, incorrect model name: {model}")

        # TODO: bind params into the module

        # Update shape and type dictionary
        shape_dict.update({k: v.shape for k, v in params.items()})
        dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
        with relay.quantize.qconfig(global_scale=8.0, skip_conv_layers=[0]):
            relay_prog = relay.quantize.quantize(mod['main'], params=params)

        print(f"Finishing quantizing graph")
        # Perform graph packing and constant folding for VTA target
        if target.device_name == "vta":
            assert env.BLOCK_IN == env.BLOCK_OUT
            relay_prog = graph_pack(relay_prog,
                                    env.BATCH,
                                    env.BLOCK_OUT,
                                    env.WGT_WIDTH,
                                    start_name=start_pack,
                                    stop_name=stop_pack)

        print(f"Finishing packing graph")

        # Compile Relay program with AlterOpLayout disabled
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            if target.device_name != "vta":
                graph, lib, params = relay.build(relay_prog,
                                                 target=target,
                                                 params=params,
                                                 target_host=env.target_host)
            else:
                with vta.build_config():
                    graph, lib, params = relay.build(
                        relay_prog,
                        target=target,
                        params=params,
                        target_host=env.target_host)

        # Measure Relay build time
        build_time = time.time() - build_start
        print(model + " inference graph built in {0:.2f}s!".format(build_time))

        # Send the inference library over to the remote RPC server
        temp = util.tempdir()
        lib.save(temp.relpath("graphlib.o"))
        remote.upload(temp.relpath("graphlib.o"))
        lib = remote.load_module("graphlib.o")

        # Graph runtime
        m = graph_runtime.create(graph, lib, ctx)
    # Set the network parameters and inputs
    data = np.random.uniform(size=data_shape).astype(dtype)

    m.set_input(**params)
    m.set_input('data', tvm.nd.array(data.astype(dtype)))

    # Perform inference and gather execution statistics
    # More on: https://docs.tvm.ai/api/python/module.html#tvm.module.Module.time_evaluator
    num = 1  # number of times we run module for a single measurement
    rep = 1  # number of measurements (we derive std dev from this)
    timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)

    if env.TARGET in ["sim", "tsim"]:
        simulator.clear_stats()
        timer()
        sim_stats = simulator.stats()
        print("\nExecution statistics:")
        for k, v in sim_stats.items():
            # Since we execute the workload many times, we need to normalize stats
            # Note that there is always one warm up run
            # Therefore we divide the overall stats by (num * rep + 1)
            print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
    else:
        tcost = timer()
        std = np.std(tcost.results) * 1000
        mean = tcost.mean * 1000
        print("\nPerformed inference in %.2fms (std = %.2f) for %d samples" %
              (mean, std, env.BATCH))
        print("Average per sample inference time: %.2fms" % (mean / env.BATCH))