Example #1
File: measure.py Project: jcf94/tvm
    def __init__(self,
                 priority=1,
                 n_parallel=1,
                 timeout=10,
                 number=3,
                 repeat=1,
                 min_repeat_ms=0,
                 cooldown_interval=0.0):
        # Derive the local GPU's compute capability (e.g. "7.0" -> "sm_70")
        # and register it so CUDA code is generated for the right arch.
        ctx = tvm.context("cuda", 0)
        if ctx.exist:
            cuda_arch = "sm_" + "".join(ctx.compute_version.split('.'))
            set_cuda_target_arch(cuda_arch)
        # Start a local RPC tracker and server, then a runner that measures
        # candidate kernels through them.
        host = '0.0.0.0'
        self.tracker = Tracker(host, port=9000, port_end=10000, silent=True)
        device_key = '$local$device$%d' % self.tracker.port
        self.server = Server(host,
                             port=self.tracker.port,
                             port_end=10000,
                             key=device_key,
                             use_popen=True,
                             silent=True,
                             tracker_addr=(self.tracker.host,
                                           self.tracker.port))
        self.runner = RPCRunner(device_key, host, self.tracker.port, priority,
                                n_parallel, timeout, number, repeat,
                                min_repeat_ms, cooldown_interval)
        # Wait for the processes to start
        time.sleep(0.5)
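
For context, the runner created above is what AutoTVM consumes through its measure options. A minimal sketch, assuming the enclosing class is named LocalRPCMeasureContext (that name and the LocalBuilder settings are assumptions, not part of the snippet):

from tvm import autotvm

# LocalRPCMeasureContext is an assumed name for the class whose __init__ is shown above.
env = LocalRPCMeasureContext(timeout=20, min_repeat_ms=300)
measure_option = autotvm.measure_option(
    builder=autotvm.LocalBuilder(timeout=10),
    runner=env.runner,
)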
Example #2
def tvm_compile(func, params, arch, dlr_model_name):
    gpu_code = None
    # c4/m4 instances: AVX2
    if arch in ['c4', 'm4']:
        target = "llvm -mcpu=core-avx2"
    # c5/m5 instances: AVX-512
    elif arch in ['c5', 'm5']:
        target = "llvm -mcpu=skylake-avx512"
    elif arch in ['p3', 'ml_p3']:
        target = "cuda"
        gpu_code = "sm_70"  # Tesla V100
    elif arch in ['p2', 'ml_p2']:
        target = "cuda"
        gpu_code = "sm_37"  # Tesla K80
    # lambda: SSSE3/SSE4.2/AVX
    elif arch == 'lambda':
        target = "llvm -mcpu=ivybridge"
    else:
        print("Valid arch: c4, m4, c5, m5, p2, ml_p2, p3, ml_p3, lambda")
        return

    if gpu_code is not None:
        # Set the CUDA arch before relay.build so device code matches the target GPU.
        from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
        set_cuda_target_arch(gpu_code)
        print("gpu_code:", gpu_code)

    print('target:', target)
    print("Compiling...")

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)

    print("Compilation done")
    print("lib type_key: ", lib.type_key)

    print("Saving files")
    out_folder = arch + "/" + dlr_model_name + "/"
    os.makedirs(out_folder, exist_ok=True)
    # save the graph, lib and params into separate files
    path_lib = out_folder + "model.so"
    lib.export_library(path_lib)

    print("export_library done")

    with open(out_folder + "model.json", "w") as fo:
        fo.write(graph)
    with open(out_folder + "model.params", "wb") as fo:
        fo.write(relay.save_param_dict(params))

    print("Files saved to", out_folder)
Example #3
    def __init__(
        self,
        priority=1,
        n_parallel=1,
        timeout=10,
        number=3,
        repeat=1,
        min_repeat_ms=0,
        cooldown_interval=0.0,
        enable_cpu_cache_flush=False,
    ):
        # pylint: disable=import-outside-toplevel
        from tvm.rpc.tracker import Tracker
        from tvm.rpc.server import Server

        dev = tvm.device("cuda", 0)
        if dev.exist:
            cuda_arch = "sm_" + "".join(dev.compute_version.split("."))
            set_cuda_target_arch(cuda_arch)
        host = "0.0.0.0"
        self.tracker = Tracker(host, port=9000, port_end=10000, silent=True)
        device_key = "$local$device$%d" % self.tracker.port
        self.server = Server(
            host,
            port=self.tracker.port,
            port_end=10000,
            key=device_key,
            use_popen=True,
            silent=True,
            tracker_addr=(self.tracker.host, self.tracker.port),
        )
        self.runner = RPCRunner(
            device_key,
            host,
            self.tracker.port,
            priority,
            n_parallel,
            timeout,
            number,
            repeat,
            min_repeat_ms,
            cooldown_interval,
            enable_cpu_cache_flush,
        )
        # Wait for the processes to start
        time.sleep(0.5)
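
For reference, the cuda_arch string built above comes straight from the device's compute capability; a quick check on the local GPU (printed values depend on your hardware):

import tvm

dev = tvm.device("cuda", 0)
if dev.exist:
    print(dev.compute_version)                              # e.g. "7.0" on a V100
    print("sm_" + "".join(dev.compute_version.split(".")))  # e.g. "sm_70"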
Example #4
    args = parser.parse_args()

    network = args.network
    num_classes = 1000
    data_shape = get_data_shape(network)
    ext_accel = None if args.ext_accel == 'none' else args.ext_accel
    cuda_arch = args.cuda_arch

    print("===========Loading model %s" % network)
    loaded_json = open('%s.json' % network).read()
    loaded_params = bytearray(open('%s.params' % network, 'rb').read())
    net = nnvm.graph.load_json(loaded_json)
    params = nnvm.compiler.load_param_dict(loaded_params)
    opt_level = 3
    target = tvm.target.cuda()
    set_cuda_target_arch(cuda_arch)
    target_host = 'llvm -target=%s' % args.target_host
    print("===========Start to compile %s graph with params, external accelerator: %s" % (network, ext_accel))
    start = time.time()
    with nnvm.compiler.build_config(opt_level=opt_level, ext_accel=ext_accel):
        graph, lib, params = nnvm.compiler.build(
            net, target, shape={"data": data_shape}, params=params, target_host=target_host)
    print("===========Compiling model %s took %.3fs" % (network, time.time() - start))

    print("===========Saving lowered graph for model %s" % network)
    with open('%s_ext_accel_%s_%s.json' % (network, ext_accel, cuda_arch), "w") as fo:
        fo.write(graph.json())
    print("===========Saving module for model %s" % network)
    if lib.is_empty():
        print("lib is empty")
    else:
        # Assumed continuation: export the compiled module next to the graph JSON.
        lib.export_library('%s_ext_accel_%s_%s.so' % (network, ext_accel, cuda_arch))
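
A sketch of loading the saved artifacts back with the NNVM-era graph runtime; the .so and .params file names are assumptions that mirror the JSON naming above:

import tvm
from tvm.contrib import graph_runtime

prefix = '%s_ext_accel_%s_%s' % (network, ext_accel, cuda_arch)
loaded_graph = open(prefix + '.json').read()
loaded_lib = tvm.module.load(prefix + '.so')  # assumed export path
module = graph_runtime.create(loaded_graph, loaded_lib, tvm.gpu(0))
module.load_params(bytearray(open(prefix + '.params', 'rb').read()))  # assumed params file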
Example #5
#
# You can register multiple devices to the tracker to speed up measurement during tuning.

###########################################
# Set Tuning Options
# ------------------
# Before tuning, we should apply some configurations. Here we use a Jetson TX2
# board as example (the code below targets CUDA with arch sm_62). In your
# setting, you should modify the target and device_key accordingly.
# Set :code:`use_android` to True if you use an Android phone.

#### DEVICE CONFIG ####

# TODO: add model to arch mapping
target = tvm.target.cuda(model="tx2")
from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
set_cuda_target_arch('sm_62')

# Replace "aarch64-linux-gnu" with the correct target of your board.
# This target host is used for cross compilation. You can query it by :code:`gcc -v` on your device.
target_host = 'llvm -target=aarch64-linux-gnu'

# Also replace this with the device key in your tracker
device_key = 'tx2'

# Set this to True if you use an Android phone
use_android = False

#### TUNING OPTION ####
network = 'resnet-18'
log_file = "%s.%s.log" % (device_key, network)
dtype = 'float32'
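
The snippet stops before the tuning options themselves; a sketch of what typically follows in this style of script (trial counts, the tracker port, and timeouts are illustrative, not from the snippet):

from tvm import autotvm

tuning_option = {
    'log_filename': log_file,
    'tuner': 'xgb',
    'n_trial': 1500,
    'early_stopping': 800,
    'measure_option': autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func='ndk' if use_android else 'default'),
        runner=autotvm.RPCRunner(device_key, host='0.0.0.0', port=9190,
                                 number=10, timeout=5),
    ),
}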
Example #6
scripted_model = torch.jit.trace(model, input_data).eval()
shape_dict = {'input.1': input_shape}
shape_list = [('input.1', input_shape)]
#onnx_model = onnx.load('peleenet_1D_depth.onnx')
#mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype='float16')
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

if quant:
    print('quant')
    mod = quantize(mod, params, '/home/kalyan/libraries/Pelee.Pytorch/imgs/VOC', data_aware=True)
    params = None

if demo == 'rpc':
    print('RPC')
    # Jetson Nano is compute capability 5.3
    set_cuda_target_arch('sm_53')
    tgt_cuda = tvm.target.cuda(model="nano")
    tgt_host = "llvm -target=aarch64-linux-gnu"
    tgt = tgt_cuda
else:
    tgt = tvm.target.cuda()
    tgt_host = "llvm"
    #tgt = tgt_host
    ctx = tvm.gpu(0)

# Tuning is disabled here; uncomment to extract and tune tasks first.
#tasks = autotvm.task.extract_from_program(mod, params, tgt, target_host=tgt_host,
#                                          ops=(relay.op.get("nn.conv2d"),))
#if demo == 'rpc':
#    tune_tasks(tasks, **tuning_rpc_option)
#else:
#    tune_tasks(tasks, **tuning_option)
#with autotvm.apply_history_best(log_file):
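
A sketch of the compile step the last commented line points at, assuming log_file and the relay/autotvm imports are defined earlier in the script:

from tvm import autotvm

with autotvm.apply_history_best(log_file):
    with relay.build_config(opt_level=3):
        # newer TVMs return a single factory module; older ones return a
        # (graph, lib, params) tuple
        lib = relay.build(mod, target=tgt, target_host=tgt_host, params=params)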