def __init__(self, priority=1, n_parallel=1, timeout=10, number=3, repeat=1,
             min_repeat_ms=0, cooldown_interval=0.0):
    ctx = tvm.context("cuda", 0)
    if ctx.exist:
        # Derive the CUDA arch (e.g. "sm_70") from the local device and
        # register it so cross compilation targets the right GPU.
        cuda_arch = "sm_" + "".join(ctx.compute_version.split('.'))
        set_cuda_target_arch(cuda_arch)
    host = '0.0.0.0'
    self.tracker = Tracker(host, port=9000, port_end=10000, silent=True)
    device_key = '$local$device$%d' % self.tracker.port
    self.server = Server(host, port=self.tracker.port, port_end=10000,
                         key=device_key, use_popen=True, silent=True,
                         tracker_addr=(self.tracker.host, self.tracker.port))
    self.runner = RPCRunner(device_key, host, self.tracker.port, priority,
                            n_parallel, timeout, number, repeat,
                            min_repeat_ms, cooldown_interval)
    # Wait for the processes to start
    time.sleep(0.5)
def tvm_compile(func, params, arch, dlr_model_name):
    gpu_code = None
    ### arch c4/m4: avx2
    if arch in ['c4', 'm4']:
        target = "llvm -mcpu=core-avx2"
    ### arch c5/m5: avx512
    elif arch in ['c5', 'm5']:
        target = "llvm -mcpu=skylake-avx512"
    elif arch in ['p3', 'ml_p3']:
        target = "cuda"
        gpu_code = "sm_70"
    elif arch in ['p2', 'ml_p2']:
        target = "cuda"
        gpu_code = "sm_37"
    ### arch lambda: ssse3, sse4.2, avx
    elif arch == 'lambda':
        target = "llvm -mcpu=ivybridge"
    else:
        print("Valid arch: c4, m4, c5, m5, p2, ml_p2, p3, ml_p3, lambda")
        return
    if gpu_code is not None:
        # Set the CUDA arch before relay.build
        from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
        set_cuda_target_arch(gpu_code)
        print("gpu_code:", gpu_code)
    print('target:', target)
    print("Compiling...")
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(func, target, params=params)
    print("Compilation done")
    print("lib type_key: ", lib.type_key)
    print("Saving files")
    out_folder = arch + "/" + dlr_model_name + "/"
    os.makedirs(out_folder, exist_ok=True)
    # Save the graph, lib and params into separate files
    path_lib = out_folder + "model.so"
    lib.export_library(path_lib)
    print("export_library done")
    with open(out_folder + "model.json", "w") as fo:
        fo.write(graph)
    with open(out_folder + "model.params", "wb") as fo:
        fo.write(relay.save_param_dict(params))
    print("Files saved to", out_folder)
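A minimal usage sketch for tvm_compile above; the torchvision model, input name, and shape are illustrative and not taken from the original:

import torch
import torchvision
from tvm import relay

# Hypothetical driver: trace a torchvision model and compile it for a
# c5 (avx512) host with the tvm_compile helper above.
model = torchvision.models.resnet18(pretrained=True).eval()
data = torch.randn(1, 3, 224, 224)
scripted = torch.jit.trace(model, data).eval()
mod, params = relay.frontend.from_pytorch(scripted, [("input0", (1, 3, 224, 224))])
tvm_compile(mod["main"], params, arch="c5", dlr_model_name="resnet18")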
def __init__(
    self,
    priority=1,
    n_parallel=1,
    timeout=10,
    number=3,
    repeat=1,
    min_repeat_ms=0,
    cooldown_interval=0.0,
    enable_cpu_cache_flush=False,
):
    # pylint: disable=import-outside-toplevel
    from tvm.rpc.tracker import Tracker
    from tvm.rpc.server import Server

    dev = tvm.device("cuda", 0)
    if dev.exist:
        cuda_arch = "sm_" + "".join(dev.compute_version.split("."))
        set_cuda_target_arch(cuda_arch)
    host = "0.0.0.0"
    self.tracker = Tracker(host, port=9000, port_end=10000, silent=True)
    device_key = "$local$device$%d" % self.tracker.port
    self.server = Server(
        host,
        port=self.tracker.port,
        port_end=10000,
        key=device_key,
        use_popen=True,
        silent=True,
        tracker_addr=(self.tracker.host, self.tracker.port),
    )
    self.runner = RPCRunner(
        device_key,
        host,
        self.tracker.port,
        priority,
        n_parallel,
        timeout,
        number,
        repeat,
        min_repeat_ms,
        cooldown_interval,
        enable_cpu_cache_flush,
    )
    # Wait for the processes to start
    time.sleep(0.5)
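The two __init__ bodies above (the second adds enable_cpu_cache_flush and the newer tvm.device API) appear to come from auto_scheduler's LocalRPCMeasureContext; assuming that class name, a minimal sketch of how such a context is typically wired into a tuning run:

from tvm import auto_scheduler

# Sketch: the context spins up a local tracker/server pair, so its runner
# can be handed straight to the auto-scheduler's tuning options.
measure_ctx = auto_scheduler.LocalRPCMeasureContext(min_repeat_ms=300)
tune_option = auto_scheduler.TuningOptions(
    num_measure_trials=200,  # illustrative trial budget
    runner=measure_ctx.runner,
    measure_callbacks=[auto_scheduler.RecordToFile("cuda_tuning.json")],
)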
args = parser.parse_args()
network = args.network
num_classes = 1000
data_shape = get_data_shape(network)
ext_accel = None if args.ext_accel == 'none' else args.ext_accel
cuda_arch = args.cuda_arch

print("===========Loading model %s" % network)
loaded_json = open('%s.json' % network).read()
loaded_params = bytearray(open('%s.params' % network, 'rb').read())
net = nnvm.graph.load_json(loaded_json)
params = nnvm.compiler.load_param_dict(loaded_params)

opt_level = 3
target = tvm.target.cuda()
set_cuda_target_arch(cuda_arch)
target_host = 'llvm -target=%s' % args.target_host

print("===========Start to compile %s graph with params, external accelerator: %s" % (network, ext_accel))
start = time.time()
with nnvm.compiler.build_config(opt_level=opt_level, ext_accel=ext_accel):
    graph, lib, params = nnvm.compiler.build(
        net, target, shape={"data": data_shape}, params=params, target_host=target_host)
print("===========Compiling model %s took %.3fs" % (network, time.time() - start))

print("===========Saving lowered graph for model %s" % network)
with open('%s_ext_accel_%s_%s.json' % (network, ext_accel, cuda_arch), "w") as fo:
    fo.write(graph.json())
print("===========Saving module for model %s" % network)
if lib.is_empty():
    print("lib is empty")
else:
    # Truncated in the original; presumably the compiled module is
    # exported here, e.g. via lib.export_library(...).
    pass
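A hedged sketch of loading the artifacts this script saves, using the nnvm-era graph runtime; the .so filename is an assumption, since the export call is truncated above:

import tvm
from tvm.contrib import graph_runtime

# Assumed filenames matching the save pattern above.
graph_json = open('%s_ext_accel_%s_%s.json' % (network, ext_accel, cuda_arch)).read()
loaded_lib = tvm.module.load('%s_ext_accel_%s_%s.so' % (network, ext_accel, cuda_arch))
loaded_params = bytearray(open('%s.params' % network, 'rb').read())

module = graph_runtime.create(graph_json, loaded_lib, tvm.gpu(0))
module.load_params(loaded_params)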
#
# You can register multiple devices to the tracker to accelerate the measurement in tuning.

###########################################
# Set Tuning Options
# ------------------
# Before tuning, we should apply some configurations. Here we use an NVIDIA
# Jetson TX2 board as an example. In your setting, you should modify the
# target and device_key accordingly.
# Set :code:`use_android` to True if you use an Android phone.

#### DEVICE CONFIG ####
# TODO: add model to arch mapping
target = tvm.target.cuda(model="tx2")
from tvm.autotvm.measure.measure_methods import set_cuda_target_arch
set_cuda_target_arch('sm_62')

# Replace "aarch64-linux-gnu" with the correct target of your board.
# This target host is used for cross compilation. You can query it by :code:`gcc -v` on your device.
target_host = 'llvm -target=aarch64-linux-gnu'

# Also replace this with the device key in your tracker
device_key = 'tx2'

# Set this to True if you use an Android phone
use_android = False

#### TUNING OPTION ####
network = 'resnet-18'
log_file = "%s.%s.log" % (device_key, network)
dtype = 'float32'
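In the autotvm tutorials this device config is usually followed by a measure/tune configuration; a sketch with illustrative trial counts (tracker port 9190 is the tutorial default, not taken from this snippet):

from tvm import autotvm

tuning_option = {
    'log_filename': log_file,
    'tuner': 'xgb',
    'n_trial': 1500,
    'early_stopping': 600,
    'measure_option': autotvm.measure_option(
        # Cross-compile with the Android NDK when targeting a phone.
        builder=autotvm.LocalBuilder(
            build_func='ndk' if use_android else 'default'),
        # Run candidates on the board registered under device_key.
        runner=autotvm.RPCRunner(
            device_key, host='0.0.0.0', port=9190,
            number=5, timeout=10),
    ),
}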
scripted_model = torch.jit.trace(model, input_data).eval()
shape_dict = {'input.1': input_shape}
shape_list = [('input.1', input_shape)]
# onnx_model = onnx.load('peleenet_1D_depth.onnx')
# mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, dtype='float16')
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

if quant:
    print('quant')
    mod = quantize(mod, params, '/home/kalyan/libraries/Pelee.Pytorch/imgs/VOC',
                   data_aware=True)
    params = None

if demo == 'rpc':
    print('RPC')
    # sm_53 is the Jetson Nano / TX1 GPU architecture
    set_cuda_target_arch('sm_53')
    tgt_cuda = tvm.target.cuda(model="nano")
    tgt_host = "llvm -target=aarch64-linux-gnu"
    tgt = tgt_cuda
else:
    tgt = tvm.target.cuda()
    tgt_host = "llvm"
    # tgt = tgt_host
ctx = tvm.gpu(0)

'''tasks = autotvm.task.extract_from_program(mod, params, tgt, target_host=tgt_host,
                                          ops=(relay.op.get("nn.conv2d"),))
if demo == 'rpc':
    tune_tasks(tasks, **tuning_rpc_option)
else:
    tune_tasks(tasks, **tuning_option)'''
# with autotvm.apply_history_best(log_file):
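The commented-out last line suggests compilation continues under the tuned log; a sketch of that continuation, assuming log_file holds the tuning records and mod/tgt/tgt_host come from the snippet above:

from tvm import autotvm, relay

# Sketch: build with the best tuned schedules applied.
with autotvm.apply_history_best(log_file):
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(mod, target=tgt,
                                         target_host=tgt_host, params=params)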