def save_model_to_code(namespace, model, params, model_checksum,
                       params_checksum, device, output, gencode_params):
    """Generate C++ source files for a converted model via Jinja2 templates.

    Emits, under ``output``: one ``tensorN.cc`` per model tensor, an
    optional ``tensor_data.cc`` embedding the raw weights, ``opN.cc``
    files covering the operators in chunks, ``model.cc`` and the
    ``<namespace>.h`` header.

    :param namespace: tag used to namespace the generated code and name
        the header file.
    :param model: converted model proto (read: ``.tensors``, ``.op``).
    :param params: packed weight bytes, embedded only if
        ``gencode_params`` is True.
    :param model_checksum: checksum of the source model file.
    :param params_checksum: checksum of the weight file.
    :param device: device enum; ``device.value`` is passed to the
        operator template.
    :param output: destination directory (created if missing).
    :param gencode_params: when True, also embed weights as C++ source.
    """
    util.mkdir_p(output)
    cwd = os.path.dirname(__file__)
    # Templates live next to this module in a "template" directory.
    j2_env = Environment(loader=FileSystemLoader(cwd + "/template"),
                         trim_blocks=True)
    j2_env.filters["stringfy"] = stringfy
    template_name = "tensor_source.jinja2"
    counter = 0
    # One .cc file per tensor, indexed by position in model.tensors.
    for tensor in model.tensors:
        # convert tensor
        source = j2_env.get_template(template_name).render(
            tensor=tensor,
            tensor_id=counter,
            tag=namespace,
        )
        with open(output + "/tensor" + str(counter) + ".cc", "w") as f:
            f.write(source)
        counter += 1
    if gencode_params:
        # Embed the raw weight bytes directly into a C++ source file.
        template_name = "tensor_data.jinja2"
        source = j2_env.get_template(template_name).render(
            tag=namespace,
            model_data_size=len(params),
            model_data=params)
        with open(output + "/tensor_data.cc", "w") as f:
            f.write(source)
    template_name = "operator.jinja2"
    counter = 0
    op_size = len(model.op)
    # Split operators into chunks of 10 per generated source file to keep
    # individual translation units a manageable size.
    for start in range(0, op_size, 10):
        source = j2_env.get_template(template_name).render(
            start=start,
            end=min(start + 10, op_size),
            net=model,
            tag=namespace,
            device=device.value,
        )
        with open(output + "/op" + str(counter) + ".cc", "w") as f:
            f.write(source)
        counter += 1
    # generate model source files
    build_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    template_name = "model.jinja2"
    # Combined checksum lets the runtime verify model + weights together.
    checksum = "{},{}".format(model_checksum, params_checksum)
    source = j2_env.get_template(template_name).render(net=model,
                                                       tag=namespace,
                                                       checksum=checksum,
                                                       build_time=build_time)
    with open(output + "/model.cc", "w") as f:
        f.write(source)
    template_name = 'model_header.jinja2'
    source = j2_env.get_template(template_name).render(tag=namespace, )
    with open(output + "/" + namespace + '.h', "w") as f:
        f.write(source)
def __init__(self, model, loss, metrics, optimizer, resume, config,
             train_logger=None):
    """Set up a trainer: device placement, monitoring config, and the
    checkpoint/log directories.

    :param model: torch module; moved to CUDA when available, else CPU.
    :param loss: loss callable used during training.
    :param metrics: metric callables evaluated during training.
    :param optimizer: optimizer instance for ``model``.
    :param resume: checkpoint path; if truthy, state is restored from it.
    :param config: dict-like config; the 'train' section is read here.
    :param train_logger: optional logger for per-epoch training entries.
    """
    self.config = config
    self.logger = logging.getLogger(self.__class__.__name__)
    # Prefer GPU when available; all model tensors are moved there.
    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    self.model = model.to(self.device)
    self.loss = loss
    self.metrics = metrics
    self.optimizer = optimizer
    self.train_logger = train_logger
    cfg_trainer = config['train']
    self.epochs = cfg_trainer['epochs']
    self.save_period = cfg_trainer['save_p']
    self.verbosity = cfg_trainer['verbosity']
    self.monitor = cfg_trainer.get('monitor', 'off')
    # configuration to monitor model performance and save best
    if self.monitor == 'off':
        self.mnt_mode = 'off'
        self.mnt_best = 0
    else:
        # monitor is e.g. "min val_loss": mode then metric name.
        self.mnt_mode, self.mnt_metric = self.monitor.split()
        assert self.mnt_mode in ['min', 'max']
        # Start from the worst possible value for the chosen direction.
        self.mnt_best = math.inf if self.mnt_mode == 'min' else -math.inf
        # NOTE(review): early_stop is only defined in this branch; code
        # reading self.early_stop with monitor == 'off' would raise
        # AttributeError — confirm callers guard on mnt_mode first.
        self.early_stop = cfg_trainer.get('early_stop', math.inf)
    self.start_epoch = 1
    # setup directory for checkpoint saving; timestamped so repeated runs
    # never collide.
    start_time = datetime.datetime.now().strftime('%m%d_%H%M%S')
    self.checkpoint_dir = os.path.join(cfg_trainer['save_dir'], start_time,
                                       'checkpoints')
    self.log_dir = os.path.join(cfg_trainer['save_dir'], start_time, 'logs')
    self.writer = WriterTensorboardX(self.log_dir, self.logger,
                                     cfg_trainer['tbX'])
    # Save configuration file into checkpoint directory:
    mkdir_p(self.checkpoint_dir)
    config_save_path = os.path.join(self.checkpoint_dir, 'config.json')
    with open(config_save_path, 'w') as handle:
        json.dump(config, handle, indent=4, sort_keys=False)
    if resume:
        self._resume_checkpoint(resume)
def convert(conf, output, enable_micro=False):
    """Convert every model in ``conf["models"]`` and write artifacts under
    ``output/<model_name>/model``.

    For each model: downloads the original model (and optional weight /
    quantize-range files), runs the converter, best-effort renders an HTML
    visualization, merges weights out of the graph, optionally packages
    micro (MCU) code, and finally writes ``.pb``, ``.data`` and a textual
    ``.pb_txt`` dump.

    :param conf: full deployment config dict.
    :param output: root output directory.
    :param enable_micro: when True, also generate and package micro code.
    """
    # Global quantize-stat flag applies to every model in the config.
    if ModelKeys.quantize_stat in conf:
        quantize_stat = conf[ModelKeys.quantize_stat]
    else:
        quantize_stat = False
    for model_name, model_conf in conf["models"].items():
        model_output = output + "/" + model_name + "/model"
        org_model_dir = output + "/" + model_name + "/org_model"
        util.mkdir_p(model_output)
        util.mkdir_p(org_model_dir)
        model_conf = normalize_model_config(model_conf)
        # Fetch (or locate) the original model; path is rewritten in-place
        # so downstream steps use the local copy.
        model_file = util.download_or_get_model(
            model_conf[ModelKeys.model_file_path],  # noqa
            model_conf[ModelKeys.model_sha256_checksum],  # noqa
            output + "/" + model_name + "/org_model")
        model_conf[ModelKeys.model_file_path] = model_file
        if ModelKeys.weight_file_path in model_conf:
            weight_file = util.download_or_get_model(
                model_conf[ModelKeys.weight_file_path],
                model_conf[ModelKeys.weight_sha256_checksum], "/tmp/")
            model_conf[ModelKeys.weight_file_path] = weight_file
        # TODO: remove the following after quantize tool is made
        if ModelKeys.quantize_range_file in model_conf:
            range_file = util.download_or_get_model(
                model_conf[ModelKeys.quantize_range_file], "",
                model_output)
            model_conf[ModelKeys.quantize_range_file] = range_file
        mace_model = convert_model(model_conf, quantize_stat)
        # Visualization is best-effort: a failure here must not abort the
        # conversion, hence the deliberate broad except.
        try:
            visualizer = visualize_model.ModelVisualizer(model_name,
                                                         mace_model,
                                                         model_output)
            visualizer.save_html()
        except:  # noqa
            print("Failed to visualize model:", sys.exc_info())
        # Strip weights out of the graph into a separate params blob.
        model, params = merge_params(mace_model,
                                     model_conf[ModelKeys.data_type])
        if enable_micro:
            # Deep copies: MicroConverter mutates its inputs and must not
            # affect the artifacts written below.
            micro_converter = MicroConverter(model_conf,
                                             copy.deepcopy(model),
                                             copy.deepcopy(params),
                                             model_name)
            micro_converter.gen_code()
            micro_converter.package(model_output + "/" +
                                    model_name + "_micro.tar.gz")
        output_model_file = model_output + "/" + model_name + ".pb"
        output_params_file = model_output + "/" + model_name + ".data"
        with open(output_model_file, "wb") as f:
            f.write(model.SerializeToString())
        with open(output_params_file, "wb") as f:
            f.write(bytearray(params))
        # Human-readable proto dump alongside the binary .pb.
        with open(output_model_file + "_txt", "w") as f:
            f.write(str(model))
def gen_engine_interface_code(self, model_name):
    """Regenerate the engine interface sources for *model_name*.

    Recreates the per-model engines output folder from scratch, then emits
    the engine factory and C interface header/source pairs through the
    code generator.
    """
    engine_dir = self.gen_folder + 'engines/' + model_name + '/'
    # Wipe any previous output so stale generated files never survive.
    shutil.rmtree(engine_dir, ignore_errors=True)
    util.mkdir_p(engine_dir)
    factory_header = engine_dir + 'micro_engine_factory.h'
    factory_source = engine_dir + 'micro_engine_factory.cc'
    self.code_gen.gen_engine_factory(model_name, factory_header,
                                     factory_source)
    c_iface_header = engine_dir + 'micro_engine_c_interface.h'
    c_iface_source = engine_dir + 'micro_engine_c_interface.cc'
    self.code_gen.gen_engine_c_interface(model_name, c_iface_header,
                                         c_iface_source)
def convert(conf, output, enable_micro=False):
    """Convert every model in ``conf["models"]`` into a MultiNetDef and
    write its artifacts under ``output/<model_name>/model``.

    Each model may contain several subgraphs ("nets"); every net is
    converted separately, its weights are merged into one shared params
    blob, and each net records its offset/size window into that blob.

    :param conf: full deployment config dict; normalized model configs are
        written back into ``conf["models"]``.
    :param output: root output directory.
    :param enable_micro: when True, also run micro (MCU) code generation.
    """
    for model_name, model_conf in conf["models"].items():
        model_output = output + "/" + model_name + "/model"
        org_model_dir = output + "/" + model_name + "/org_model"
        util.mkdir_p(model_output)
        util.mkdir_p(org_model_dir)
        model_conf = normalize_model_config(model_conf, model_output,
                                            org_model_dir)
        # Persist the normalized config for later stages.
        conf["models"][model_name] = model_conf
        net_confs = model_conf[ModelKeys.subgraphs]
        model = mace_pb2.MultiNetDef()
        add_input_output_tensor(model, model_conf)
        model_params = []
        for net_name, net_conf in net_confs.items():
            # Top-level quantize_stat overrides each net's setting.
            if "quantize_stat" in conf:
                net_conf["quantize_stat"] = conf["quantize_stat"]
            net_def_with_Data = convert_net(net_name, net_conf, enable_micro)
            # Visualization is best-effort; never abort conversion on it.
            try:
                visualizer = visualize_model.ModelVisualizer(
                    net_name, net_def_with_Data, model_output)
                visualizer.save_html()
            except:  # noqa
                print("Failed to visualize graph:", sys.exc_info())
            # Strip weights out of the net into a separate params list.
            net_def, params = merge_params(net_def_with_Data,
                                           net_conf[ModelKeys.data_type])
            if enable_micro:
                convert_micro(
                    model_name,
                    net_confs,
                    net_def,
                    params,
                    model_output, )
            # Record this net's window into the concatenated params blob:
            # offset is the current blob length, size is this net's share.
            net_def.data_offset = len(model_params)
            net_def.data_size = len(params)
            model.net_def.extend([net_def])
            model_params.extend(params)
        # store model and weight to files
        output_model_file = model_output + "/" + model_name + ".pb"
        output_params_file = model_output + "/" + model_name + ".data"
        with open(output_model_file, "wb") as f:
            f.write(model.SerializeToString())
        with open(output_params_file, "wb") as f:
            f.write(bytearray(model_params))
        # Human-readable proto dump alongside the binary .pb.
        with open(output_model_file + "_txt", "w") as f:
            f.write(str(model))
def gen_mace_engine_factory(model_name, embed_model_data, output):
    """Render mace_engine_factory.h into *output* for the given model tags.

    :param model_name: iterable of model tags to register in the factory.
    :param embed_model_data: whether weights are embedded in generated code.
    :param output: destination directory (created if missing).
    """
    util.mkdir_p(output)
    template_dir = os.path.dirname(__file__) + "/template"
    env = Environment(loader=FileSystemLoader(template_dir),
                      trim_blocks=True)
    # Templates expect a concrete list of tags.
    model_tags = list(model_name)
    header_source = env.get_template(
        'mace_engine_factory.h.jinja2').render(
        model_tags=model_tags,
        embed_model_data=embed_model_data,
    )
    with open(output + '/mace_engine_factory.h', "w") as f:
        f.write(header_source)
def gen_code_from_model(self, model_name, pb_model, model_weights):
    """Generate all per-model micro (MCU) C++ source files.

    Pipeline: compute memory layout, serialize the NetDef, emit the
    operator list, build and serialize the Graph, emit the engine config,
    and finally embed the weight bytes — each into its own header/source
    under ``<gen_folder>/models/<model_name>/``.

    :param model_name: tag used in generated identifiers and paths.
    :param pb_model: NetDef proto to generate code from.
    :param model_weights: raw weight bytes to embed.
    """
    net_def = pb_model
    output_dir = self.gen_folder + 'models/' + model_name + '/'
    # Start clean so no stale generated files survive a re-run.
    shutil.rmtree(output_dir, ignore_errors=True)
    util.mkdir_p(output_dir)
    # compute mem size and mem block offsets, updating net_def in place;
    # must run before ProtoConverter serializes the (updated) net_def
    mem_computer = MemComputer(net_def, self.np_data_type)
    tensor_mem_size = mem_computer.compute()
    # gen the c++ NetDef struct
    net_def_converter = ProtoConverter(self.offset16, self.write_magic,
                                       NetDefExcludeFields)
    net_def_bytes = net_def_converter.proto_to_bytes(net_def)
    mace_check(net_def_bytes is not None, "proto_to_bytes failed.")
    self.code_gen.gen_net_def_data(model_name, net_def_bytes,
                                   output_dir + 'micro_net_def_data.h')
    # gen operator array
    (op_src_path_list, op_class_name_list) = \
        self.op_resolver.get_op_desc_list_from_model()
    self.code_gen.gen_ops_data(model_name, op_src_path_list,
                               op_class_name_list,
                               output_dir + 'micro_ops_list.h')
    # gen the c++ Graph struct
    graph = GraphBuilder(net_def, self.op_resolver).build()
    graph_converter = ProtoConverter(self.offset16, self.write_magic)
    graph_bytes = graph_converter.proto_to_bytes(graph)
    self.code_gen.gen_graph_data(model_name, graph_bytes,
                                 output_dir + 'micro_graph_data.h')
    scratch_buffer_size = ScratchComputer(
        net_def, self.model_conf).compute_size()
    # gen micro engine config
    engine_data = {}
    engine_data['tensor_mem_size'] = tensor_mem_size
    engine_data['input_size'] = len(net_def.input_info)
    engine_data['scratch_buffer_size'] = scratch_buffer_size
    self.code_gen.gen_engin_config(model_name, engine_data,
                                   output_dir + 'micro_engine_config.cc')
    # gen micro model tensor data
    tensor_bytes = bytearray(model_weights)
    self.code_gen.gen_model_data(model_name, tensor_bytes,
                                 output_dir + 'micro_model_data.h')
def __init__(self, model_conf, net_def, model_weights, model_name,
             offset16=False, write_magic=False):
    """Prepare a micro-code converter for one model.

    Resolves the effective data type (int8 quantize schema overrides the
    configured type), converts the net I/O accordingly, and creates the
    codegen output directory.

    :param model_conf: model config dict (data type, quantize schema, ...).
    :param net_def: NetDef proto to convert.
    :param model_weights: raw weight bytes.
    :param model_name: tag used for the codegen output directory.
    :param offset16: use 16-bit offsets in serialized structures.
    :param write_magic: write magic markers into serialized structures.
    """
    self.model_conf = model_conf
    data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT)
    # int8 quantize schema forces the data type regardless of config.
    if model_conf.get(ModelKeys.quantize_schema) == "int8":
        data_type = mace_pb2.DT_INT8
    self.net_def = MicroIoConverter.convert(net_def, data_type)
    self.model_weights = model_weights
    self.model_name = model_name
    self.offset16 = offset16
    self.write_magic = write_magic
    self.code_gen = MicroCodeGen()
    # numpy dtype used for memory-size computation; float32 fallback.
    self.np_data_type = data_type_to_np_dt(data_type, np.float32)
    self.model_dir = "micro/codegen/" + model_name + "/"
    util.mkdir_p(self.model_dir)
    self.op_resolver = OpResolver(self.net_def, self.model_conf)
def __init__(self, model_conf, net_def, model_weights, model_name,
             offset16=False, write_magic=False):
    """Prepare a micro-code converter for one model.

    Converts the net I/O to the configured data type and creates the
    shared codegen output folder.

    :param model_conf: model config dict (data type, ...).
    :param net_def: NetDef proto to convert.
    :param model_weights: raw weight bytes.
    :param model_name: model tag (stored for later codegen steps).
    :param offset16: use 16-bit offsets in serialized structures.
    :param write_magic: write magic markers into serialized structures.
    """
    self.model_conf = model_conf
    data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT)
    self.net_def = MicroIoConverter.convert(net_def, data_type)
    self.model_weights = model_weights
    self.model_name = model_name
    self.offset16 = offset16
    self.write_magic = write_magic
    self.code_gen = MicroCodeGen()
    # Reuse data_type resolved above (it was previously fetched from
    # model_conf a second time with identical arguments — redundant).
    # numpy dtype used for memory-size computation; float32 fallback.
    self.np_data_type = data_type_to_np_dt(data_type, np.float32)
    self.gen_folder = 'micro/codegen/'
    util.mkdir_p(self.gen_folder)
    self.op_resolver = OpResolver(self.net_def, self.model_conf)
def save_model_to_code(namespace, model, params, model_checksum,
                       params_checksum, output, gencode_params):
    """Generate C++ source files for a converted multi-net model.

    Delegates per-graph code generation to ``save_graph_to_code`` for each
    ``net_def`` in the model, optionally embeds the weight bytes, then
    renders ``model.cc`` and the ``<namespace>.h`` header.

    :param namespace: tag used to namespace generated code and name the
        header file.
    :param model: MultiNetDef-like proto (read: ``.net_def``).
    :param params: packed weight bytes, embedded only if
        ``gencode_params`` is True.
    :param model_checksum: checksum of the source model file.
    :param params_checksum: checksum of the weight file.
    :param output: destination directory (created if missing).
    :param gencode_params: when True, also embed weights as C++ source.
    """
    util.mkdir_p(output)
    cwd = os.path.dirname(__file__)
    # Templates live next to this module in a "template" directory.
    j2_env = Environment(loader=FileSystemLoader(cwd + "/template"),
                         trim_blocks=True)
    j2_env.filters["stringfy"] = stringfy
    graph_size = len(model.net_def)
    # Each subgraph gets its own generated sources, indexed by position.
    for i in range(graph_size):
        save_graph_to_code(j2_env, namespace, i, model.net_def[i], output)
    if gencode_params:
        # Embed the raw weight bytes directly into a C++ source file.
        template_name = "tensor_data.jinja2"
        source = j2_env.get_template(template_name).render(
            model_tag=namespace,
            model_data_size=len(params),
            model_data=params)
        with open(output + "/tensor_data.cc", "w") as f:
            f.write(source)
    # generate model source files
    build_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    template_name = "model.jinja2"
    # Combined checksum lets the runtime verify model + weights together.
    checksum = "{},{}".format(model_checksum, params_checksum)
    source = j2_env.get_template(template_name).render(multi_net=model,
                                                       model_tag=namespace,
                                                       checksum=checksum,
                                                       build_time=build_time)
    with open(output + "/model.cc", "w") as f:
        f.write(source)
    template_name = 'model_header.jinja2'
    source = j2_env.get_template(template_name).render(model_tag=namespace)
    with open(output + "/" + namespace + '.h', "w") as f:
        f.write(source)
def run_model_for_device(flags, args, dev, model_name, model_conf):
    """Install, run, and optionally tune/validate one model on a device.

    Steps: push the model files to the device, build the mace_run command
    line from the model config, optionally generate/download validation
    inputs, run mace_run with the required runtime libraries, pull back
    OpenCL binaries / tuning parameters, and optionally validate outputs
    against the original framework.

    Fixes over the previous revision:
      * the HTA library check now tests membership in ``runtime_list``
        (it previously compared the leaked loop variable ``runtime``,
        i.e. only the *last* subgraph's runtime, inconsistent with the
        HEXAGON/APU membership checks around it);
      * two error-message typos corrected.

    :param flags: parsed CLI flags (target_abi, output, validate, tune, ...).
    :param args: extra command-line options forwarded to mace_run.
    :param dev: device handle (install/mkdir/pull/info).
    :param model_name: model tag; also names files and install dirs.
    :param model_conf: normalized model config dict.
    """
    target_abi = flags.target_abi
    install_dir = run_target.default_install_dir(target_abi) + "/" + model_name
    sysdir = install_dir + "/interior"
    dev.mkdir(sysdir)

    # Collect every subgraph's runtime; used below to pick runtime libs.
    runtime_list = []
    for graph_name, graph_conf in model_conf[ModelKeys.subgraphs].items():
        runtime = graph_conf[ModelKeys.runtime]
        runtime_list.append(runtime)
        mace_check(runtime != DeviceType.APU or target_abi == "arm64-v8a",
                   "APU runtime does only support arm64-v8a")

    # install models to devices
    workdir = flags.output + "/" + model_name
    model_file = model_name + ".pb"
    model_data_file = model_name + ".data"
    model_path = workdir + "/model/" + model_file
    model_data_path = workdir + "/model/" + model_data_file
    if os.path.exists(model_path) and os.path.exists(model_data_path):
        dev.install(Target(model_path), install_dir)
        dev.install(Target(model_data_path), install_dir)
    else:
        MaceLogger.warning("No models exist in %s, use --model_file and"
                           " --model_data_file specified in args"
                           % model_path)

    # check_tensors, when present, replace the output tensors to compare.
    if ModelKeys.check_tensors in model_conf:
        model_conf[ModelKeys.output_tensors] = model_conf[
            ModelKeys.check_tensors]
        model_conf[ModelKeys.output_shapes] = model_conf[
            ModelKeys.check_shapes]

    # Empty paths mean "model is compiled into the binary" (gencode).
    model_file_path = ""
    if not flags.gencode_model:
        model_file_path = install_dir + "/" + model_file
    model_data_file_path = ""
    if not flags.gencode_param:
        model_data_file_path = install_dir + "/" + model_data_file

    input_tensors_info = config_parser.find_input_tensors_info(
        model_conf[ModelKeys.subgraphs], model_conf[ModelKeys.input_tensors])
    output_tensors_info = config_parser.find_output_tensors_info(
        model_conf[ModelKeys.subgraphs], model_conf[ModelKeys.output_tensors])

    model_args = {
        "model_name": model_name,
        "model_file": model_file_path,
        "model_data_file": model_data_file_path,
        "input_node": ",".join(model_conf[ModelKeys.input_tensors]),
        "input_shape":
            join_2d_array(input_tensors_info[ModelKeys.input_shapes]),
        "output_node": ",".join(model_conf[ModelKeys.output_tensors]),
        "output_shape":
            join_2d_array(output_tensors_info[ModelKeys.output_shapes]),
        "input_data_format": ",".join([
            df.name for df in input_tensors_info[ModelKeys.input_data_formats]
        ]),
        "output_data_format": ",".join([
            df.name
            for df in output_tensors_info[ModelKeys.output_data_formats]
        ])
    }
    opts = [
        "--%s='%s'" % (arg_key, arg_val)
        for arg_key, arg_val in model_args.items()
    ] + args

    # Validation, tuning, and benchmarking all need input data on device.
    should_generate_data = (flags.validate or flags.tune
                            or "--benchmark" in opts)

    if should_generate_data:
        tmpdirname = tempfile.mkdtemp()
        input_file_prefix = tmpdirname + "/" + model_name
        if ModelKeys.validation_inputs_data in model_conf:
            # Use the user-supplied validation inputs, one per tensor.
            input_tensor = model_conf[ModelKeys.input_tensors]
            input_data = model_conf[ModelKeys.validation_inputs_data]
            mace_check(
                len(input_tensor) == len(input_data),
                "len(input_tensor) != len(validate_data)")
            for i in range(len(input_tensor)):
                util.download_or_get_file(
                    model_conf[ModelKeys.validation_inputs_data][i], "",
                    util.formatted_file_name(input_file_prefix,
                                             input_tensor[i]))
        else:
            # No inputs supplied: generate random data per tensor spec.
            generate_input_data(input_file_prefix,
                                model_conf[ModelKeys.input_tensors],
                                input_tensors_info[ModelKeys.input_shapes],
                                input_tensors_info[ModelKeys.input_ranges],
                                input_tensors_info[
                                    ModelKeys.input_data_types])
        dev.install(Target(tmpdirname), install_dir + "/validate_in")
        target_input_file = "%s/validate_in/%s" % (install_dir, model_name)
        target_output_dir = "%s/validate_out" % install_dir
        dev.mkdir(target_output_dir)
        target_output_file = target_output_dir + "/" + model_name
        opts += [
            "--input_file=%s" % target_input_file,
            "--output_file=%s" % target_output_file
        ]

    # run
    envs = flags.envs.split(" ") + ["MACE_INTERNAL_STORAGE_PATH=%s" % sysdir]
    if flags.tune:
        envs += [
            "MACE_TUNING=1",
            "MACE_RUN_PARAMETER_PATH=%s/interior/tune_params" % install_dir
        ]
        # Tuning measures kernels itself; skip the normal run rounds.
        opts += ["--round=0"]

    mace_check(flags.vlog_level >= 0,
               "vlog_level should be greater than zero")
    envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % flags.vlog_level]

    build_dir = flags.build_dir + "/" + target_abi
    libs = []
    if DeviceType.HEXAGON in runtime_list:
        libs += ["third_party/nnlib/%s/libhexagon_controller.so"
                 % target_abi]
    elif DeviceType.HTA in runtime_list:
        # Fixed: previously compared the leaked loop variable `runtime`
        # (last subgraph only) instead of checking the whole list.
        libs += ["third_party/hta/%s/libhta_hexagon_runtime.so"
                 % target_abi]
    elif DeviceType.APU in runtime_list:
        apu_libs = get_apu_so_paths(dev)
        libs += apu_libs
    cpp_shared_lib_path = os.path.join(build_dir,
                                       "install/lib/libc++_shared.so")
    if os.path.exists(cpp_shared_lib_path):
        libs.append(cpp_shared_lib_path)

    target = Target(build_dir + "/install/bin/mace_run", libs,
                    opts=opts, envs=envs)
    run_target.run_target(target_abi, install_dir, target, dev)

    if DeviceType.GPU in runtime_list:
        # Pull back the compiled OpenCL program (and tuned parameters)
        # named by device model and platform so they can be reused.
        opencl_dir = workdir + "/opencl"
        util.mkdir_p(opencl_dir)
        dev.pull(
            Target(install_dir + "/interior/mace_cl_compiled_program.bin"),
            "%s/%s_compiled_opencl_kernel.%s.%s.bin" %
            (opencl_dir, model_name,
             dev.info()["ro.product.model"].replace(' ', ''),
             dev.info()["ro.board.platform"]))
        if flags.tune:
            dev.pull(
                Target(install_dir + "/interior/tune_params"),
                "%s/%s_tuned_opencl_parameter.%s.%s.bin" %
                (opencl_dir, model_name,
                 dev.info()["ro.product.model"].replace(' ', ''),
                 dev.info()["ro.board.platform"]))

    if flags.validate:
        # Compare device outputs against the original framework's outputs.
        validate_model_file = util.download_or_get_model(
            model_conf[ModelKeys.model_file_path],
            model_conf[ModelKeys.model_sha256_checksum], tmpdirname)
        validate_weight_file = ""
        if ModelKeys.weight_file_path in model_conf:
            validate_weight_file = util.download_or_get_model(
                model_conf[ModelKeys.weight_file_path],
                model_conf[ModelKeys.weight_sha256_checksum], tmpdirname)
        dev.pull(Target(target_output_dir), tmpdirname + "/validate_out")
        output_file_prefix = tmpdirname + "/validate_out/" + model_name
        validate.validate(model_conf[ModelKeys.platform],
                          validate_model_file, validate_weight_file,
                          input_file_prefix, output_file_prefix,
                          input_tensors_info[ModelKeys.input_shapes],
                          output_tensors_info[ModelKeys.output_shapes],
                          input_tensors_info[ModelKeys.input_data_formats],
                          output_tensors_info[ModelKeys.output_data_formats],
                          input_tensors_info[ModelKeys.input_tensors],
                          output_tensors_info[ModelKeys.output_tensors],
                          flags.validate_threshold,
                          input_tensors_info[ModelKeys.input_data_types],
                          flags.backend, "", "")
    if should_generate_data:
        shutil.rmtree(tmpdirname)
parser.add_argument('--tuning_files', type=str, default="", help="tuning params files") parser.add_argument('--output', type=str, default="build", help="output dir") parser.add_argument("--gencode", action="store_true", help="generate code") flgs, _ = parser.parse_known_args() return flgs if __name__ == '__main__': flags = parse_args() util.mkdir_p(flags.output) opencl_binary_files = [] if flags.binary_files: opencl_binary_files = flags.binary_files.split(",") opencl_tuning_files = [] if flags.tuning_files: opencl_tuning_files = flags.tuning_files.split(",") compiled_opencl_kernel_prefix = "compiled_opencl_kernel" tuned_opencl_parameter_prefix = "tuned_opencl_parameter" if not opencl_binary_files and not opencl_tuning_files: for root, dirs, files in os.walk("build", topdown=False): for name in files: if compiled_opencl_kernel_prefix in name: opencl_binary_files.append(os.path.join(root, name))
def save_model_to_file(model_name, model, params, output):
    """Write *model* and its weights to ``<output>/<model_name>.pb`` and
    ``<output>/<model_name>.data``.

    :param model_name: base name for the output files.
    :param model: proto providing ``SerializeToString()``.
    :param params: raw weight bytes written verbatim.
    :param output: destination directory (created if missing).
    """
    util.mkdir_p(output)
    pb_path = "%s/%s.pb" % (output, model_name)
    data_path = "%s/%s.data" % (output, model_name)
    with open(pb_path, "wb") as f:
        f.write(model.SerializeToString())
    with open(data_path, "wb") as f:
        f.write(params)