Example No. 1
def _launch_kernel_from_path(inputs, op_attrs, func_type, func_source,
                             func_name):
    kernel_meta_path = get_kernel_meta_path()
    cuda_path = os.path.realpath(kernel_meta_path)
    if not os.path.isdir(cuda_path):
        os.makedirs(cuda_path, exist_ok=True)
    if not func_name:
        raise ValueError("Can't find name of function")
    if not func_source:
        raise ValueError("Can't find source of function: {}".format(
            str(func_name)))

    op_imply_path = os.path.realpath(kernel_meta_path + func_name + ".py")
    if os.path.exists(op_imply_path):
        os.remove(op_imply_path)
    try:
        with open(op_imply_path, 'at') as file:
            fcntl.flock(file.fileno(), fcntl.LOCK_EX)
            file.seek(0, 2)
            if file.tell() == 0:
                file.write(func_source)
        os.chmod(op_imply_path, 0o400)
    except Exception:
        logging.error(traceback.format_exc())
        return None

    custom_mod_name = Path(op_imply_path).resolve().stem
    mod_spec = importlib.util.spec_from_file_location(custom_mod_name,
                                                      op_imply_path)
    custom_mod = importlib.util.module_from_spec(mod_spec)
    mod_spec.loader.exec_module(custom_mod)
    func_kernel = getattr(custom_mod, func_name, None)

    if func_kernel is None:
        raise ValueError(
            "Can't find the following function under module {}: {}".format(
                custom_mod_name, func_name))

    if "__wrapped__" in func_kernel.__dict__:
        func_kernel = func_kernel.__dict__["__wrapped__"]
    func_kernel.__globals__["tvm"] = globals()["tvm"]

    if func_type == "ir_builder":
        return func_kernel(inputs, op_attrs)
    else:
        inputs = list(inputs)
        return func_kernel(*inputs, **op_attrs)
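
A note on the dynamic-import step above: importlib.util.spec_from_file_location loads the generated .py file as a throwaway module, and the kernel function is then looked up by name. A minimal standalone sketch of the same pattern, using a hypothetical file path and function name:

import importlib.util
from pathlib import Path

# Hypothetical generated file, assumed to define a function named "my_kernel".
op_imply_path = "/tmp/kernel_meta/my_kernel.py"

mod_name = Path(op_imply_path).resolve().stem
spec = importlib.util.spec_from_file_location(mod_name, op_imply_path)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)  # runs the file in the new module's namespace

kernel_fn = getattr(module, "my_kernel", None)
if kernel_fn is None:
    raise ValueError("function 'my_kernel' not found in " + op_imply_path)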
Example No. 2
def dump_cpu_meta(mod, kernel_name):
    """
    Function for dumping cpu meta.

    Args:
        mod: the module code of cpu.
    """

    title_dict = dict()

    # kernel name (note: the fetched source is not used further in this function)
    code = mod.get_source()
    title_dict["kernelName"] = kernel_name + "_kernel"

    # thread number
    thread_num = "null"
    title_dict["threadNumber"] = thread_num

    # meta path
    path_name = get_kernel_meta_path()
    meta_path = os.path.realpath(path_name)
    if not os.path.isdir(meta_path):
        os.makedirs(meta_path, exist_ok=True)

    # save libraries to kernel meta
    obj_file = os.path.join(meta_path, kernel_name + '.o')
    lib_file = os.path.join(meta_path, kernel_name + '.so')
    mod.save(obj_file, 'k')
    mod.export_library(lib_file)

    # sha256 of files
    obj_sha256 = hashlib.sha256()
    lib_sha256 = hashlib.sha256()
    with open(obj_file, 'rb') as f:
        obj_sha256.update(f.read())
    with open(lib_file, 'rb') as f:
        lib_sha256.update(f.read())
    obj_hash_str = obj_sha256.hexdigest()
    lib_hash_str = lib_sha256.hexdigest()
    title_dict["objSha256"] = obj_hash_str
    title_dict["sha256"] = lib_hash_str

    # save json file to kernel meta
    json_file = os.path.join(meta_path, kernel_name + ".json")
    write_code(title_dict, json_file)
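
write_code is a helper defined elsewhere in the code base; assuming it simply serializes the metadata dict to a JSON file, a stand-in would look roughly like this (sketch only, the real helper may format or protect the file differently):

import json

def write_json_meta(meta, json_path):
    """Stand-in sketch for write_code: dump the metadata dict as pretty-printed JSON."""
    with open(json_path, "w") as f:
        json.dump(meta, f, indent=4)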
Example No. 3
def _update_workspace_data(kernel_name, input_for_mod, output_indexes):
    """Update workspace tensors."""
    workspace_tensors = []
    json_file = get_kernel_meta_path() + kernel_name + ".json"
    if os.path.isfile(json_file):
        with open(json_file, 'r') as f:
            kernel_json = f.read()
            kernel_desc = json.loads(kernel_json)
            if "workspace" in kernel_desc:
                workspace_bytes = kernel_desc["workspace"]["size"]
                item = np.full(workspace_bytes, np.nan, np.int8)
                workspace_tensors.append(item)
    else:
        logging.warning("Kernel json file %s not found", json_file)

    # Add workspace tensors to input_for_mod
    if len(workspace_tensors) > 0:
        # Workspace tensors are placed after the inputs and outputs, so negative indexes in
        # output_indexes must be converted to non-negative ones first; otherwise -1 would point
        # to the last workspace tensor instead of the last output tensor.
        output_indexes = [i if i >= 0 else i + len(input_for_mod) for i in output_indexes]
        input_for_mod.extend(workspace_tensors)

    return output_indexes
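
To see why the index conversion matters, consider a hypothetical call where input_for_mod holds two inputs plus one output and a single workspace tensor is appended:

# Before extending: input_for_mod has 3 entries (2 inputs + 1 output), output_indexes = [-1].
input_len = 3
output_indexes = [-1]

# Convert to non-negative indexes *before* appending the workspace tensor ...
fixed = [i if i >= 0 else i + input_len for i in output_indexes]
print(fixed)  # [2] -> still the original output tensor

# ... otherwise, once the workspace tensor is appended, -1 would resolve to
# index 3 (the workspace tensor) rather than index 2 (the real output).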
Example No. 4
def _op_build_cuda(opnames, computes, args, device, kernel_name):
    kernel_meta_path = get_kernel_meta_path()
    cuda_path = os.path.realpath(kernel_meta_path)
    if not os.path.isdir(cuda_path):
        os.makedirs(cuda_path, exist_ok=True)
    if not opnames:
        logging.error("no opname given.")
        return None

    schedule_name = 'gpu_schedule_' + opnames[0]
    schedule_func = getattr(akg.ops.array.gpu, schedule_name)
    if not isinstance(schedule_func, (types.FunctionType, typing.Callable)):
        logging.error("no schedule func found %s", str(schedule_name))
        return None

    ptx_file = os.path.realpath(kernel_meta_path + kernel_name + ".ptx")
    if os.path.exists(ptx_file):
        os.remove(ptx_file)
    try:
        with open(ptx_file, 'at') as file:
            fcntl.flock(file.fileno(), fcntl.LOCK_EX)
            file.seek(0, 2)
            if file.tell() == 0:
                s = schedule_func(computes)
                foo = akg.tvm.build(s, args, device, name=kernel_name)
                ptx_code = foo.imported_modules[0].get_source("ptx")
                file.write(ptx_code)
                json_file = os.path.realpath(kernel_meta_path + kernel_name +
                                             ".json")
                kernel_info = (ptx_code, json_file, kernel_name)
                gpu_utils.save_gpu_params(s, args, kernel_info)
        os.chmod(ptx_file, 0o400)
    except Exception:
        logging.error(traceback.format_exc())
        return None
    return True
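
Both this function and _launch_kernel_from_path rely on the same write-once idiom: open in append mode, take an exclusive lock, and write only if the file is still empty, so concurrent builds do not clobber each other's output. The idiom in isolation (Unix-only, since it depends on fcntl):

import fcntl
import os

def write_once(path, content):
    """Write content to path only if the file is empty, under an exclusive lock."""
    with open(path, "at") as f:          # append mode never truncates existing data
        fcntl.flock(f.fileno(), fcntl.LOCK_EX)
        f.seek(0, 2)                     # move to end of file
        if f.tell() == 0:                # size 0 => we are the first writer
            f.write(content)
    os.chmod(path, 0o400)                # leave the result read-only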
Example No. 5
def dump_cuda_meta(code, ptx, thread_info, workspace=None):
    """
    Function for dumping cuda meta.

    Args:
        code: gpu code.
        ptx: ptx code.
        thread_info: thread info, written to json file.
        workspace: workspace info, which will be allocated in global memory.
    """
    title_dict = dict()

    # kernel name
    kernel_name = code.split("_kernel")[0].split(" ")[-1]
    title_dict["kernelName"] = kernel_name + "_kernel0"

    # sha256 of ptx
    sha256 = hashlib.sha256()
    sha256.update(ptx.encode("utf-8"))
    hash_str = sha256.hexdigest()
    title_dict["sha256"] = hash_str

    # thread info
    thread_info_dict = {
        "blockIdx.x": 1,
        "blockIdx.y": 1,
        "blockIdx.z": 1,
        "threadIdx.x": 1,
        "threadIdx.y": 1,
        "threadIdx.z": 1
    }
    for thread_tag in thread_info_dict.keys():
        if thread_tag in thread_info:
            if isinstance(thread_info[thread_tag], int):
                thread_info_dict[thread_tag] = thread_info[thread_tag]
            elif isinstance(thread_info[thread_tag], akg.tvm.expr.IntImm):
                thread_info_dict[thread_tag] = thread_info[thread_tag].value
    title_dict.update(thread_info_dict)

    # workspace
    workspace_dict = parse_workspace(workspace)
    if workspace_dict is not None:
        title_dict["workspace"] = workspace_dict

    meta_path = get_kernel_meta_path()
    cuda_path = os.path.realpath(meta_path)
    if not os.path.isdir(cuda_path):
        os.makedirs(cuda_path, exist_ok=True)

    # save ptx file to cuda meta
    ptx_file = os.path.realpath(meta_path + kernel_name + ".ptx")
    if os.path.exists(ptx_file):
        os.remove(ptx_file)
    with open(ptx_file, "at") as f:
        fcntl.flock(f.fileno(), fcntl.LOCK_EX)
        f.seek(0, 2)
        if f.tell() == 0:
            f.write(ptx)

    # change the file permission to read-only (0o400)
    os.chmod(ptx_file, 0o400)

    # save json file to cuda meta
    json_file = os.path.realpath(meta_path + kernel_name + ".json")
    write_code(title_dict, json_file)
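
The kernel name is recovered from the generated source purely by string splitting: take everything before the first "_kernel", then the last space-separated token of that prefix. For a made-up CUDA signature:

code = 'extern "C" __global__ void Fused_Add_12345_kernel0(float* a, float* b) {'

kernel_name = code.split("_kernel")[0].split(" ")[-1]
print(kernel_name)                # Fused_Add_12345
print(kernel_name + "_kernel0")   # value stored under "kernelName"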
Example No. 6
def launch(kernel, args, output=(-1, )):
    """
    simulated run CCE kernel by aic model.

    Args:
        kernel (str): str of kernel name, or CCE Module.
        args (Union[list, tuple]): list or tuple of numpy array.
        output (Union[list, tuple]): list or tuple of output argment index.
    Returns:
        output numpy array, or tuple of numpy array if multi-output.
    """
    def _check_exists(value, error_msg):
        if not value:
            raise RuntimeError(error_msg)

    def _mkdir(path):
        if not os.path.exists(path):
            os.mkdir(path)

    def _remove_file(path):
        if os.path.exists(path):
            os.remove(path)

    if isinstance(kernel, akg.tvm.module.Module):
        code = kernel.imported_modules[0].get_source()
        kernel_name = code.split("_kernel")[0].split(" ")[-1]
    else:
        kernel_name = kernel
    hbm_addr = 0x4000000
    hbm_unit = 0x1000000
    aic_model_path = os.getenv('AIC_MODEL_PATH')
    _check_exists(
        aic_model_path,
        "AIC_MODEL_PATH environment variable is not set. Please set it to the dir of model_exe"
    )
    aic_model_path = os.path.realpath(aic_model_path)
    # spec : target chip specification.
    spec_name = os.getenv('AIC_MODEL_SPEC_NAME')
    _check_exists(
        spec_name,
        "AIC_MODEL_SPEC_NAME environment variable is not set. Please set it to the name of spec("
        "It should be xxx.spec and the xxx.spec file is under the AIC_MODEL_PATH directory)"
    )
    aic_out_path = os.path.realpath("aic_out")
    _mkdir(aic_out_path)
    calog_path = aic_out_path + "/calog"
    _mkdir(calog_path)

    model_path = aic_out_path + "/model"
    if not os.path.exists(model_path):
        subprocess.call(["ln", "-s", aic_model_path + "/model", model_path])

    kernel_meta_path = get_kernel_meta_path()
    kernel_meta_realpath = os.path.realpath(kernel_meta_path)
    _check_exists(
        kernel_meta_realpath,
        "The parameter kernel_meta_realpath  can not be found, please check")

    o_name = kernel_meta_realpath + "/" + kernel_name + ".o"
    bin_name = aic_out_path + "/kernel.bin"
    subprocess.call([
        "aicore-elf-objcopy", "-O", "binary", "-j", ".text", o_name, bin_name
    ])

    load_dict = {}
    with open("%s/%s.json" % (kernel_meta_realpath, kernel_name), "r") as f:
        load_dict = json.load(f)

    arg_info = []
    desc = {
        "args": arg_info,
        "para_addr": hbm_addr,
        "bin_addr": hbm_addr + 0x100000,
        "bin": "kernel.bin",
        "block": load_dict["blockDim"],
        "spec": aic_model_path + '/' + spec_name,
        "path": aic_out_path
    }
    hbm_addr += hbm_unit

    for i, arg in enumerate(args):
        bin_name = "a_%d.bin" % (i)
        arg.tofile(os.path.join(aic_out_path, bin_name))
        info = {
            "bin": bin_name,
            "size": arg.size * arg.dtype.itemsize,
            "addr": hbm_addr,
            "out": False
        }
        arg_info.append(info)
        need_size = arg.size
        if need_size % hbm_unit:
            need_size += hbm_unit - (need_size % hbm_unit)
        hbm_addr += need_size
    for i in output:
        arg_info[len(arg_info) + i if i < 0 else i]['out'] = True

    config_path = aic_out_path + "/config.toml"
    _remove_file(config_path)
    with os.fdopen(os.open(config_path, os.O_WRONLY | os.O_CREAT, 0o400),
                   'w') as f:
        f.write('title="Sim Config"\n')
        f.write('log_open_value=0xffffffff\n')
        f.write('chip_version=1\n')
        f.write('block_dim=%d\n' % (desc['block']))
        f.write('specPathName="%s"\n' % (desc["spec"]))
        f.write('path="%s/"\n' % (desc["path"]))
        f.write('hbm_para_addr=0x%x\n' % (desc["para_addr"]))
        f.write('[BIN]\n')
        f.write('name="%s"\n' % (desc['bin']))
        f.write('addr=0x%x\n' % (desc['bin_addr']))
        for arg in arg_info:
            f.write('[[output_para_array]]\n'
                    if arg['out'] else '[[input_para_array]]\n')
            f.write('name="%s"\n' % (arg['bin']))
            f.write('addr=0x%x\n' % (arg['addr']))
            f.write('valid=1\n')
            if arg['out']:
                f.write('size=0x%x\n' % (arg['size']))

    run_path = aic_out_path + "/run.sh"
    _remove_file(run_path)
    with os.fdopen(os.open(run_path, os.O_WRONLY | os.O_CREAT, 0o500),
                   'w') as f:
        f.write("cd " + aic_out_path + "\n")
        f.write("export DVCSPEC_DIR=" + aic_model_path + "\n")
        f.write(aic_model_path +
                "/v100_ca_tag_master --gtest_filter=test_st_case.test_st_ca\n")
    subprocess.call(["sh", aic_out_path + "/run.sh"])
    out_list = []
    for i, arg_ in enumerate(args):
        if arg_info[i]['out']:
            out_data = np.fromfile(
                os.path.join(aic_out_path, arg_info[i]['bin']), arg_.dtype)
            # strip unneeded data copied back by aic model
            if out_data.size > args[i].size:
                out_data = out_data[0:arg_.size]
            out_arg = out_data.reshape(arg_.shape)
            out_list.append(out_arg)
    return out_list[0] if len(out_list) == 1 else tuple(out_list)
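
Argument buffers are laid out in the simulated HBM at hbm_unit-aligned offsets: each argument's size is rounded up to the next multiple of hbm_unit before the address is advanced. The same rounding, pulled out as a small helper with example values:

HBM_UNIT = 0x1000000  # 16 MiB, as in launch()

def round_up(size, unit=HBM_UNIT):
    """Round size up to the next multiple of unit."""
    if size % unit:
        size += unit - (size % unit)
    return size

print(hex(round_up(0x123456)))   # 0x1000000
print(hex(round_up(0x2000000)))  # 0x2000000 (already aligned)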
Example No. 7
def tvm_callback_cce_postproc(code, block_dim=1, workspace=None):
    """Function for dumping ascend meta."""
    if "__aicore__" in code:
        title_dict = {"magic": "RT_DEV_BINARY_MAGIC_ELF"}
    else:
        logging.warning("__aicore__ not found in code.")
        title_dict = dict()

    # kernel name
    kernel_name = code.split("_kernel")[0].split(" ")[-1]
    title_dict["kernelName"] = kernel_name + "_kernel0"

    # thread info
    title_dict["blockDim"] = block_dim

    # bin file info
    bin_file_suffix = ".o"
    title_dict["binFileSuffix"] = bin_file_suffix

    bin_file_name = kernel_name
    title_dict["binFileName"] = bin_file_name

    # sha256
    buf_size = 64 * 1024  # read 64 KB at a time
    root_path = get_kernel_meta_path()
    kernel_file_name = root_path + bin_file_name + bin_file_suffix
    sha256 = hashlib.sha256()
    with open(kernel_file_name, 'rb') as kf:
        while True:
            data = kf.read(buf_size)
            if not data:
                break
            sha256.update(data)
    title_dict["sha256"] = sha256.hexdigest()

    # workspace
    workspace_dict = parse_workspace(workspace)
    if workspace_dict is not None:
        title_dict["workspace"] = workspace_dict

    load_dict = {}
    if not os.path.exists(get_kernel_meta_path()):
        try:
            os.mkdir(root_path)
        except OSError as err:
            # 17, OSError: [Errno 17] File exists
            if err.errno == 17:
                pass
            else:
                raise err
    else:
        fname = root_path + kernel_name + "wk.json"
        if os.path.exists(fname):
            with open(fname, "r") as f:
                load_dict = json.load(f)
            os.remove(fname)

    final_dict = title_dict.copy()
    final_dict.update(load_dict)

    json_file = root_path + kernel_name + ".json"
    write_code(final_dict, json_file)

    return code
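
Hashing the kernel binary in 64 KB chunks keeps memory usage flat even for large .o files. The same loop as a standalone helper:

import hashlib

def sha256_of_file(path, buf_size=64 * 1024):
    """Return the hex sha256 digest of a file, reading it in buf_size chunks."""
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        while True:
            data = f.read(buf_size)
            if not data:
                break
            sha256.update(data)
    return sha256.hexdigest()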