Exemplo n.º 1
0
def eval_time(ind, module_file):
    """Time the compiled module stored at ``module_file`` for config ``ind``.

    The module is loaded under the AutoTVM configuration selected by ``ind``
    and run on random float32 inputs on the CPU. Each measurement round
    collects 10 repeats (4 runs each), discards the 5 slowest, and is redone
    until the coefficient of variation (std / mean) of the remaining timings
    is at most 5%.

    Args:
        ind: Index into ``task.config_space`` of the configuration to apply.
        module_file: Path of the saved module to load and benchmark.

    Returns:
        A ``(mean_time, ind)`` tuple, where ``mean_time`` is the mean of the
        kept timings from the last measurement round.
    """
    with autotvm.ApplyConfig(task.config_space.get(ind)):
        with tvm.target.create("llvm -mcpu=core-avx2"):
            # The schedule and buffers returned here are not used by the
            # timing code below; only the pre-built module is executed.
            _sched, _bufs = task.func(*task.args)
            kernel = tvm.runtime.load_module(module_file)

        dev = tvm.cpu()
        # Inputs are drawn in a fixed order (2-D operand, then 3-D operand);
        # the third buffer starts zeroed.
        host_arrays = (
            np.random.uniform(size=(N, N)),
            np.random.uniform(size=(N, N, N)),
            np.zeros((N, N, N)),
        )
        a_tvm, b_tvm, c_tvm = (
            tvm.nd.array(arr.astype(np.float32), ctx=dev) for arr in host_arrays
        )

        timer = kernel.time_evaluator(kernel.entry_name, dev, repeat=10, number=4)

        # Start above the threshold so at least one round is measured.
        spread = 1
        while spread > 0.05:
            timings = sorted(timer(a_tvm, b_tvm, c_tvm).results)
            kept = np.array(timings[:-5])  # drop the 5 slowest repeats
            spread = kept.std() / kept.mean()

        return kept.mean(), ind
Exemplo n.º 2
0
def _longest_chunk_with(chunks, marker):
    """Return the longest chunk containing ``marker`` (first wins ties), or None."""
    return max((c for c in chunks if marker in c), key=len, default=None)


def _llvm_op_intensity(kernel_ir):
    """Return 8-wide vector FMAs per vector load/store in an LLVM IR chunk.

    Returns 0 when the chunk contains no vector loads or stores.
    """
    loads = stores = fmas = 0
    for line in kernel_ir.split('\n'):
        if 'load <' in line:
            loads += 1
        elif 'store <' in line:
            stores += 1
        elif 'fmuladd.v8' in line:
            fmas += 1
    accesses = loads + stores
    return fmas / accesses if accesses > 0 else 0


def _asm_op_intensity(kernel_asm):
    """Return ymm FMA instructions per ymm move in an assembly chunk.

    Returns 0 when no moves are counted.
    """
    moves = fmas = 0
    for line in kernel_asm.split('\n'):
        if 'ymm' not in line:
            continue
        if 'vmov' in line:
            moves += 1
        elif 'vfmadd' in line:
            fmas += 1
            # An FMA whose line contains '(%r' is also counted as a move
            # (presumably a memory operand folded into the instruction).
            if '(%r' in line:
                moves += 1
    return fmas / moves if moves > 0 else 0


def limited_test(ind):
    """Build config ``ind`` and keep it only if its FMA/memory ratios look good.

    The schedule is built for ``llvm -mcpu=core-avx2``. The longest chunk of
    the LLVM IR that contains a vector ``fmuladd`` is scored first; only if
    that ratio lies in [1, 2] is the (more expensive) assembly generated and
    scored the same way against [0.5, 2]. A configuration passing both
    filters is saved to a uniquely named ``.o`` file under ``/tmp/``.

    Args:
        ind: Index into ``task.config_space`` of the configuration to build.

    Returns:
        ``(module_file, llvm_opint, asm_opint, ind, build_time, asm_time)``.
        On rejection ``module_file`` is ``''`` and ``asm_time`` is reported
        as 0, even if the assembly was generated before being rejected.
    """
    import tempfile

    tic = time.time()
    lower_llvm_limit = 1
    upper_llvm_limit = 2
    lower_asm_limit = 0.5
    upper_asm_limit = 2
    llvm_opint = 0
    asm_opint = 0

    config = task.config_space.get(ind)
    with autotvm.ApplyConfig(config):
        with tvm.target.create("llvm -mcpu=core-avx2"):
            s, arg_bufs = task.func(*task.args)
            op_func = tvm.build(s, arg_bufs)
            build_time = time.time() - tic

    # Stage 1: score the LLVM IR, which is cheap to obtain.
    ll_kernel = _longest_chunk_with(op_func.get_source().split('\n\n'), 'fmuladd.v')
    if ll_kernel is None:
        return '', llvm_opint, asm_opint, ind, build_time, 0
    llvm_opint = _llvm_op_intensity(ll_kernel)
    if not lower_llvm_limit <= llvm_opint <= upper_llvm_limit:
        return '', llvm_opint, asm_opint, ind, build_time, 0

    # Stage 2: generate and score the assembly only for IR-level survivors.
    tic = time.time()
    asm_source = op_func.get_source('asm')
    asm_time = time.time() - tic

    asm_kernel = _longest_chunk_with(asm_source.split(':\n'), 'vfmadd')
    if asm_kernel is not None:
        asm_opint = _asm_op_intensity(asm_kernel)
    if not lower_asm_limit <= asm_opint <= upper_asm_limit:
        return '', llvm_opint, asm_opint, ind, build_time, 0

    # mkstemp atomically reserves a unique path, so concurrent workers can
    # never pick the same file name; the module is then written over it.
    fd, module_file = tempfile.mkstemp(suffix='.o', dir='/tmp/')
    os.close(fd)
    op_func.save(module_file)
    return module_file, llvm_opint, asm_opint, ind, build_time, asm_time