Exemplo n.º 1
0
def doit(opts):
    """Write one ULP test source per MPFR-tested operator and float type.

    Every operator of ``operators.operators`` whose ``tests_mpfr`` flag is
    set is processed, except 'gammaln', 'lgamma' and 'pow' which are skipped.
    For each type of ``common.ftypes`` the module-level ``code`` template is
    filled in and written, after ``includes`` and
    ``gen_tests.relative_distance_cpp``, to
    ``<opts.ulps_dir>/<op>_ulp_<typ>.cpp``; the file is then handed to
    ``common.clang_format``.

    Raises:
        Exception: if ``common.ftypes`` yields something other than 'f16',
            'f32' or 'f64'.
    """
    common.myprint(opts, 'Generating ulps')
    common.mkdir_p(opts.ulps_dir)

    for op_name, operator in operators.operators.items():
        if not operator.tests_mpfr or op_name in ['gammaln', 'lgamma', 'pow']:
            continue

        # Template fields that do not depend on the floating point type.
        shared_fields = {
            'nsimd_func': op_name,
            'mpfr_func': operator.tests_mpfr_name(),
            'mpfr_rnd': ", MPFR_RNDN",
        }

        for typ in common.ftypes:
            # Template fields that are specific to the floating point type.
            if typ == 'f16':
                per_type = dict(random_generator=random_f16_generator,
                                convert_to_type="nsimd_f32_to_f16",
                                convert_from_type="nsimd_f16_to_f32",
                                mantisse=10,
                                SIZE=0xffff,
                                mpfr_suffix="flt")
            elif typ == 'f32':
                per_type = dict(random_generator=random_f32_generator,
                                convert_to_type="(f32)",
                                convert_from_type="",
                                mantisse=23,
                                SIZE=0x00ffffff,
                                mpfr_suffix="flt")
            elif typ == 'f64':
                per_type = dict(random_generator=random_f64_generator,
                                convert_to_type="(f64)",
                                convert_from_type="",
                                mantisse=52,
                                SIZE=0x00ffffff,
                                mpfr_suffix="d")
            else:
                raise Exception('Unsupported type "{}"'.format(typ))

            target = os.path.join(
                opts.ulps_dir, '{}_{}_{}.cpp'.format(op_name, "ulp", typ))
            if not common.can_create_filename(opts, target):
                continue

            template_fields = dict(shared_fields, typ=typ, **per_type)
            with common.open_utf8(opts, target) as out:
                out.write(includes)
                out.write(gen_tests.relative_distance_cpp)
                out.write(code.format(**template_fields))

            common.clang_format(opts, target)
Exemplo n.º 2
0
def doit(opts):
    """Generate ``friendly_but_not_optimized.hpp`` in ``opts.include_dir``.

    The header opens the ``nsimd`` namespace, then receives the output of
    ``get_impl`` for every operator that has a C++ operator, exactly three
    params and is not 'shl' or 'shr'. Nothing is written when
    ``common.can_create_filename`` refuses the target. The resulting file is
    passed to ``common.clang_format``.
    """
    common.myprint(opts, 'Generating friendly but not optimized advanced '
                   'C++ API')
    filename = os.path.join(opts.include_dir, 'friendly_but_not_optimized.hpp')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        # format() is what collapses '{{' into '{' here; the template has no
        # {year} field, so that keyword argument is simply ignored.
        out.write('''#ifndef NSIMD_FRIENDLY_BUT_NOT_OPTIMIZED_HPP
                     #define NSIMD_FRIENDLY_BUT_NOT_OPTIMIZED_HPP

                     #include <nsimd/nsimd.h>
                     #include <nsimd/cxx_adv_api.hpp>

                     namespace nsimd {{

                     '''.format(year=date.today().year))
        for op_name, operator in operators.operators.items():
            # Identity test against None instead of '== None'.
            if operator.cxx_operator is None or len(operator.params) != 3 or \
               operator.name in ['shl', 'shr']:
                continue
            out.write('''{hbar}

                         {code}

                         '''.format(hbar=common.hbar, code=get_impl(operator)))
        out.write('''{hbar}

                     }} // namespace nsimd

                     #endif'''.format(hbar=common.hbar))
    common.clang_format(opts, filename)
Exemplo n.º 3
0
def gen_unary_ops_tests(lf, rt, opts):
    """Write the "b" and "l" test sources of every bitwise unary operator.

    For each ``(op_name, s0, s1)`` entry of ``bitwise_unary_ops`` two files
    are rendered from ``bitwise_unary_test_template``:

    * ``<op_name>b`` uses ``s0`` as test statement and draws its inputs with
      ``__gen_random_val<lf, rt>()``;
    * ``<op_name>l`` uses ``s1`` as test statement and draws its inputs with
      ``(raw_t)(rand() % 2)``.

    Each file name comes from ``get_filename`` and each written file is
    passed to ``common.clang_format``.
    """
    for op_name, s0, s1 in bitwise_unary_ops:
        decls = check + limits + gen_random_val
        # One row per generated file:
        # (term, l, rand_statement, test_statement).
        # The second rand statement has no placeholder, so it is used as is
        # rather than being run through a no-op format() call.
        variants = (
            ("b", "",
             "__gen_random_val<{lf}, {rt}>();".format(lf=lf, rt=rt), s0),
            ("l", "l", "(raw_t)(rand() % 2);", s1),
        )
        for term, l, rand_statement, test_statement in variants:
            content_src = bitwise_unary_test_template.format(
                op_name=op_name,
                lf=lf,
                rt=rt,
                includes=includes,
                decls=decls,
                rand_statement=rand_statement,
                test_statement=test_statement,
                l=l,
                term=term)
            filename = get_filename(opts, op_name + term, lf, rt)
            with common.open_utf8(opts, filename) as fp:
                fp.write(content_src)
            common.clang_format(opts, filename)
Exemplo n.º 4
0
def gen_archis_write_file(opts, op, platform, simd_ext, simd_dir):
    """Write the per-operator header ``<simd_dir>/<op.name>.h``.

    The header is wrapped in the include guard returned by
    ``op.get_header_guard`` and contains the platform's ``types.h`` include,
    the platform module's additional include and the code produced by
    ``get_simd_implementation``. Nothing happens when
    ``common.can_create_filename`` refuses the target; otherwise the file is
    finally passed to ``common.clang_format``.
    """
    filename = os.path.join(simd_dir, '{}.h'.format(op.name))
    if not common.can_create_filename(opts, filename):
        return

    mod = opts.platforms[platform]
    header_template = '''#ifndef {guard}
                     #define {guard}

                     #include <nsimd/{platform}/{simd_ext}/types.h>
                     {additional_include}

                     {code}

                     {hbar}

                     #endif
                     '''

    with common.open_utf8(opts, filename) as out:
        # 'year' and 'func' are supplied although the template has no such
        # fields; str.format ignores unused keyword arguments.
        fields = {}
        fields['additional_include'] = mod.get_additional_include(
            op.name, platform, simd_ext)
        fields['year'] = date.today().year
        fields['guard'] = op.get_header_guard(platform, simd_ext)
        fields['platform'] = platform
        fields['simd_ext'] = simd_ext
        fields['func'] = op.name
        fields['hbar'] = common.hbar
        fields['code'] = get_simd_implementation(opts, op, mod, simd_ext)
        out.write(header_template.format(**fields))

    common.clang_format(opts, filename)
Exemplo n.º 5
0
def doit(opts):
    """Generate ``cxx_adv_api_functions.hpp`` in ``opts.include_dir``.

    The header opens the ``nsimd`` namespace and receives, for each operator
    whose ``autogen_cxx_adv`` flag is set, the output of
    ``get_cxx_advanced_generic`` preceded by ``common.hbar``. Returns early
    when ``common.can_create_filename`` refuses the target. The file is
    finally passed to ``common.clang_format``.
    """
    print('-- Generating advanced C++ API')
    filename = os.path.join(opts.include_dir, 'cxx_adv_api_functions.hpp')
    if not common.can_create_filename(opts, filename):
        return

    section = '''{hbar}

                         {code}

                         '''

    with common.open_utf8(opts, filename) as out:
        # The opening template has no {year} field; format() is still needed
        # to turn '{{' into '{'.
        opening = '''#ifndef NSIMD_CXX_ADV_API_FUNCTIONS_HPP
                     #define NSIMD_CXX_ADV_API_FUNCTIONS_HPP

                     namespace nsimd {{

                     '''.format(year=date.today().year)
        out.write(opening)

        for _, operator in operators.operators.items():
            if operator.autogen_cxx_adv:
                out.write(section.format(
                    hbar=common.hbar,
                    code=get_cxx_advanced_generic(operator)))

        closing = '''{hbar}

                     }} // namespace nsimd

                     #endif'''.format(hbar=common.hbar)
        out.write(closing)

    common.clang_format(opts, filename)
Exemplo n.º 6
0
def doit(opts):
    """Generate ``functions.h`` (the base C and C++ APIs) in ``opts.include_dir``.

    For every operator the header receives an ``NSIMD_AUTO_INCLUDE`` line for
    ``<operator.name>.h`` followed by the output of ``get_c_base_generic``
    and ``get_cxx_base_generic``. The output of ``get_put_decl`` is appended
    before the closing ``#endif``. Returns early when
    ``common.can_create_filename`` refuses the target. The file is finally
    passed to ``common.clang_format``.
    """
    print('-- Generating base APIs')
    common.mkdir_p(opts.include_dir)
    filename = os.path.join(opts.include_dir, 'functions.h')
    if not common.can_create_filename(opts, filename):
        return

    per_operator = '''{}

                         #include NSIMD_AUTO_INCLUDE({}.h)

                         {}

                         {}

                         '''

    # NOTE(review): open_utf8 is called with the file name only, whereas
    # clang_format below also receives opts -- confirm the helper signature.
    with common.open_utf8(filename) as out:
        # The guard template has no {year} field; the keyword is ignored.
        guard_open = '''#ifndef NSIMD_FUNCTIONS_H
                     #define NSIMD_FUNCTIONS_H

                     '''.format(year=date.today().year)
        out.write(guard_open)

        for _, operator in operators.operators.items():
            out.write(per_operator.format(common.hbar, operator.name,
                                          get_c_base_generic(operator),
                                          get_cxx_base_generic(operator)))

        trailer = '''{hbar}

                     {put_decl}

                     {hbar}

                     #endif'''
        out.write(trailer.format(hbar=common.hbar, put_decl=get_put_decl()))

    common.clang_format(opts, filename)
Exemplo n.º 7
0
def gen_bench(f, simd, typ):
    """Write and clang-format the benchmark source for ``f``, ``simd``, ``typ``.

    The target path comes from ``gen_filename``. Nothing is written when
    ``common.can_create_filename`` refuses it or when ``gen_code`` returns
    None. Uses the module-level ``_opts``.
    """
    path = gen_filename(f, simd, typ)
    # Bail out if the file must not be (re)created.
    if not common.can_create_filename(_opts, path):
        return

    # Code that is specific to this benchmark.
    category = common.nsimd_category(simd)
    specific = gen_code(f, simd, typ, category=category)
    if specific is None:
        return

    # Assemble the parts: CPU references, the specific code, the unrolled
    # variants and finally the third-party library references.
    body = (gen_bench_against(f, simd, typ, f.bench_against_cpu())
            + specific
            + gen_bench_unrolls(f, simd, typ, category)
            + gen_bench_against(f, simd, typ, f.bench_against_libs()))

    # Wrap the body into the final source text.
    source_text = gen_bench_from_code(f, typ, body)

    # NOTE(review): open_utf8 receives only the path here while clang_format
    # also receives _opts -- confirm the helper signature.
    with common.open_utf8(path) as out:
        out.write(source_text)

    common.clang_format(_opts, path)
0
def gen_if_else_tests(lf, rt, opts):
    """Render ``if_else_test_template`` for ``lf``/``rt`` and write the test.

    The file name is obtained from ``get_filename`` with the operator name
    "if_else"; the written file is passed to ``common.clang_format``.
    """
    template_fields = {
        'lf': lf,
        'rt': rt,
        'includes': includes,
        'decls': check + limits + comparison_fp + gen_random_val,
    }
    source_text = if_else_test_template.format(**template_fields)

    target = get_filename(opts, "if_else", lf, rt)
    with common.open_utf8(opts, target) as handle:
        handle.write(source_text)
    common.clang_format(opts, target)
Exemplo n.º 9
0
def gen_minmax_ops_tests(lf, rt, opts):
    """Write one test source per operator listed in ``minmax_ops``.

    Each source is rendered from ``minmax_test_template``, written to the
    path returned by ``get_filename`` and passed to ``common.clang_format``.
    """
    def _render(name):
        # Fill the template for a single operator.
        return minmax_test_template.format(
            op_name=name,
            lf=lf,
            rt=rt,
            includes=includes,
            decls=check + limits + comparison_fp + gen_random_val)

    for op_name in minmax_ops:
        source_text = _render(op_name)
        target = get_filename(opts, op_name, lf, rt)
        with common.open_utf8(opts, target) as handle:
            handle.write(source_text)
        common.clang_format(opts, target)
Exemplo n.º 10
0
def gen_comparison_tests(lf, rt, opts):
    """Write one test source per ``(op_name, op_val)`` pair of ``comparison_ops``.

    An operator is skipped when ``get_filename`` returns None. Otherwise the
    source is rendered from ``comparison_test_template`` (its declarations
    include ``comparison_log`` formatted with ``op_val``), written to the
    returned path and passed to ``common.clang_format``.
    """
    for op_name, op_val in comparison_ops:
        filename = get_filename(opts, op_name, lf, rt)
        # Identity test against None; checking first also avoids rendering a
        # template whose result would be thrown away.
        if filename is None:
            continue
        decls = check + limits + comparison_log.format(op_val=op_val) + \
            gen_random_val
        content_src = comparison_test_template.format(
            op_name=op_name, op_val=op_val, lf=lf, rt=rt,
            includes=includes, decls=decls)
        with common.open_utf8(opts, filename) as fp:
            fp.write(content_src)
        common.clang_format(opts, filename)
Exemplo n.º 11
0
def gen_ternary_ops_tests(lf, rt, opts):
    """Write one test source per ``(op_name, statement)`` pair of ``ternary_ops``.

    ``statement`` is itself a template formatted with ``lf`` and ``rt`` and
    passed to ``ternary_ops_template`` as ``check_statement``. An operator is
    skipped when ``get_filename`` returns None; otherwise the rendered source
    is written to the returned path and passed to ``common.clang_format``.
    """
    for op_name, statement in ternary_ops:
        filename = get_filename(opts, op_name, lf, rt)
        # Identity test against None; checking first also avoids rendering a
        # template whose result would be thrown away.
        if filename is None:
            continue
        decls = check + limits + comparison_fp + gen_random_val
        content_src = ternary_ops_template.format(
            op_name=op_name, check_statement=statement.format(lf=lf, rt=rt),
            lf=lf, rt=rt, includes=includes, decls=decls)
        with common.open_utf8(opts, filename) as fp:
            fp.write(content_src)
        common.clang_format(opts, filename)
Exemplo n.º 12
0
def write_cpp(opts, simd_ext, emulate_fp16):
    """Write ``api_<simd_ext>.cpp`` into ``opts.src_dir``.

    The file defines ``NSIMD_INSIDE``, includes the nsimd headers and then
    contains the output of ``get_put_impl(simd_ext)``. ``emulate_fp16`` is
    accepted but not used by this function. Returns early when
    ``common.can_create_filename`` refuses the target; the written file is
    passed to ``common.clang_format``.
    """
    filename = os.path.join(opts.src_dir, 'api_{}.cpp'.format(simd_ext))
    if not common.can_create_filename(opts, filename):
        return

    preamble = '''#define NSIMD_INSIDE
                     #include <nsimd/nsimd.h>
                     #include <nsimd/cxx_adv_api.hpp>

                     '''

    with common.open_utf8(opts, filename) as out:
        # The preamble has no {year} field; the keyword is ignored.
        out.write(preamble.format(year=date.today().year))
        put_impl = get_put_impl(simd_ext)
        out.write(put_impl)

    common.clang_format(opts, filename)
Exemplo n.º 13
0
def gen_math_functions_tests(lf, rt, opts):
    """Write one test source per operator listed in ``math_ops``.

    Each source is rendered from ``math_test_template``; the "rec" operator
    additionally gets ``rec_reference`` appended to its declarations. The
    file is written to the path returned by ``get_filename`` and passed to
    ``common.clang_format``.
    """
    for op_name in math_ops:
        common_decls = check + limits + comparison_fp + gen_random_val
        if op_name == "rec":
            decls = common_decls + rec_reference
        else:
            decls = common_decls

        fields = dict(op_name=op_name, lf=lf, rt=rt,
                      includes=includes, decls=decls)
        source_text = math_test_template.format(**fields)

        target = get_filename(opts, op_name, lf, rt)
        with common.open_utf8(opts, target) as handle:
            handle.write(source_text)
        common.clang_format(opts, target)
Exemplo n.º 14
0
def gen_archis_write_put(opts, platform, simd_ext, simd_dir):
    """Write ``<simd_dir>/put.h`` declaring the ``put`` functions of ``simd_ext``.

    The header contains an include guard, the platform's ``types.h`` include
    (preceded by the ``cpu`` ``put.h`` include unless ``simd_ext`` is 'cpu')
    and, for each type of ``common.types``, the C declaration of
    ``nsimd_put_<simd_ext>_<typ>`` plus a C++ ``nsimd::put`` overload that
    forwards to it. Returns early when ``common.can_create_filename`` refuses
    the target; the written file is passed to ``common.clang_format``.
    """
    filename = os.path.join(simd_dir, 'put.h')
    if not common.can_create_filename(opts, filename):
        return
    # Every extension but 'cpu' also pulls in the cpu implementation of put.
    include_cpu_put = '#include <nsimd/cpu/cpu/put.h>\n' \
                      if simd_ext != 'cpu' else ''
    # NOTE(review): open_utf8 is called with the file name only, whereas
    # clang_format below also receives opts -- confirm the helper signature.
    with common.open_utf8(filename) as out:
        # The template has no {year} field; the keyword is ignored.
        out.write( \
        '''#ifndef NSIMD_{PLATFORM}_{SIMD_EXT}_PUT_H
           #define NSIMD_{PLATFORM}_{SIMD_EXT}_PUT_H

           {include_cpu_put}#include <nsimd/{platform}/{simd_ext}/types.h>
           #include <stdio.h>

           {hbar}

           '''.format(year=date.today().year, hbar=common.hbar,
                      simd_ext=simd_ext, platform=platform,
                      PLATFORM=platform.upper(), SIMD_EXT=simd_ext.upper(),
                      include_cpu_put=include_cpu_put))
        for typ in common.types:
            out.write( \
            '''#if NSIMD_CXX > 0
               extern "C" {{
               #endif

               NSIMD_DLLSPEC
               int nsimd_put_{simd_ext}_{typ}(FILE *, const char *,
                                              nsimd_{simd_ext}_v{typ});

               #if NSIMD_CXX > 0
               }} // extern "C"
               #endif

               #if NSIMD_CXX > 0
               namespace nsimd {{
               NSIMD_INLINE int put(FILE *out, const char *fmt,
                                    nsimd_{simd_ext}_v{typ} a0, {typ},
                                    {simd_ext}) {{
                 return nsimd_put_{simd_ext}_{typ}(out, fmt, a0);
               }}
               }} // namespace nsimd
               #endif

               {hbar}

               '''.format(simd_ext=simd_ext, hbar=common.hbar, typ=typ))
        out.write('#endif')
    common.clang_format(opts, filename)
Exemplo n.º 15
0
def gen_tests(opts):
    """Write one test source per configuration of every random function.

    A configuration is a (word size, number of words, number of rounds)
    combination taken from ``func.wordsize_nwords_nrounds`` for each ``func``
    of ``rand_functions``. Files go to ``<opts.tests_dir>/modules/random``,
    are named after ``func.gen_function_name`` and are passed to
    ``common.clang_format`` once written.
    """
    def _configurations():
        # Lazily flatten the nested mapping into one tuple per test file.
        for func in rand_functions:
            for word_size, by_nwords in func.wordsize_nwords_nrounds.items():
                for nwords, rounds in by_nwords.items():
                    for nrounds in rounds:
                        yield func, word_size, nwords, nrounds

    for func, word_size, nwords, nrounds in _configurations():
        dirname = os.path.join(opts.tests_dir, 'modules', 'random')
        common.mkdir_p(dirname)

        basename = '{}.cpp'.format(
            func.gen_function_name(nwords, word_size, nrounds))
        filename = os.path.join(dirname, basename)

        with common.open_utf8(opts, filename) as out:
            out.write(func.gen_tests(opts, nrounds, word_size, nwords))

        common.clang_format(opts, filename)
Exemplo n.º 16
0
def gen_math_functions_tests(lf, rt, opts):
    """Write one test source per operator listed in ``math_ops``.

    The "rec" operator gets ``rec_reference`` appended to its declarations
    and uses 'rec' as reference operator; every other operator uses
    'nsimd_scalar_abs_f64'. An operator is skipped when ``get_filename``
    returns None; otherwise the source rendered from ``math_test_template``
    is written to the returned path and passed to ``common.clang_format``.
    """
    for op_name in math_ops:
        filename = get_filename(opts, op_name, lf, rt)
        # Identity test against None; checking first also avoids rendering a
        # template whose result would be thrown away.
        if filename is None:
            continue
        decls = check + limits + comparison_fp + gen_random_val
        if op_name == "rec":
            decls += rec_reference
            ref_op_name = 'rec'
        else:
            ref_op_name = 'nsimd_scalar_abs_f64'
        content_src = math_test_template.format(op_name=op_name, lf=lf, rt=rt,
                                                ref_op_name=ref_op_name,
                                                includes=includes, decls=decls)
        with common.open_utf8(opts, filename) as fp:
            fp.write(content_src)
        common.clang_format(opts, filename)
Exemplo n.º 17
0
def write_cpp(opts, simd_ext, emulate_fp16):
    """Write ``api_<simd_ext>.cpp`` into ``opts.src_dir``.

    After the ``NSIMD_INSIDE`` preamble, every operator whose ``src``
    attribute is truthy contributes an include of
    ``<nsimd/src/<name>.hpp>`` followed by the output of
    ``get_impl(operator, emulate_fp16, simd_ext)``. The output of
    ``get_put_impl(simd_ext)`` closes the file. Returns early when
    ``common.can_create_filename`` refuses the target; the written file is
    passed to ``common.clang_format``.
    """
    filename = os.path.join(opts.src_dir, 'api_{}.cpp'.format(simd_ext))
    if not common.can_create_filename(opts, filename):
        return

    preamble = '''#define NSIMD_INSIDE
                     #include <nsimd/nsimd.h>
                     #include <nsimd/cxx_adv_api.hpp>

                     '''
    src_include = '''{hbar}

                             #include <nsimd/src/{name}.hpp>

                             '''

    with common.open_utf8(opts, filename) as out:
        # The preamble has no {year} field; the keyword is ignored.
        out.write(preamble.format(year=date.today().year))
        for _, operator in operators.operators.items():
            if not operator.src:
                continue
            out.write(src_include.format(name=operator.name,
                                         hbar=common.hbar))
            out.write(get_impl(operator, emulate_fp16, simd_ext))
        out.write(get_put_impl(simd_ext))

    common.clang_format(opts, filename)
Exemplo n.º 18
0
def doit(opts):
    """Generate ``cxx_adv_api_functions.hpp`` in ``opts.include_dir``.

    For each operator whose ``autogen_cxx_adv`` flag is set, the header
    receives the output of ``get_cxx_advanced_generic``. When the operator
    also has a truthy ``cxx_operator`` and its ``args`` are ['v', 'v'] or
    ['v', 'p'], the output of ``gen_assignment_operators`` is appended too.
    Returns early when ``common.can_create_filename`` refuses the target; the
    written file is passed to ``common.clang_format``.
    """
    common.myprint(opts, 'Generating advanced C++ API')
    filename = os.path.join(opts.include_dir, 'cxx_adv_api_functions.hpp')
    if not common.can_create_filename(opts, filename):
        return

    # Argument lists for which gen_assignment_operators output is written.
    assignable_args = [['v', 'v'], ['v', 'p']]

    with common.open_utf8(opts, filename) as out:
        out.write('''#ifndef NSIMD_CXX_ADV_API_FUNCTIONS_HPP
                     #define NSIMD_CXX_ADV_API_FUNCTIONS_HPP

                     namespace nsimd {

                     ''')

        for _, operator in operators.operators.items():
            if not operator.autogen_cxx_adv:
                continue

            generic_section = '''{hbar}

                         {code}

                         '''.format(hbar=common.hbar,
                                    code=get_cxx_advanced_generic(operator))
            out.write(generic_section)

            if operator.cxx_operator and operator.args in assignable_args:
                assignment_section = '{hbar}\n{code}'.format(
                    hbar=common.hbar,
                    code=gen_assignment_operators(operator))
                out.write(assignment_section)

        out.write('''{hbar}

                     }} // namespace nsimd

                     #endif'''.format(hbar=common.hbar))

    common.clang_format(opts, filename)
Exemplo n.º 19
0
def generate():
    """Return the clang-formatted explicit instantiations of ``PamMap::at``.

    The text starts with the licence header for this file and the
    ``typedefs.hxx`` include. Between ``NAMESPACE_OPEN`` and
    ``NAMESPACE_CLOSE`` it holds, for every C++ type returned by
    ``make_supported_cpp_types(constants.DTYPES)``, one const and one
    non-const explicit instantiation.
    """
    const_instantiation = ("template const {0:}& PamMap::at<{0:}>("
                           "const std::string& key, const {0:}& default_value) "
                           "const;")
    mutable_instantiation = (
        "template {0:}& PamMap::at<{0:}>(const std::string& key, "
        "{0:}& default_value);")

    lines = licence_header_cpp(__file__)
    lines += ['#include "typedefs.hxx"']
    lines += NAMESPACE_OPEN

    for cpptype in make_supported_cpp_types(constants.DTYPES):
        for pattern in (const_instantiation, mutable_instantiation):
            lines.append(pattern.format(cpptype))

    lines += NAMESPACE_CLOSE

    joined = "\n".join(lines)
    return clang_format(joined)
Exemplo n.º 20
0
def gen_bench(f, simd, typ):
    """Write and clang-format the benchmark source for ``f``, ``simd``, ``typ``.

    The target path comes from ``gen_filename``. Nothing is written when
    ``common.can_create_filename`` refuses it or when ``gen_code`` returns
    None. Uses the module-level ``_opts``.

    A second, timestamp-based benchmark body that reports its results as JSON
    is also assembled, but an empty string is passed to
    ``gen_bench_from_code`` in its place, so it does not reach the written
    file.
    """
    ## TODO
    path = gen_filename(f, simd, typ)
    ## Check if we need to create the file
    if not common.can_create_filename(_opts, path):
        return
    ## Generate specific code for the bench
    category = common.nsimd_category(simd)
    code = gen_code(f, simd, typ, category=category)
    if code is None:
        return
    ## Now aggregate every parts: CPU references, the specific code, the
    ## unrolled variants, then the references against other libraries
    bench = ''
    #bench += gen_bench_asm_function(f, typ, category)
    bench += gen_bench_against(f, 'cpu', typ, f.bench_against_cpu())
    bench += code
    bench += gen_bench_unrolls(f, simd, typ, category)
    bench += gen_bench_against(f, simd, typ, f.bench_against_libs())
    ## bench_with_timestamp: same sequence of parts as above, built with the
    ## *_with_timestamp helpers and prefixed by the C++ declarations of
    ## `sums` and `nb_runs`
    bench_with_timestamp = ''
    bench_with_timestamp += 'std::map<std::string, std::pair<' + typ + ', double>> sums;' + '\n'
    bench_with_timestamp += 'size_t const nb_runs = 10 * 1000;' + '\n'
    bench_with_timestamp += gen_bench_against_with_timestamp(
        f, 'cpu', typ, f.bench_against_cpu())
    bench_with_timestamp += gen_bench_with_timestamp(f, simd, typ, category)
    bench_with_timestamp += gen_bench_unrolls_with_timestamp(
        f, simd, typ, category)
    bench_with_timestamp += gen_bench_against_with_timestamp(
        f, simd, typ, f.bench_against_libs())
    ## C++ epilogue that serializes `sums` as a JSON document on std::cout.
    ## Braces are doubled because the text goes through str.format, and
    ## backslashes are doubled so that the C++ source receives \n and \".
    bench_with_timestamp += '''
                            std::string json = "";
                            json += "{{\\n";
                            json += "  \\"benchmarks\\": [\\n";

                            for (auto const & bench_name_sum_time : sums) {{
                              std::string const & bench_name = bench_name_sum_time.first;
                              {typ} const & sum = bench_name_sum_time.second.first;
                              double const & elapsed_time_ns = bench_name_sum_time.second.second;

                              json += "  {{" "\\n";
                              json += "    \\"name\\": \\"" + bench_name + "/{typ}\\"," + "\\n";
                              json += "    \\"real_time\\": " + std::to_string(elapsed_time_ns) + "," + "\\n";
                              json += "    \\"sum\\": " + std::string(std::isfinite(sum) ? "" : "\\"") + std::to_string(sum) + std::string(std::isfinite(sum) ? "" : "\\"") + "," + "\\n";
                              json += "    \\"time_unit\\": \\"ns\\"\\n";
                              json += "  }}";
                              if (&bench_name_sum_time != &*sums.rbegin()) {{
                                json += ",";
                              }}
                              json += "\\n";
                            }}

                            json += "  ]\\n";
                            json += "}}\\n";

                            std::cout << json << std::flush;
                            '''.format(typ=typ)
    ## Finalize code
    code = gen_bench_from_code(f, typ, bench, '')  # '' is passed instead of bench_with_timestamp
    ## Write file
    ## NOTE(review): open_utf8 receives only the path while clang_format also
    ## receives _opts -- confirm the helper signature. The `as f` target also
    ## rebinds the parameter `f`, which is not read afterwards.
    with common.open_utf8(path) as f:
        f.write(code)
    ## Clang-format it!
    common.clang_format(_opts, path)
Exemplo n.º 21
0
def gen_archis_types(opts, simd_dir, platform, simd_ext):
    """Write ``<simd_dir>/types.h`` describing the SIMD types of ``simd_ext``.

    The header contains, for every type of ``common.types``:

    * a ``nsimd_<simd_ext>_v<typ>`` typedef (from ``mod.get_type``);
    * a ``nsimd_<simd_ext>_vl<typ>`` typedef (from ``mod.get_logical_type``);
    * ``x2``, ``x3`` and ``x4`` types, either typedefs of
      ``mod.get_SoA_type`` when ``mod.has_compatible_SoA_types(simd_ext)`` is
      true, or structs holding 2, 3 or 4 vectors otherwise;
    * a C++ ``simd_traits<typ, simd_ext>`` specialization.

    It also defines ``NSIMD_<simd_ext>_NB_REGISTERS`` from
    ``mod.get_nb_registers``. ``mod`` is ``opts.platforms[platform]``.
    Returns early when ``common.can_create_filename`` refuses the target; the
    written file is passed to ``common.clang_format``.
    """
    filename = os.path.join(simd_dir, 'types.h')
    if not common.can_create_filename(opts, filename):
        return
    mod = opts.platforms[platform]
    # Vector typedefs, one per type.
    c_code = '\n'.join([
        'typedef {} nsimd_{}_v{};'.format(mod.get_type(opts, simd_ext, t),
                                          simd_ext, t) for t in common.types
    ])
    c_code += '\n\n'
    # Logical vector typedefs, one per type.
    c_code += '\n'.join([
        'typedef {} nsimd_{}_vl{};'.format(
            mod.get_logical_type(opts, simd_ext, t), simd_ext, t)
        for t in common.types
    ])
    if mod.has_compatible_SoA_types(simd_ext):
        # Degrees 2, 3 and 4 are typedefs of the module's own SoA types.
        # Note that no separator is added between the groups of two
        # successive degrees, nor before the first group.
        for deg in range(2, 5):
            c_code += '\n'.join(['typedef {} nsimd_{}_v{}x{};'. \
                                 format(mod.get_SoA_type(simd_ext, typ, deg),
                                 simd_ext, typ, deg) for typ in common.types])
    else:
        # Otherwise degrees 2, 3 and 4 are plain structs of vectors.
        c_code += '\n'.join(['''
                             typedef struct nsimd_{simd_ext}_v{typ}x2 {{
                               nsimd_{simd_ext}_v{typ} v0;
                               nsimd_{simd_ext}_v{typ} v1;
                             }} nsimd_{simd_ext}_v{typ}x2;
                             '''.format(simd_ext=simd_ext, typ=typ) \
                                        for typ in common.types])
        c_code += '\n'.join(['''
                             typedef struct nsimd_{simd_ext}_v{typ}x3 {{
                               nsimd_{simd_ext}_v{typ} v0;
                               nsimd_{simd_ext}_v{typ} v1;
                               nsimd_{simd_ext}_v{typ} v2;
                             }} nsimd_{simd_ext}_v{typ}x3;
                             '''.format(simd_ext=simd_ext, typ=typ) \
                                        for typ in common.types])
        c_code += '\n'.join(['''
                             typedef struct nsimd_{simd_ext}_v{typ}x4 {{
                               nsimd_{simd_ext}_v{typ} v0;
                               nsimd_{simd_ext}_v{typ} v1;
                               nsimd_{simd_ext}_v{typ} v2;
                               nsimd_{simd_ext}_v{typ} v3;
                             }} nsimd_{simd_ext}_v{typ}x4;
                             '''.format(simd_ext=simd_ext, typ=typ) \
                                        for typ in common.types])
        c_code += '\n\n'
    # C++ traits mapping (type, extension) to the typedefs declared above.
    cxx_code = '\n\n'.join([
        '''template <>
                               struct simd_traits<{typ}, {simd_ext}> {{
                                 typedef nsimd_{simd_ext}_v{typ} simd_vector;
                                 typedef nsimd_{simd_ext}_v{typ}x2 simd_vectorx2;
                                 typedef nsimd_{simd_ext}_v{typ}x3 simd_vectorx3;
                                 typedef nsimd_{simd_ext}_v{typ}x4 simd_vectorx4;
                                 typedef nsimd_{simd_ext}_vl{typ} simd_vectorl;
                               }};'''.format(typ=t, simd_ext=simd_ext)
        for t in common.types
    ])
    # In the template below {platform} and {SIMD_EXT} receive upper-cased
    # names while {simd_ext} is used as given; there is no {year} field, so
    # that keyword is ignored by format().
    with common.open_utf8(opts, filename) as out:
        out.write('''#ifndef NSIMD_{platform}_{SIMD_EXT}_TYPES_H
                     #define NSIMD_{platform}_{SIMD_EXT}_TYPES_H

                     {c_code}

                     #define NSIMD_{simd_ext}_NB_REGISTERS  {nb_registers}

                     #if NSIMD_CXX > 0
                     namespace nsimd {{

                     struct {simd_ext} {{}};

                     {cxx_code}

                     }} // namespace nsimd
                     #endif

                     #endif
                     '''.\
                     format(year=date.today().year,
                            platform=platform.upper(),
                            SIMD_EXT=simd_ext.upper(),
                            c_code=c_code, cxx_code=cxx_code,
                            simd_ext=simd_ext,
                            nb_registers=mod.get_nb_registers(simd_ext)))
    common.clang_format(opts, filename)
Exemplo n.º 22
0
def gen_functions(opts):
    """Generate ``modules/tet1d/functions.hpp`` under ``opts.include_dir``.

    For every operator whose ``has_scalar_impl`` flag is set, the header
    receives:

    * an empty tag struct ``<op>_t``;
    * a ``node<<op>_t, ...>`` specialization exposing ``size()`` and either
      ``gpu_get`` (CUDA/ROCm builds) or ``scalar_get`` plus ``simd_get``;
    * a builder function ``<op>(...)`` that wraps its arguments with
      ``to_node`` and returns the node;
    * C++ operator overloads when the operator has a ``cxx_operator`` and two
      or three params.

    The text for all operators is assembled first. It is written only when
    ``common.can_create_filename`` accepts the target, and the written file
    is passed to ``common.clang_format``.
    """
    functions = ''

    for op_name, operator in operators.operators.items():
        if not operator.has_scalar_impl:
            continue

        not_closed = is_not_closed(operator)
        not_closed_tmpl_args = 'typename ToType, ' if not_closed else ''
        not_closed_tmpl_params = 'ToType' if not_closed else 'none_t'

        # Pick the template pieces according to the operator's arity. Shifts
        # are special-cased: their second argument is a plain int.
        # NOTE(review): there is no final 'else'. An operator with a params
        # length other than 2, 3 or 4 would reuse the values left over from
        # the previous iteration (or hit an unbound local on the first one)
        # -- confirm no such operator has has_scalar_impl set.
        if op_name in ['shl', 'shr', 'shra']:
            tmpl_args = 'typename Left'
            tmpl_params = 'Left, none_t, none_t'
            size = 'return left.size();'
            args = 'Left const &left, int s'
            members = 'Left left; int s;'
            members_assignment = 'ret.left = to_node(left); ret.s = s;'
            to_node_type = 'typename to_node_t<Left>::type, none_t, none_t'
        elif len(operator.params) == 2:
            tmpl_args = not_closed_tmpl_args + 'typename Left'
            tmpl_params = 'Left, none_t, ' + not_closed_tmpl_params
            size = 'return left.size();'
            args = 'Left const &left'
            members = 'Left left;'
            members_assignment = 'ret.left = to_node(left);'
            to_node_type = 'typename to_node_t<Left>::type, none_t, none_t'
        elif len(operator.params) == 3:
            tmpl_args = 'typename Left, typename Right'
            tmpl_params = 'Left, Right, none_t'
            size = 'return compute_size(left.size(), right.size());'
            args = 'Left const &left, Right const &right'
            members = 'Left left;\nRight right;'
            members_assignment = '''ret.left = to_node(left);
                                    ret.right = to_node(right);'''
            to_node_type = 'typename to_node_t<Left>::type, ' \
                           'typename to_node_t<Right>::type, none_t'
        elif len(operator.params) == 4:
            tmpl_args = 'typename Left, typename Right, typename Extra'
            tmpl_params = 'Left, Right, Extra'
            size = \
            'return compute_size(left.size(), right.size(), extra.size());'
            args = 'Left const &left, Right const &right, Extra const &extra'
            members = 'Left left;\nRight right;\nExtra extra;'
            members_assignment = '''ret.left = to_node(left);
                                    ret.right = to_node(right);
                                    ret.extra = to_node(extra);'''
            to_node_type = 'typename to_node_t<Left>::type, ' \
                           'typename to_node_t<Right>::type, ' \
                           'typename to_node_t<Extra>::type'

        # Operators returning 'v' yield packs of out_type; all the others
        # yield logical packs and a bool in the scalar/GPU getters.
        if operator.returns == 'v':
            to_pack = 'to_pack_t'
            return_type = 'out_type'
        else:
            to_pack = 'to_packl_t'
            return_type = 'bool'

        if not_closed:
            # The output type is an explicit template argument (ToType) and
            # is forwarded to the nsimd call; this also overrides the
            # to_node_type chosen above.
            to_typ_arg = 'out_type(), '
            to_typ_tmpl_arg = '<typename {to_pack}<out_type, Pack>::type>'. \
                              format(to_pack=to_pack)
            in_out_typedefs = '''typedef typename Left::out_type in_type;
                                 typedef ToType out_type;'''
            to_node_type = 'typename to_node_t<Left>::type, none_t, ToType'
        else:
            to_typ_arg = '' if op_name != 'to_mask' else 'out_type(), '
            to_typ_tmpl_arg = ''
            in_out_typedefs = '''typedef typename Left::out_type in_type;
                                 typedef typename Left::out_type out_type;'''

        # Argument list of the underlying nsimd call; {cpu_gpu} and {tmpl}
        # are filled in separately for the scalar, GPU and SIMD getters.
        impl_args = 'left.{cpu_gpu}_get{tmpl}(i)'
        if (len(operator.params[1:]) >= 2):
            if operator.params[2] == 'p':
                impl_args += ', s'
            else:
                impl_args += ', right.{cpu_gpu}_get{tmpl}(i)'
        if (len(operator.params[1:]) >= 3):
            impl_args += ', extra.{cpu_gpu}_get{tmpl}(i)'

        impl_scalar = 'return nsimd::scalar_{}({}{});'. \
                      format(op_name, to_typ_arg,
                             impl_args.format(cpu_gpu='scalar', tmpl=''))

        impl_gpu = 'return nsimd::gpu_{}({}{});'. \
                   format(op_name, to_typ_arg,
                          impl_args.format(cpu_gpu='gpu', tmpl=''))

        impl_simd = 'return nsimd::{}{}({});'. \
                      format(op_name, to_typ_tmpl_arg,
                             impl_args.format(cpu_gpu='template simd',
                                              tmpl='<Pack>'))

        functions += \
        '''struct {op_name}_t {{}};

        template <{tmpl_args}>
        struct node<{op_name}_t, {tmpl_params}> {{
          {in_out_typedefs}

          {members}

          nsimd::nat size() const {{
            {size}
          }}

        #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM)
          __device__ {return_type} gpu_get(nsimd::nat i) const {{
            {impl_gpu}
          }}
        #else
          {return_type} scalar_get(nsimd::nat i) const {{
            {impl_scalar}
          }}
          template <typename Pack> typename {to_pack}<out_type, Pack>::type
          simd_get(nsimd::nat i) const {{
            {impl_simd}
          }}
        #endif
        }};

        template<{tmpl_args}>
        node<{op_name}_t, {to_node_type}> {op_name}({args}) {{
          node<{op_name}_t, {to_node_type}> ret;
          {members_assignment}
          return ret;
        }}'''.format(op_name=op_name, tmpl_args=tmpl_args, size=size,
                     tmpl_params=tmpl_params, return_type=return_type,
                     args=args, to_pack=to_pack, to_node_type=to_node_type,
                     members=members, members_assignment=members_assignment,
                     in_out_typedefs=in_out_typedefs,
                     impl_gpu=impl_gpu,
                     impl_scalar=impl_scalar,
                     impl_simd=impl_simd)

        # Unary C++ operator overload (identity test against None).
        if operator.cxx_operator is not None and len(operator.params) == 2:
            functions += \
            '''
            template <typename Op, typename Left, typename Right,
                      typename Extra>
            node<{op_name}_t, node<Op, Left, Right, Extra>, none_t, none_t>
            operator{cxx_operator}(node<Op, Left, Right, Extra> const &node) {{
              return tet1d::{op_name}(node);
            }}'''.format(op_name=op_name,
                         cxx_operator=operator.cxx_operator)
        # Binary C++ operator overloads: node/literal, literal/node and
        # node/node.
        if operator.cxx_operator is not None and len(operator.params) == 3:
            functions += '''

            template <typename Op, typename Left, typename Right,
                      typename Extra, typename T>
            node<{op_name}_t, node<Op, Left, Right, Extra>,
                 node<scalar_t, none_t, none_t,
                      typename node<Op, Left, Right, Extra>::in_type>, none_t>
            operator{cxx_operator}(node<Op, Left, Right, Extra> const &node, T a) {{
              typedef typename tet1d::node<Op, Left, Right, Extra>::in_type S;
              return tet1d::{op_name}(node, literal_to<S>::impl(a));
            }}

            template <typename T, typename Op, typename Left, typename Right,
                      typename Extra>
            node<{op_name}_t, node<scalar_t, none_t, none_t,
                              typename node<Op, Left, Right, Extra>::in_type>,
                 node<Op, Left, Right, Extra>, none_t>
            operator{cxx_operator}(T a, node<Op, Left, Right, Extra> const &node) {{
              typedef typename tet1d::node<Op, Left, Right, Extra>::in_type S;
              return tet1d::{op_name}(literal_to<S>::impl(a), node);
            }}

            template <typename LeftOp, typename LeftLeft, typename LeftRight,
                      typename LeftExtra, typename RightOp, typename RightLeft,
                      typename RightRight, typename RightExtra>
            node<{op_name}_t, node<LeftOp, LeftLeft, LeftRight, LeftExtra>,
                              node<RightOp, RightLeft, RightRight, RightExtra>,
                 none_t>
            operator{cxx_operator}(node<LeftOp, LeftLeft, LeftRight,
                                LeftExtra> const &left,
                           node<RightOp, RightLeft, RightRight,
                                RightExtra> const &right) {{
              return tet1d::{op_name}(left, right);
            }}'''.format(op_name=op_name, cxx_operator=operator.cxx_operator)

        functions += '\n\n{}\n\n'.format(common.hbar)

    # Write the code to file
    dirname = os.path.join(opts.include_dir, 'modules', 'tet1d')
    common.mkdir_p(dirname)
    filename = os.path.join(dirname, 'functions.hpp')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        out.write('#ifndef NSIMD_MODULES_TET1D_FUNCTIONS_HPP\n')
        out.write('#define NSIMD_MODULES_TET1D_FUNCTIONS_HPP\n\n')
        out.write('namespace tet1d {\n\n')
        out.write('{}\n\n'.format(common.hbar))
        out.write(functions)
        out.write('} // namespace tet1d\n\n')
        out.write('#endif\n')
    common.clang_format(opts, filename)
Exemplo n.º 23
0
def doit(opts):
    """Generate the ``scalar_utilities.h`` header.

    The header is created in ``opts.include_dir`` and holds, in order:
    forward declarations of all scalar reinterpret functions (C versions,
    then C++ versions guarded by NSIMD_CUDA / NSIMD_ROCM) and, for every
    operator flagged ``has_scalar_impl``, a C function, a C++ wrapper inside
    namespace ``nsimd`` and the code returned by ``get_gpu_impl``.

    Returns early, writing nothing, when ``common.can_create_filename``
    rejects the target file. The finished file is handed to
    ``common.clang_format``.
    """
    common.myprint(opts, 'Generating scalar implementation for CPU and GPU')
    filename = os.path.join(opts.include_dir, 'scalar_utilities.h')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        # Reinterprets are declared first: their declarations are written at
        # the top of the header, before any operator definition.
        scalar_tmp = []
        gpu_tmp = []
        for t in operators.Reinterpret.types:
            for tt in common.get_output_types(t,
                                              operators.Reinterpret.output_to):
                scalar_tmp += [operators.Reinterpret(). \
                               get_scalar_signature('cpu', t, tt, 'c')]
                gpu_tmp += [operators.Reinterpret(). \
                            get_scalar_signature('gpu', t, tt, 'cxx')]
        # Turn each signature into a declaration (one per line)
        scalar_reinterpret_decls = '\n'.join(['NSIMD_INLINE ' + sig + ';' \
                                              for sig in scalar_tmp])
        gpu_reinterpret_decls = '\n'.join(['inline ' + sig + ';' \
                                           for sig in gpu_tmp])
        # Header prologue: include guard, math/string headers, a diagnostic
        # push (popped at the very end of the file) and the declarations
        out.write(
        '''#ifndef NSIMD_SCALAR_UTILITIES_H
           #define NSIMD_SCALAR_UTILITIES_H

           #if NSIMD_CXX > 0
           #include <cmath>
           #include <cstring>
           #else
           #include <math.h>
           #include <string.h>
           #endif

           #ifdef NSIMD_NATIVE_FP16
             #if defined(NSIMD_IS_GCC)
               #pragma GCC diagnostic push
               #pragma GCC diagnostic ignored "-Wdouble-promotion"
             #elif defined(NSIMD_IS_CLANG)
               #pragma clang diagnostic push
               #pragma clang diagnostic ignored "-Wdouble-promotion"
             #endif
           #endif

           {scalar_reinterpret_decls}

           #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM)

           namespace nsimd {{

           {gpu_reinterpret_decls}

           }} // namespace nsimd

           #endif
           '''. \
           format(scalar_reinterpret_decls=scalar_reinterpret_decls,
                  gpu_reinterpret_decls=gpu_reinterpret_decls))
        for op_name, operator in operators.operators.items():
            if not operator.has_scalar_impl:
                continue
            # Operators whose parameters are all logicals ('l') are emitted
            # once, without a type suffix; their CPU signatures are requested
            # with empty type names.
            # NOTE(review): t and tt are not assigned in this branch. They
            # still hold the values left by the last loop that ran (the
            # reinterpret loops above or the type loops of a previous
            # operator) -- confirm this is intended.
            if operator.params == ['l'] * len(operator.params):
                out.write('\n\n' + common.hbar + '\n\n')
                # c_args is "a0, a1, ...": one name per entry of
                # operator.params but one (presumably the first entry
                # describes the return value -- TODO confirm)
                out.write(\
                '''NSIMD_INLINE {c_sig} {{
                  {scalar_impl}
                }}

                #if NSIMD_CXX > 0

                namespace nsimd {{

                NSIMD_INLINE {cxx_sig} {{
                  return nsimd_scalar_{op_name}({c_args});
                }}

                {gpu_impl}

                }} // namespace nsimd

                #endif'''.format(
                c_sig=operator.get_scalar_signature('cpu', '', '', 'c'),
                cxx_sig=operator.get_scalar_signature('cpu', '', '', 'cxx'),
                op_name=op_name,
                c_args=', '.join(['a{}'.format(i - 1) \
                               for i in range(1, len(operator.params))]),
                scalar_impl=scalar.get_impl(operator, tt, t),
                gpu_impl=get_gpu_impl(
                    operator.get_scalar_signature('gpu', t, tt, 'cxx'),
                    cuda.get_impl(operator, tt, t),
                    rocm_impl=rocm.get_impl(operator, tt, t))))
                continue
            # Every other operator gets one function per (input type t,
            # output type tt) pair
            for t in operator.types:
                tts = common.get_output_types(t, operator.output_to)
                for tt in tts:
                    out.write('\n\n' + common.hbar + '\n\n')
                    # The C function name is suffixed with t alone for closed
                    # operators and with "tt_t" otherwise
                    out.write(\
                    '''NSIMD_INLINE {c_sig} {{
                      {scalar_impl}
                    }}

                    #if NSIMD_CXX > 0

                    namespace nsimd {{

                    NSIMD_INLINE {cxx_sig} {{
                      return nsimd_scalar_{op_name}_{suffix}({c_args});
                    }}

                    {gpu_impl}

                    }} // namespace nsimd

                    #endif'''.format(
                    c_sig=operator.get_scalar_signature('cpu', t, tt, 'c'),
                    cxx_sig=operator.get_scalar_signature('cpu', t, tt, 'cxx'),
                    op_name=op_name,
                    suffix=t if operator.closed else '{}_{}'.format(tt, t),
                    c_args=', '.join(['a{}'.format(i - 1) \
                                   for i in range(1, len(operator.params))]),
                    scalar_impl=scalar.get_impl(operator, tt, t),
                    gpu_impl=get_gpu_impl(
                        operator.get_scalar_signature('gpu', t, tt, 'cxx'),
                        cuda.get_impl(operator, tt, t),
                        rocm_impl=rocm.get_impl(operator, tt, t))))

        # Header epilogue: pop the diagnostics pushed in the prologue and
        # close the include guard
        out.write('''

                  {hbar}

                  #ifdef NSIMD_NATIVE_FP16
                    #if defined(NSIMD_IS_GCC)
                      #pragma GCC diagnostic pop
                    #elif defined(NSIMD_IS_CLANG)
                      #pragma clang diagnostic pop
                    #endif
                  #endif

                  #endif'''.format(hbar=common.hbar))
    common.clang_format(opts, filename)
Exemplo n.º 24
0
def gen_reinterpret_convert(opts, op, from_typ, to_typ, lang):
    """Write the round-trip test of ``op`` between two element types.

    The generated program fills a vector of ``from_typ`` with random values,
    applies ``op`` towards ``to_typ`` and then back to ``from_typ`` (for
    ``upcvt`` the way back is ``downcvt``), and exits with a failure status
    as soon as one output element differs from its input.

    Args:
        opts: generator options, forwarded to ``get_filename`` and
            ``common.clang_format``.
        op: operator under test; only ``op.name`` is read here.
        from_typ: name of the element type of the input and output buffers.
        to_typ: name of the intermediate element type.
        lang: ``'c_base'`` or ``'cxx_base'``; any other value selects the
            advanced C++ API.

    Nothing is generated when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, '{}_to_{}'.format(from_typ, to_typ),
                            lang)
    if filename is None:
        return
    logical = 'l' if op.name == 'reinterpretl' else ''
    # Placeholders shared by every code snippet below
    names = {'op_name': op.name, 'from_typ': from_typ, 'to_typ': to_typ,
             'logical': logical}
    if lang == 'c_base':
        if op.name == 'upcvt':
            comp = '''{{
                        vecx2({to_typ}) tmp =
                          vupcvt(vload{logical}a(in, {from_typ}),
                                                 {from_typ}, {to_typ});
                        vstore{logical}a(out, vdowncvt(
                            tmp.v0, tmp.v1, {to_typ}, {from_typ}),
                            {from_typ});
                      }}'''.format(**names)
        else:
            comp = '''vstore{logical}a(out, v{op_name}(v{op_name}(
                        vload{logical}a(in, {from_typ}), {from_typ}, {to_typ}),
                          {to_typ}, {from_typ}), {from_typ});'''. \
                          format(**names)
    elif lang == 'cxx_base':
        if op.name == 'upcvt':
            comp = '''vecx2({to_typ}) tmp =
                        nsimd::upcvt(nsimd::load{logical}a(
                            in, {from_typ}()), {from_typ}(), {to_typ}());
                        nsimd::store{logical}a(out, nsimd::downcvt(
                            tmp.v0, tmp.v1, {to_typ}(), {from_typ}()),
                            {from_typ}());'''.format(**names)
        else:
            comp = '''nsimd::store{logical}a(out, nsimd::{op_name}(
                        nsimd::{op_name}(nsimd::load{logical}a(
                          in, {from_typ}()), {from_typ}(), {to_typ}()),
                            {to_typ}(), {from_typ}()), {from_typ}());'''. \
                          format(**names)
    else:
        if op.name == 'upcvt':
            comp = \
            '''nsimd::packx2<{to_typ}> tmp = nsimd::upcvt<
                 nsimd::pack{logical}x2<{to_typ}> >(nsimd::load{logical}a<
                   nsimd::pack{logical}<{from_typ}> >(in));
               nsimd::store{logical}a(out, nsimd::downcvt<
                 nsimd::pack{logical}<{from_typ}> >(tmp.v0, tmp.v1));'''. \
                 format(**names)
        else:
            comp = \
            '''nsimd::store{logical}a(out, nsimd::{op_name}<
                 nsimd::pack{logical}<{from_typ}> >(nsimd::{op_name}<
                   nsimd::pack{logical}<{to_typ}> >(nsimd::load{logical}a<
                     nsimd::pack{logical}<{from_typ}> >(in))));'''. \
                     format(**names)

    # C expression producing one random input element
    if logical == 'l':
        rand = '(rand() % 2)'
    elif op.name == 'reinterpret' and to_typ == 'f16' and \
         from_typ in ['i16', 'u16']:
        rand = '(15360 /* no denormal */ | (1 << (rand() % 4)))'
    elif to_typ in common.utypes or from_typ in common.utypes:
        # Unsigned types involved: stick to non-negative values
        rand = '(1 << (rand() % 4))'
    else:
        rand = '((2 * (rand() % 2) - 1) * (1 << (rand() % 4)))'
    if from_typ == 'f16':
        # No trailing ';' here: the template below already ends the
        # statement, a second one would emit an empty statement.
        rand = 'nsimd_f32_to_f16((f32){})'.format(rand)
        # f16 values are compared through their 16-bit pattern
        neq_test = '(*(u16*)&in[j]) != (*(u16*)&out[j])'
    else:
        rand = '({}){}'.format(from_typ, rand)
        neq_test = 'in[j] != out[j]'
    with common.open_utf8(filename) as out:
        out.write('''{includes}

           #define CHECK(a) {{ \\
             errno = 0; \\
             if (!(a)) {{ \\
               fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                       __LINE__, strerror(errno)); \\
               fflush(stderr); \\
               exit(EXIT_FAILURE); \\
             }} \\
           }}

           int main(void) {{
             int i, j;
             {from_typ} *in, *out;
             int len = vlen({from_typ});

             fprintf(stdout,
                     "test of {op_name} from {from_typ} to {to_typ}...\\n");
             CHECK(in = ({from_typ}*)nsimd_aligned_alloc(len * {sizeof}));
             CHECK(out = ({from_typ}*)nsimd_aligned_alloc(len * {sizeof}));

             for (i = 0; i < 100; i++) {{
               for (j = 0; j < len; j++) {{
                 in[j] = {rand};
               }}

               {comp}

               for (j = 0; j < len; j++) {{
                 if ({neq_test}) {{
                   exit(EXIT_FAILURE);
                 }}
               }}
             }}

             fprintf(stdout,
                     "test of {op_name} from {from_typ} to {to_typ}... OK\\n");
             return EXIT_SUCCESS;
           }}'''.format(includes=get_includes(lang),
                        op_name=op.name,
                        to_typ=to_typ,
                        from_typ=from_typ,
                        comp=comp,
                        rand=rand,
                        neq_test=neq_test,
                        sizeof=common.sizeof(from_typ)))
    common.clang_format(opts, filename)
Exemplo n.º 25
0
def gen_nbtrue(opts, op, typ, lang):
    """Write the test program of the ``nbtrue`` operator for ``typ``.

    The generated program loads a logical vector from a buffer of ``typ``
    and checks the count returned by ``nbtrue`` in three situations: all
    elements true, all elements false, and only the first element true.

    Args:
        opts: generator options, forwarded to ``get_filename`` and
            ``common.clang_format``.
        op: operator under test; only ``op.name`` is read here.
        typ: name of the element type.
        lang: ``'c_base'`` or ``'cxx_base'``; any other value selects the
            advanced C++ API.

    Nothing is generated when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return
    if lang == 'c_base':
        nbtrue = 'vnbtrue(vloadla(buf, {}), {})'.format(typ, typ)
    elif lang == 'cxx_base':
        nbtrue = 'nsimd::nbtrue(nsimd::loadla(buf, {}()), {}())'. \
                 format(typ, typ)
    else:
        nbtrue = 'nsimd::nbtrue(nsimd::loadla<nsimd::packl<{}> >(buf))'. \
                 format(typ)
    # C expressions for "false" and "true" in the element type
    if typ == 'f16':
        scalar0 = 'nsimd_f32_to_f16(0)'
        scalar1 = 'nsimd_f32_to_f16(1)'
    else:
        scalar0 = '({})0'.format(typ)
        scalar1 = '({})1'.format(typ)
    with common.open_utf8(filename) as out:
        out.write('''{includes}

           #define CHECK(a) {{ \\
             errno = 0; \\
             if (!(a)) {{ \\
               fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                       __LINE__, strerror(errno)); \\
               fflush(stderr); \\
               exit(EXIT_FAILURE); \\
             }} \\
           }}

           int main(void) {{
             int i;
             {typ} *buf;
             int len = vlen({typ});

             fprintf(stdout, "test of {op_name} over {typ}...\\n");
             CHECK(buf = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));

             /* Test with all elements to true */
             for (i = 0; i < len; i++) {{
               buf[i] = {scalar1};
             }}
             if ({nbtrue} != len) {{
               exit(EXIT_FAILURE);
             }}

             /* Test with all elements to false */
             for (i = 0; i < len; i++) {{
               buf[i] = {scalar0};
             }}
             if ({nbtrue} != 0) {{
               exit(EXIT_FAILURE);
             }}

             /* Test with only one element to true */
             buf[0] = {scalar1};
             if ({nbtrue} != 1) {{
               exit(EXIT_FAILURE);
             }}

             fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
             return EXIT_SUCCESS;
           }}'''.format(includes=get_includes(lang),
                        op_name=op.name,
                        typ=typ,
                        nbtrue=nbtrue,
                        scalar0=scalar0,
                        scalar1=scalar1,
                        sizeof=common.sizeof(typ)))
    common.clang_format(opts, filename)
Exemplo n.º 26
0
def gen_load_store(opts, op, typ, lang):
    """Write the round-trip test of a multi-vector load or store operator.

    The operator name is expected to look like ``load<deg><align>`` or
    ``store<deg><align>`` where ``deg`` is a single digit. The generated
    program fills a buffer with random values, loads it ``deg`` vectors at a
    time, stores the vectors into a second buffer and fails if the two
    buffers differ anywhere.

    Args:
        opts: generator options, forwarded to ``get_filename`` and
            ``common.clang_format``.
        op: operator under test; only ``op.name`` is read here.
        typ: name of the element type.
        lang: ``'c_base'`` or ``'cxx_base'``; any other value selects the
            advanced C++ API.

    Raises:
        ValueError: if ``op.name`` starts with neither ``load`` nor
            ``store``.

    Nothing is generated when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return
    # The two characters following the prefix are the degree and the
    # alignment letter
    for prefix in ('load', 'store'):
        if op.name.startswith(prefix):
            deg = op.name[len(prefix)]
            align = op.name[len(prefix) + 1]
            break
    else:
        raise ValueError(
            'Operator "{}" is neither a load nor a store'.format(op.name))
    variables = ', '.join(['v.v{}'.format(i) for i in range(int(deg))])
    if lang == 'c_base':
        load_store = \
        '''vecx{deg}({typ}) v = vload{deg}{align}(&vin[i], {typ});
           vstore{deg}{align}(&vout[i], {variables}, {typ});'''. \
           format(deg=deg, typ=typ, align=align, variables=variables)
    elif lang == 'cxx_base':
        load_store = \
        '''vecx{deg}({typ}) v = nsimd::load{deg}{align}(&vin[i], {typ}());
           nsimd::store{deg}{align}(&vout[i], {variables}, {typ}());'''. \
           format(deg=deg, typ=typ, align=align, variables=variables)
    else:
        load_store = \
        '''nsimd::packx{deg}<{typ}> v = nsimd::load{deg}{align}<
                                          nsimd::packx{deg}<{typ}> >(&vin[i]);
           nsimd::store{deg}{align}(&vout[i], {variables});'''. \
           format(deg=deg, typ=typ, align=align, variables=variables)
    if typ == 'f16':
        # f16 buffers are filled and compared through their 16-bit pattern
        rand = '*((u16*)vin + i) = nsimd_f32_to_u16((float)(rand() % 10));'
        comp = '*((u16 *)vin + i) != *((u16 *)vout + i)'
    else:
        rand = 'vin[i] = ({})(rand() % 10);'.format(typ)
        comp = 'vin[i] != vout[i]'
    with common.open_utf8(filename) as out:
        # The comparison loop walks all n elements: every one of them goes
        # through the load/store loop, so every one of them must match.
        out.write('''{includes}

           #define SIZE (2048 / {sizeof})

           #define STATUS "test of {op_name} over {typ}"

           #define CHECK(a) {{ \\
             errno = 0; \\
             if (!(a)) {{ \\
               fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                       __LINE__, strerror(errno)); \\
               fflush(stderr); \\
               exit(EXIT_FAILURE); \\
             }} \\
           }}

           int main(void) {{
             int i;
             {typ} *vin, *vout;
             int len = vlen({typ});
             int n = SIZE * {deg} * len;

             fprintf(stdout, "test of {op_name} over {typ}...\\n");
             CHECK(vin = ({typ}*)nsimd_aligned_alloc(n * {sizeof}));
             CHECK(vout = ({typ}*)nsimd_aligned_alloc(n * {sizeof}));

             /* Fill with random data */
             for (i = 0; i < n; i++) {{
               {rand}
             }}

             /* Load and put back data into vout */
             for (i = 0; i < n; i += {deg} * len) {{
               {load_store}
             }}

             /* Compare results */
             for (i = 0; i < n; i++) {{
               if ({comp}) {{
                 fprintf(stdout, STATUS "... FAIL\\n");
                 fflush(stdout);
                 return -1;
               }}
             }}

             fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
             return EXIT_SUCCESS;
           }}'''.format(includes=get_includes(lang),
                        op_name=op.name,
                        typ=typ,
                        rand=rand,
                        deg=deg,
                        sizeof=common.sizeof(typ),
                        load_store=load_store,
                        comp=comp))
    common.clang_format(opts, filename)
Exemplo n.º 27
0
def gen_addv(opts, op, typ, lang):
    """Write the test program of a horizontal reduction such as ``addv``.

    The generated program fills one vector with random values, accumulates
    them in a scalar loop to get a reference, applies the operator to the
    loaded vector and fails when the relative distance between both results
    exceeds ``2^-nbits`` (``nbits`` depends on ``typ``).

    Args:
        opts: generator options, forwarded to ``get_filename`` and
            ``common.clang_format``.
        op: operator under test; only ``op.name`` is read here.
        typ: name of the element type; must be ``'f16'``, ``'f32'`` or
            ``'f64'`` (the tolerance table has no other entry).
        lang: ``'c_base'`` or ``'cxx_base'``; any other value selects the
            advanced C++ API.

    Nothing is generated when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return
    if lang == 'c_base':
        op_test = 'v{}(vloada(buf, {}), {})'.format(op.name, typ, typ)
        extra_code = relative_distance_c
    elif lang == 'cxx_base':
        op_test = 'nsimd::{}(nsimd::loada(buf, {}()), {}())'.format(
            op.name, typ, typ)
        extra_code = relative_distance_cpp
    else:
        # "> >" rather than ">>": the latter only closes two template
        # argument lists since C++11
        op_test = 'nsimd::{}(nsimd::loada<nsimd::pack<{}> >(buf))'.format(
            op.name, typ)
        extra_code = relative_distance_cpp

    # Tolerance exponent per type: results must agree up to 2^-nbits
    nbits = {'f16': '10', 'f32': '21', 'f64': '48'}
    head = '''#define _POSIX_C_SOURCE 200112L
              {includes}
              #include <float.h>
              #include <math.h>

              #define CHECK(a) {{ \\
                errno = 0; \\
                if (!(a)) {{ \\
                fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                        __LINE__, strerror(errno)); \\
                fflush(stderr); \\
                exit(EXIT_FAILURE); \\
                }} \\
              }}

              {extra_code}'''.format(includes=get_includes(lang),
                                     extra_code=extra_code)

    if typ == 'f16':
        # The reference is accumulated in f32; buffer elements are read
        # back through their 16-bit pattern
        init = '''f16 res = nsimd_f32_to_f16(0.0f);
                  f32 ref = 0.0f;'''
        rand = '''nsimd_f32_to_f16((f32)(2 * (rand() % 2) - 1) *
                         (f32)(1 << (rand() % 4)) /
                           (f32)(1 << (rand() % 4)))'''
        init_statement = 'buf[i] = {};'.format(rand)
        ref_statement = 'ref += nsimd_u16_to_f32(((u16 *)buf)[i]);'
        test = '''if (relative_distance((double) ref,
                                        (double) nsimd_f16_to_f32(res)) >
                                          get_2th_power(-{nbits})) {{
                    return EXIT_FAILURE;
                  }}'''.format(nbits=nbits[typ])
    else:
        init = '''{typ} ref = ({typ})0;
                  {typ} res = ({typ})0;'''.format(typ=typ)
        rand = '''({typ})(2 * (rand() % 2) - 1) *
                      ({typ})(1 << (rand() % 4)) /
                        ({typ})(1 << (rand() % 4))'''.format(typ=typ)
        init_statement = 'buf[i] = {};'.format(rand)
        ref_statement = 'ref += buf[i];'
        test = '''if (relative_distance((double)ref,
                      (double)res) > get_2th_power(-{nbits})) {{
                    return EXIT_FAILURE;
                  }}'''.format(nbits=nbits[typ])
    with common.open_utf8(filename) as out:
        out.write('''{head}

            int main(void) {{

            const int len = vlen({typ});
            {typ} *buf;
            int i;
            {init}

            fprintf(stdout, "test of {op_name} over {typ}...\\n");
            CHECK(buf = ({typ} *)nsimd_aligned_alloc(len * {sizeof}));

            for(i = 0; i < len; i++) {{
                {init_statement}
            }}

            for(i = 0; i < len; i++) {{
                {ref_statement}
            }}

            res = {op_test};

            {test}

            fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
            return EXIT_SUCCESS;
            }}
            '''.format(head=head,
                       init=init,
                       op_name=op.name,
                       typ=typ,
                       sizeof=common.sizeof(typ),
                       init_statement=init_statement,
                       ref_statement=ref_statement,
                       op_test=op_test,
                       test=test))
    common.clang_format(opts, filename)
Exemplo n.º 28
0
def gen_test(opts, op, typ, lang, ulps):
    """Write the generic test program of operator ``op`` for type ``typ``.

    The program is produced from the module-level ``template`` filled with
    the pieces returned by ``get_content`` plus a comparison statement
    (``comp``) chosen as follows:

    * bitwise operators (not, and, or, xor, andnot): bit-for-bit comparison;
    * ``op.tests_ulps``: relative distance against a fixed tolerance;
    * ``op.src``: relative distance against the tolerances found in
      ``ulps`` for this operator, or against a per-type default when the
      operator has no entry;
    * anything else: strict equality.

    Args:
        opts: generator options, forwarded to ``get_filename`` and
            ``common.clang_format``.
        op: operator under test; ``name``, ``tests_ulps``, ``src`` and
            ``tests_mpfr`` are read.
        typ: name of the element type.
        lang: ``'c_base'`` selects C; any other value selects C++.
        ulps: mapping ``ulps[op.name][typ]`` giving the keys ``"ulps"``,
            ``"ulps for denormalized output"``, ``"Inf Error"`` and
            ``"NaN Error"``.

    Nothing is generated when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return

    content = get_content(op, typ, lang)

    if op.name in ['not', 'and', 'or', 'xor', 'andnot']:
        comp = 'return *({uT}*)&mpfr_out != *({uT}*)&nsimd_out'. \
               format(uT=common.bitfield_type[typ])
        # No helper code needed; must still be defined for the template
        extra_code = ''
    else:
        # C expressions of the reference and of the computed value
        if typ == 'f16':
            left = '(double)nsimd_f16_to_f32(mpfr_out)'
            right = '(double)nsimd_f16_to_f32(nsimd_out)'
        elif typ == 'f32':
            left = '(double)mpfr_out'
            right = '(double)nsimd_out'
        else:
            left = 'mpfr_out'
            right = 'nsimd_out'
        relative_distance = relative_distance_c if lang == 'c_base' \
                            else relative_distance_cpp
        if op.tests_ulps:
            comp = 'return relative_distance({}, {}) > get_2th_power(-{nbits})'. \
                   format(left, right, nbits='11' if typ != 'f16' else '9')
            extra_code = relative_distance
        elif op.src:
            if op.name in ulps:
                op_ulps = ulps[op.name][typ]
                nbits = op_ulps["ulps"]
                nbits_dnz = op_ulps["ulps for denormalized output"]
                inf_error = op_ulps["Inf Error"]
                nan_error = op_ulps["NaN Error"]

                comp = '''#pragma GCC diagnostic push
                          #pragma GCC diagnostic ignored "-Wconversion"
                          #pragma GCC diagnostic ignored "-Wdouble-promotion"
                          '''
                if nan_error:
                    # Ignore error with NaN output, we know we will encounter some
                    comp += 'if ({isnan}((double){left})) return 0;\n'
                else:
                    # Report a failure if one is NaN and not the other
                    comp += ('if ({isnan}((double){left}) ^ '
                             '{isnan}((double){right})) return 1;\n')

                if inf_error:
                    # Ignore error with infinite output, we know we will encounter some
                    comp += 'if ({isinf}((double){left})) return 0;\n'
                else:
                    # One is infinite and not the other
                    comp += ('if ({isinf}((double){left}) ^ '
                             '{isinf}((double){right})) return 1;\n')
                    # Wrong sign for infinite
                    comp += ('if ({isinf}((double){left}) && '
                             '{isinf}((double){right}) && '
                             '({right}*{left} < 0)) return 1;\n')

                comp += '''
                if ({isnormal}((double){left})) {{
                    return relative_distance({left}, {right}) > get_2th_power(-({nbits}));
                }} else {{
                    return relative_distance({left}, {right}) > get_2th_power(-({nbits_dnz}));
                }}
                #pragma GCC diagnostic pop
                '''

                # C uses the <math.h> macros, C++ the std:: functions
                std = '' if lang == 'c_base' else 'std::'
                comp = comp.format(left=left,
                                   right=right,
                                   nbits=nbits,
                                   nbits_dnz=nbits_dnz,
                                   isnormal=std + 'isnormal',
                                   isinf=std + 'isinf',
                                   isnan=std + 'isnan')

            else:
                # Default tolerance exponent per type
                nbits = {'f16': '10', 'f32': '21', 'f64': '48'}
                comp = 'return relative_distance({}, {}) > get_2th_power(-{nbits})'. \
                        format(left, right, nbits=nbits[typ])

            extra_code = relative_distance
        else:
            comp = 'return {} != {}'.format(left, right)
            extra_code = ''

    includes = get_includes(lang)
    if op.src or op.tests_ulps or op.tests_mpfr:
        if lang == 'c_base':
            includes = '''#define _POSIX_C_SOURCE 200112L

                          #include <math.h>
                          #include <float.h>
                          {}'''.format(includes)
        else:
            includes = '''#define _POSIX_C_SOURCE 200112L

                          #include <cmath>
                          #include <cfloat>
                          {}'''.format(includes)
        # mpfr.h is only pulled in on Linux
        if op.tests_mpfr and sys.platform.startswith('linux'):
            includes = includes + '''
            #pragma GCC diagnostic push
            #pragma GCC diagnostic ignored "-Wsign-conversion"
            #include <mpfr.h>
            #pragma GCC diagnostic pop
            '''

    with common.open_utf8(filename) as out:
        out.write(template.format( \
            includes=includes, sizeof=common.sizeof(typ), typ=typ,
            op_name=op.name, year=date.today().year, comp=comp,
            extra_code=extra_code, **content))
    common.clang_format(opts, filename)
Exemplo n.º 29
0
def gen_reverse(opts, op, typ, lang):
    """Write the test program of the ``reverse`` operator for ``typ``.

    The generated program fills a vector with ``1, 2, ..., len``, reverses
    it and checks that output element ``len - 1 - i`` equals input element
    ``i`` for every ``i``.

    Args:
        opts: generator options, forwarded to ``get_filename`` and
            ``common.clang_format``.
        op: operator under test; only ``op.name`` is read here.
        typ: name of the element type.
        lang: one of ``'c_base'``, ``'cxx_base'`` or ``'cxx_adv'``.

    Raises:
        ValueError: if ``lang`` is not one of the three supported values.

    Nothing is generated when ``get_filename`` returns ``None``.
    """
    filename = get_filename(opts, op, typ, lang)
    if filename is None:
        return
    if lang == 'c_base':
        test_code = 'vstorea( out, vreverse( vloada( in, {typ} ), {typ} ), {typ} );'.format(
            typ=typ)
    elif lang == 'cxx_base':
        test_code = 'nsimd::storea( out, nsimd::reverse( nsimd::loada( in, {typ}() ), {typ}() ), {typ}() );'.format(
            typ=typ)
    elif lang == 'cxx_adv':
        # "> >" rather than ">>": the latter only closes two template
        # argument lists since C++11
        test_code = 'nsimd::storea( out, nsimd::reverse( nsimd::loada<nsimd::pack<{typ}> >( in ) ) );'.format(
            typ=typ)
    else:
        raise ValueError('Unsupported language "{}"'.format(lang))

    if typ == 'f16':
        init = 'in[ i ] = nsimd_f32_to_f16((float)(i + 1));'
        comp = 'ok &= nsimd_f16_to_f32( out[len - 1 - i] ) == nsimd_f16_to_f32( in[i] );'
    else:
        init = 'in[ i ] = ({typ})(i + 1);'.format(typ=typ)
        comp = 'ok &= out[len - 1 - i] == in[i];'

    with common.open_utf8(filename) as out:
        # The loop counter is an int like len: a narrower counter could
        # never reach len on vectors of 256 elements or more.
        out.write('''{includes}

           #define CHECK(a) {{ \\
             errno = 0; \\
             if (!(a)) {{ \\
               fprintf(stderr, "ERROR: " #a ":%d: %s\\n", \\
                       __LINE__, strerror(errno)); \\
               fflush(stderr); \\
               exit(EXIT_FAILURE); \\
             }} \\
           }}

           int main(void) {{
             int i;
             int ok;
             {typ} * in;
             {typ} * out;

             int len = vlen({typ});

             fprintf(stdout, "test of {op_name} over {typ}...\\n");
             CHECK(in = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));
             CHECK(out = ({typ}*)nsimd_aligned_alloc(len * {sizeof}));

             for( i = 0 ; i < len ; ++i )
             {{
                 {init}
             }}

             {test_code}

             ok = 1;

             for( i = 0 ; i < len ; ++i )
             {{
               {comp}
             }}

             /*fprintf( stdout, "%f %f %f %f\\n", in[ 0 ], out[ 0 ], in[ 1 ], out[ 1 ] );*/

             if( ok )
             {{
               fprintf(stdout, "test of {op_name} over {typ}... OK\\n");
             }}
             else
             {{
               fprintf(stderr, "test of {op_name} over {typ}... FAIL\\n");
               exit(EXIT_FAILURE);
             }}

             nsimd_aligned_free( in );
             nsimd_aligned_free( out );

             return EXIT_SUCCESS;
           }}
        '''.format(includes=get_includes(lang),
                   op_name=op.name,
                   typ=typ,
                   test_code=test_code,
                   sizeof=common.sizeof(typ),
                   init=init,
                   comp=comp))

    common.clang_format(opts, filename)
Exemplo n.º 30
0
def doit(opts):
    """Download Sleef and adapt its sources for inclusion in NSIMD.

    The steps are, in order:

    1. Download the Sleef release archive for ``opts.sleef_version`` from
       GitHub and unzip it in ``<opts.script_dir>/../_deps-sleef``.
    2. Copy a fixed list of Sleef source files (flattened, i.e. without
       their sub-directories) into ``opts.src_dir``.
    3. Patch the copied files: comment out the ``DALIAS_`` lines, replace
       the ``Sleef_x86CpuID`` line by a stub and append NSIMD specific
       macros to ``misc.h``.
    4. Parse Sleef's ``src/libm/funcproto.h`` and write one
       ``rename<SIMD ext>.h`` header per Sleef SIMD extension, then pass
       each header to ``common.clang_format``.

    Args:
        opts: options object. This function reads ``sleef_version``,
            ``script_dir`` and ``src_dir`` from it and forwards it to
            ``common.myprint`` and ``common.clang_format``.

    Raises:
        requests.HTTPError: when the server answers the download request
            with an error status.
    """
    common.myprint(opts, 'Copy native Sleef version {}'. \
                         format(opts.sleef_version))

    # First download Sleef
    sleef_dir = os.path.join(opts.script_dir, '..', '_deps-sleef')
    common.mkdir_p(sleef_dir)
    url = 'https://github.com/shibatch/sleef/archive/refs/tags/{}.zip'. \
          format(opts.sleef_version)
    r = requests.get(url, allow_redirects=True)
    # Fail here on an HTTP error instead of saving the error page as a zip
    # and getting an obscure failure from zipfile below.
    r.raise_for_status()
    sleef_zip = os.path.join(sleef_dir, 'sleef.zip')
    with open(sleef_zip, 'wb') as fout:
        fout.write(r.content)

    # Unzip sleef
    with zipfile.ZipFile(sleef_zip, 'r') as fin:
        fin.extractall(path=sleef_dir)

    # Root of the unzipped Sleef tree
    sleef_root = os.path.join(sleef_dir,
                              'sleef-{}'.format(opts.sleef_version))

    # Copy helper function: the destination keeps only the base name
    def copy(filename):
        dst_filename = os.path.basename(filename)
        shutil.copyfile(os.path.join(sleef_root, filename),
                        os.path.join(opts.src_dir, dst_filename))

    # Copy files
    for filename in (
            'src/libm/sleefsimddp.c',
            'src/libm/sleefsimdsp.c',
            'src/libm/sleefdp.c',
            'src/libm/sleefsp.c',
            'src/common/misc.h',
            'src/libm/estrin.h',
            'src/libm/dd.h',
            'src/libm/df.h',
            'src/libm/rempitab.c',
            'src/arch/helpersse2.h',
            'src/arch/helperavx.h',
            'src/arch/helperavx2.h',
            'src/arch/helperavx512f.h',
            'src/arch/helperneon32.h',
            'src/arch/helperadvsimd.h',
            'src/arch/helperpower_128.h',
            'src/arch/helpersve.h',
    ):
        copy(filename)

    # Rewrite a file of opts.src_dir line by line: every line is replaced by
    # transform(line). The result is first written to a temporary file which
    # is then copied over the original one.
    def rewrite_lines(filename, transform):
        src = os.path.join(opts.src_dir, filename)
        dst = os.path.join(opts.src_dir, 'tmp.c')
        with open(src, 'r') as fin, open(dst, 'w') as fout:
            for line in fin:
                fout.write(transform(line))
        shutil.copyfile(dst, src)
        os.remove(dst)

    # Sleef uses aliases but we don't need those so we comment them
    def comment_DALIAS_lines(filename):
        def transform(line):
            if line.startswith('DALIAS_'):
                return '/* {} */\n'.format(line.strip())
            return line

        rewrite_lines(filename, transform)

    comment_DALIAS_lines('sleefsimdsp.c')
    comment_DALIAS_lines('sleefsimddp.c')

    # Sleef provides runtime SIMD detection via cpuid but we don't need it
    def replace_x86_cpuid(filename):
        def transform(line):
            if line.startswith('void Sleef_x86CpuID'):
                return '''static inline
                       void Sleef_x86CpuID(int32_t out[4], uint32_t eax,
                                           uint32_t ecx) {
                         /* We don't care for cpuid detection */
                         out[0] = 0xFFFFFFFF;
                         out[1] = 0xFFFFFFFF;
                         out[2] = 0xFFFFFFFF;
                         out[3] = 0xFFFFFFFF;
                       }
                       '''
            return line

        rewrite_lines(filename, transform)

    replace_x86_cpuid('helpersse2.h')
    replace_x86_cpuid('helperavx.h')
    replace_x86_cpuid('helperavx2.h')
    replace_x86_cpuid('helperavx512f.h')

    # Sleef uses force inline through its INLINE macro defined in misc.h
    # We modify it to avoid warnings and because force inline has been a pain
    # in the past. We also rename some exported symbols.
    with open(os.path.join(opts.src_dir, 'misc.h'), 'a') as fout:
        fout.write('''

        /* NSIMD specific */
        #ifndef NSIMD_SLEEF_MISC_H
        #define NSIMD_SLEEF_MISC_H

        #ifdef INLINE
        #undef INLINE
        #endif
        #define INLINE inline

        #define Sleef_rempitabdp nsimd_sleef_rempitab_f64
        #define Sleef_rempitabsp nsimd_sleef_rempitab_f32

        #endif

        ''')

    # Sleef functions must be renamed properly for each SIMD extension.
    # Moreover their name must contain their precision (in ULPs). This
    # precision is not the same for all functions and some functions can have
    # different flavours (or precisions). The "database" is contained within
    # src/libm/funcproto.h. So we parse it and produce names
    # in headers "rename[SIMD ext].h" to avoid modifying Sleef C files.
    funcproto = os.path.join(sleef_root, 'src', 'libm', 'funcproto.h')
    defines = []
    # Third field of a funcproto.h entry -> suffix of the Sleef function name
    ulp_suffix = {
        '0': '',
        '1': '_u1',
        '2': '_u05',
        '3': '_u35',
        '4': '_u15',
        '5': '_u3500'
    }
    with open(funcproto, 'r') as fin:
        for line in fin:
            # Only lines holding a whole "{ ... }" entry are of interest
            if '{' not in line or '}' not in line:
                continue
            items = [item.strip() \
                     for item in line.strip(' \n\r{},').split(',')]
            name = items[0].strip('"')
            # The entry named NULL ends the parsing
            if name == 'NULL':
                break
            suffix = ulp_suffix[items[2]]
            ulp = items[1] if items[1] != '5' else '05'
            # {det} and {nsimd_ext} are kept as placeholders: they are
            # filled in for each NSIMD extension when writing the headers.
            if ulp == '-1':
                nsimd_name = 'nsimd_sleef_{}_{{nsimd_ext}}'.format(name)
            else:
                nsimd_name = 'nsimd_sleef_{}_u{}{{det}}_{{nsimd_ext}}'. \
                             format(name, ulp)
            # f64 first, then f32 (Sleef appends 'f' to the function name)
            defines.append('#define x{} {}'.format(name + suffix,
                                                   nsimd_name + '_f64'))
            defines.append('#define x{} {}'.format(name + 'f' + suffix,
                                                   nsimd_name + '_f32'))
    defines = '\n'.join(defines)

    # Sleef SIMD extension -> NSIMD SIMD extensions sharing its code
    sleef_to_nsimd = {
        '': ['scalar'],
        'sse2': ['sse2'],
        'sse4': ['sse42'],
        'avx': ['avx'],
        'avx2': ['avx2'],
        'avx512f': ['avx512_knl', 'avx512_skylake'],
        'neon32': ['neon128'],
        'advsimd': ['aarch64'],
        'sve': ['sve128', 'sve256', 'sve512', 'sve1024', 'sve2048'],
        'vsx': ['vmx', 'vsx']
    }

    for simd_ext, nsimd_exts in sleef_to_nsimd.items():
        renameheader = os.path.join(opts.src_dir,
                                    'rename{}.h'.format(simd_ext))
        se = simd_ext if simd_ext != '' else 'scalar'
        with open(renameheader, 'w') as fout:
            fout.write('''#ifndef RENAME{SIMD_EXT}_H
               #define RENAME{SIMD_EXT}_H

               '''.format(SIMD_EXT=se.upper()))
            for nse in nsimd_exts:
                # The scalar section is not guarded by an NSIMD_* macro
                ifdef = '' if simd_ext == '' \
                           else '#ifdef NSIMD_{}'.format(nse.upper())
                endif = '' if simd_ext == '' else '#endif'
                # NOTE: despite the "_f32" in the placeholder names, both
                # expansions hold the f64 and the f32 defines.
                fout.write('''{hbar}
                   /* Naming of functions {nsimd_ext} */

                   {ifdef}

                   #ifdef DETERMINISTIC

                   {defines_det_f32}

                   #else

                   {defines_nondet_f32}

                   #endif

                   #define rempi nsimd_sleef_rempi_{nsimd_ext}
                   #define rempif nsimd_sleef_rempif_{nsimd_ext}
                   #define rempisub nsimd_sleef_rempisub_{nsimd_ext}
                   #define rempisubf nsimd_sleef_rempisubf_{nsimd_ext}
                   #define gammak nsimd_gammak_{nsimd_ext}
                   #define gammafk nsimd_gammafk_{nsimd_ext}

                   {endif}

                   '''.format(nsimd_ext=nse,
                              hbar=common.hbar,
                              ifdef=ifdef,
                              endif=endif,
                              defines_det_f32=defines.format(det='d',
                                                             nsimd_ext=nse),
                              defines_nondet_f32=defines.format(
                                  det='', nsimd_ext=nse)))

            fout.write('\n\n#endif\n\n')

        # Format only once the header is closed so that all buffered writes
        # have reached the disk before the file is handed to the formatter.
        common.clang_format(opts, renameheader)