Exemplo n.º 1
0
def doit(opts):
    """Generate the ULP test sources, one C++ file per operator and float type.

    Only operators whose ``tests_mpfr`` attribute is truthy are considered,
    and 'gammaln', 'lgamma' and 'pow' are always skipped. Each file is written
    to ``opts.ulps_dir`` as ``<operator>_ulp_<type>.cpp`` and then handed to
    ``common.clang_format``.

    Raises:
        Exception: if ``common.ftypes`` yields a type other than 'f16',
            'f32' or 'f64'.
    """
    common.myprint(opts, 'Generating ulps')
    common.mkdir_p(opts.ulps_dir)

    def template_values(typ):
        # Type-dependent substitutions for the C++ template `code`.
        if typ == 'f16':
            return {
                'random_generator': random_f16_generator,
                'convert_to_type': "nsimd_f32_to_f16",
                'convert_from_type': "nsimd_f16_to_f32",
                'mantisse': 10,
                'SIZE': 0xffff,
                'mpfr_suffix': "flt",
            }
        if typ == 'f32':
            return {
                'random_generator': random_f32_generator,
                'convert_to_type': "(f32)",
                'convert_from_type': "",
                'mantisse': 23,
                'SIZE': 0x00ffffff,
                'mpfr_suffix': "flt",
            }
        if typ == 'f64':
            return {
                'random_generator': random_f64_generator,
                'convert_to_type': "(f64)",
                'convert_from_type': "",
                'mantisse': 52,
                'SIZE': 0x00ffffff,
                'mpfr_suffix': "d",
            }
        raise Exception('Unsupported type "{}"'.format(typ))

    excluded_ops = ['gammaln', 'lgamma', 'pow']
    for op_name, operator in operators.operators.items():
        if not operator.tests_mpfr or op_name in excluded_ops:
            continue

        # Substitutions shared by every type of this operator.
        per_operator = {
            'nsimd_func': op_name,
            'mpfr_func': operator.tests_mpfr_name(),
            'mpfr_rnd': ", MPFR_RNDN",
        }

        for typ in common.ftypes:
            values = template_values(typ)
            values.update(per_operator)

            basename = '{}_{}_{}.cpp'.format(op_name, "ulp", typ)
            filename = os.path.join(opts.ulps_dir, basename)
            if not common.can_create_filename(opts, filename):
                continue

            with common.open_utf8(opts, filename) as out:
                out.write(includes)
                out.write(gen_tests.relative_distance_cpp)
                out.write(code.format(typ=typ, **values))

            common.clang_format(opts, filename)
Exemplo n.º 2
0
def doit(opts):
    """Generate a bench for every eligible (function, SIMD extension, type).

    The options are also stored in the module-level ``_opts`` so that other
    functions of this module can read them. Code is generated per SIMD
    extension because other libraries usually do not offer a generic
    interface.
    """
    global _opts
    _opts = opts
    common.myprint(opts, 'Generating benches')

    # FIXME: SIMD extensions, types and functions below are not benched yet.
    skipped_simds = ('neon128', 'cpu')
    skipped_funcs = ('gamma', 'lgamma', 'ziplo', 'ziphi', 'unziphi',
                     'unziplo')

    for func in functions.values():
        if not func.do_bench:
            if opts.verbose:
                common.myprint(opts, 'Skipping bench: {}'.format(func.name))
            continue
        for simd in _opts.simd:
            if simd in skipped_simds:
                continue
            for typ in func.types:
                if typ == 'f16':
                    continue
                # Skip benches that do not match the user-supplied pattern.
                unmatched = opts.match and not opts.match.match(func.name)
                if unmatched or func.name in skipped_funcs:
                    continue
                gen_bench(func, simd, typ)
Exemplo n.º 3
0
def gen_modules_md(opts):
    """Write the 'modules' markdown page listing every available module.

    For each entry returned by ``common.get_modules(opts)`` the module's
    ``hatch.name()`` and ``hatch.desc()`` are collected. The page then holds
    one bullet per module linking to ``module_<mod>_overview.md``, followed
    by the description with every line stripped and indented by two spaces.
    Nothing is written when ``common.can_create_filename`` refuses the file.
    """
    common.myprint(opts, 'Generating modules.md')
    mods = common.get_modules(opts)
    ndms = []
    for mod in mods:
        # Plain attribute lookup: same result as the former
        # eval('mods[mod].<mod>.hatch...') without evaluating a string
        # built from the module name.
        hatch = getattr(mods[mod], mod).hatch
        name = hatch.name()
        desc = hatch.desc()
        ndms.append((name, desc, mod))
    filename = common.get_markdown_file(opts, 'modules')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as fout:
        fout.write('''# Modules

NSIMD comes with several additional modules. A module provides a set of
functionnalities that are usually not at the same level as SIMD intrinsics
and/or that do not provide all C and C++ APIs. These functionnalities are
given with the library because they make heavy use of NSIMD core which
abstract SIMD intrinsics. Below is the exhaustive list of modules.

''')
        for name, desc, mod in ndms:
            fout.write('- [{}](module_{}_overview.md)  \n'.format(name, mod))
            fout.write('\n'.join('  {}'.format(line.strip())
                                 for line in desc.split('\n')))
            fout.write('\n\n')
Exemplo n.º 4
0
Arquivo: hatch.py Projeto: zoq/nsimd
def main():
    """Parse the command line and run every requested generator in order.

    A generator runs when its own option or ``opts.all`` compares equal to
    ``True``. ``gen_modules.doit`` always runs, after the core NSIMD
    generators and before the documentation generator.
    """
    opts = parse_args(sys.argv[1:])
    opts.script_dir = script_dir
    opts.modules_list = None
    opts.platforms_list = None

    ## Gather all SIMD dependencies
    opts.simd = common.get_simds_deps_from_opts(opts)
    common.myprint(opts, 'List of SIMD: {}'.format(', '.join(opts.simd)))

    def requested(flag_name):
        # Explicit comparison with True kept on purpose: it preserves the
        # exact semantics of the option check (the option is read lazily,
        # right before its generator would run).
        return getattr(opts, flag_name) == True or opts.all == True

    core_generators = (
        ('archis', gen_archis),
        ('base_apis', gen_base_apis),
        ('cxx_api', gen_advanced_api),
        ('ulps', gen_ulps),
        ('tests', gen_tests),
        ('benches', gen_benches),
        ('src', gen_src),
        ('scalar_utilities', gen_scalar_utilities),
        ('friendly_but_not_optimized', gen_friendly_but_not_optimized),
    )
    for flag_name, generator in core_generators:
        if requested(flag_name):
            generator.doit(opts)

    gen_modules.doit(opts)  # this must be here after all NSIMD

    if requested('doc'):
        gen_doc.doit(opts)
Exemplo n.º 5
0
def doit(opts):
    """Generate ``functions.h`` in ``opts.include_dir``.

    The header contains, for every operator, an ``NSIMD_AUTO_INCLUDE`` line
    followed by the output of ``get_c_base_generic`` and
    ``get_cxx_base_generic``, then the output of ``get_put_decl``. The file
    is passed to ``common.clang_format`` once written. Nothing is generated
    when ``common.can_create_filename`` refuses the file.
    """
    common.myprint(opts, 'Generating base APIs')
    common.mkdir_p(opts.include_dir)
    filename = os.path.join(opts.include_dir, 'functions.h')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        # Opening of the include guard.
        out.write('''#ifndef NSIMD_FUNCTIONS_H
                     #define NSIMD_FUNCTIONS_H

                     '''.format(year=date.today().year))

        # One section per operator: separator, include, C then C++ generic.
        for operator in operators.operators.values():
            pieces = (common.hbar, operator.name,
                      get_c_base_generic(operator),
                      get_cxx_base_generic(operator))
            out.write('''{}

                         #include NSIMD_AUTO_INCLUDE({}.h)

                         {}

                         {}

                         '''.format(*pieces))

        # Trailing declarations and end of the include guard.
        out.write('''{hbar}

                     {put_decl}

                     {hbar}

                     #endif'''.format(hbar=common.hbar,
                                      put_decl=get_put_decl()))
    common.clang_format(opts, filename)
Exemplo n.º 6
0
def doit(opts):
    """Generate all fixed_point tests for every (lf, rt) combination.

    For each pair taken from ``lf_vals`` and ``rt_vals`` the test generators
    are invoked in a fixed order.
    """
    common.myprint(opts, 'Generating tests for module fixed_point')
    for lf in lf_vals:
        for rt in rt_vals:
            test_generators = (
                gen_arithmetic_ops_tests,  # arithmetic operators
                gen_minmax_ops_tests,      # min and max operators
                gen_ternary_ops_tests,     # ternary operators
                gen_math_functions_tests,  # math functions
                gen_comparison_tests,      # comparison operators
                gen_bitwise_ops_tests,     # bitwise binary operators
                gen_unary_ops_tests,       # bitwise unary operators
                gen_if_else_tests,         # if_else
            )
            for generate in test_generators:
                generate(lf, rt, opts)
Exemplo n.º 7
0
def doit(opts):
    """Generate ``friendly_but_not_optimized.hpp`` in ``opts.include_dir``.

    An implementation (from ``get_impl``) is emitted for each operator that
    has a C++ operator, exactly three entries in ``params`` and is neither
    'shl' nor 'shr'. The file is passed to ``common.clang_format`` once
    written. Nothing is generated when ``common.can_create_filename`` refuses
    the file.
    """
    common.myprint(opts, 'Generating friendly but not optimized advanced '
                   'C++ API')
    filename = os.path.join(opts.include_dir, 'friendly_but_not_optimized.hpp')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        # format() is what turns the doubled braces into literal ones; the
        # template itself has no {year} field.
        out.write('''#ifndef NSIMD_FRIENDLY_BUT_NOT_OPTIMIZED_HPP
                     #define NSIMD_FRIENDLY_BUT_NOT_OPTIMIZED_HPP

                     #include <nsimd/nsimd.h>
                     #include <nsimd/cxx_adv_api.hpp>

                     namespace nsimd {{

                     '''.format(year=date.today().year))
        for op_name, operator in operators.operators.items():
            # Identity test for None (PEP 8) instead of `== None`.
            if operator.cxx_operator is None or len(operator.params) != 3 or \
               operator.name in ['shl', 'shr']:
                continue
            out.write('''{hbar}

                         {code}

                         '''.format(hbar=common.hbar, code=get_impl(operator)))
        out.write('''{hbar}

                     }} // namespace nsimd

                     #endif'''.format(hbar=common.hbar))
    common.clang_format(opts, filename)
Exemplo n.º 8
0
def doit(opts):
    """Write one C++ source per SIMD extension of every known platform.

    ``opts.platforms`` is (re)assigned from ``common.get_platforms(opts)``;
    ``write_cpp`` is then called for each extension together with the
    platform's ``emulate_fp16`` answer for that extension.
    """
    common.mkdir_p(opts.src_dir)
    common.myprint(opts, 'Generating source for binary')
    opts.platforms = common.get_platforms(opts)
    for name in opts.platforms:
        platform_mod = opts.platforms[name]
        for ext in platform_mod.get_simd_exts():
            emulate = platform_mod.emulate_fp16(ext)
            write_cpp(opts, ext, emulate)
Exemplo n.º 9
0
def doit(opts):
    """Generate the parts of the random module selected in `opts`.

    ``opts.library``, ``opts.tests`` and ``opts.doc`` respectively enable
    ``gen_functions``, ``gen_tests`` and ``gen_doc``, in that order.
    """
    common.myprint(opts, 'Generating module random')

    stages = (
        ('library', gen_functions),
        ('tests', gen_tests),
        ('doc', gen_doc),
    )
    for flag, generate in stages:
        # Each option is read right before its stage would run.
        if getattr(opts, flag):
            generate(opts)
Exemplo n.º 10
0
def doit(opts):
    """Generate the parts of the tet1d module selected in `opts`.

    ``opts.library`` enables ``gen_functions``, ``opts.tests`` enables
    ``gen_tests`` and ``opts.doc`` enables ``gen_doc_api`` followed by
    ``gen_doc_overview``.
    """
    common.myprint(opts, 'Generating module tet1d')
    stages = (
        ('library', (gen_functions,)),
        ('tests', (gen_tests,)),
        ('doc', (gen_doc_api, gen_doc_overview)),
    )
    for flag, generators in stages:
        # Each option is read right before its stage would run.
        if not getattr(opts, flag):
            continue
        for generate in generators:
            generate(opts)
Exemplo n.º 11
0
def gen_archis_platform(opts, platform):
    """Generate the per-extension headers of one platform.

    Every SIMD extension reported by ``opts.platforms[platform]`` is
    announced; those listed in ``opts.simd`` get their own directory under
    ``<include_dir>/<platform>/`` and their types and SIMD files generated,
    the others are reported as excluded.
    """
    platform_dir = os.path.join(opts.include_dir, platform)
    for ext in opts.platforms[platform].get_simd_exts():
        common.myprint(opts, 'Found new SIMD extension: {}'.format(ext))
        if ext not in opts.simd:
            common.myprint(opts, '  Extension excluded by command line')
            continue
        ext_dir = os.path.join(platform_dir, ext)
        common.mkdir_p(ext_dir)
        gen_archis_types(opts, ext_dir, platform, ext)
        gen_archis_simd(opts, platform, ext, ext_dir)
Exemplo n.º 12
0
def copy_github_file_to_doc(opts, github_filename, doc_filename):
    """Copy a UTF-8 text file into the doc tree, rewriting its doc links.

    Occurrences of 'doc/markdown/' and then of 'doc/' are replaced by
    'nsimd/'. The copy is skipped when ``common.can_create_filename``
    refuses ``doc_filename``.
    """
    common.myprint(opts, 'Copying {} ---> {}'.format(github_filename,
                                                     doc_filename))
    if common.can_create_filename(opts, doc_filename):
        with io.open(github_filename, mode='r', encoding='utf-8') as fin:
            text = fin.read()
        # Links to doc/... become nsimd/...; the longer prefix goes first.
        for prefix in ('doc/markdown/', 'doc/'):
            text = text.replace(prefix, 'nsimd/')
        # common.open_utf8 is not used here as the copyright is already part
        # of the content.
        with io.open(doc_filename, mode='w', encoding='utf-8') as fout:
            fout.write(text)
Exemplo n.º 13
0
def build_exe_for_doc(opts):
    """Build the documentation helper executables with the platform make tool.

    Does nothing when ``opts.list_files`` is set. Otherwise runs ``nmake`` on
    Windows or ``make`` elsewhere from the ``doc`` directory next to the
    script directory, and reports success or failure from the exit status.
    """
    if opts.list_files:
        return
    doc_dir = os.path.normpath(os.path.join(opts.script_dir, '..', 'doc'))
    if platform.system() == 'Windows':
        command = 'cd {} && nmake /F Makefile.win'
    else:
        command = 'cd {} && make -f Makefile.nix'
    status = os.system(command.format(doc_dir))
    outcome = 'Build successful' if status == 0 else 'Build failed'
    common.myprint(opts, outcome)
Exemplo n.º 14
0
def gen_doc_html(opts, title):
    """Convert every markdown page of the documentation into an HTML page.

    With ``opts.list_files`` set, only the paths of the HTML files that would
    be produced are printed. Otherwise the helper executables are built, each
    ``<name>.md`` found in the markdown directory is converted by ``md2html``
    into a temporary file, and ``<name>.html`` is written as header +
    converted body + footer.

    Args:
        opts: generator options (``list_files`` and ``script_dir`` are read
            here).
        title: passed to ``get_html_header`` for every page.
    """
    if not opts.list_files:
        build_exe_for_doc(opts)
        md2html = 'md2html.exe' if platform.system() == 'Windows' \
                                else 'md2html'
        doc_dir = os.path.join(opts.script_dir, '..', 'doc')
        full_path_md2html = os.path.join(doc_dir, md2html)
        if not os.path.isfile(full_path_md2html):
            common.myprint(opts, '{} not found'.format(md2html))
            return

    md_dir = common.get_markdown_dir(opts)
    html_dir = get_html_dir(opts)

    if not os.path.isdir(html_dir):
        # Use the helper through `common`, as the rest of the generators do,
        # instead of relying on a bare `mkdir_p` name being in scope.
        common.mkdir_p(html_dir)

    # Base names (without extension) of all markdown files; os.listdir
    # already yields bare file names.
    doc_files = [os.path.splitext(name)[0]
                 for name in os.listdir(md_dir)
                 if name.endswith('.md')]

    if opts.list_files:
        ## list gen files
        for filename in doc_files:
            print(os.path.join(html_dir, filename + '.html'))
    else:
        ## gen html files
        footer = get_html_footer()
        # doc_dir and full_path_md2html were set in the first branch, which
        # ran because opts.list_files is false.
        tmp_file = os.path.join(doc_dir, 'tmp.html')
        for filename in doc_files:
            header = get_html_header(opts, title, filename)
            input_name = os.path.join(md_dir, filename + '.md')
            output_name = os.path.join(html_dir, filename + '.html')
            os.system('{} "{}" "{}"'.format(full_path_md2html, input_name,
                                            tmp_file))
            with common.open_utf8(opts, output_name) as fout:
                fout.write(header)
                with io.open(tmp_file, mode='r', encoding='utf-8') as fin:
                    fout.write(fin.read())
                fout.write(footer)
Exemplo n.º 15
0
def doit(opts):
    """Generate ``cxx_adv_api_functions.hpp`` in ``opts.include_dir``.

    For each operator with ``autogen_cxx_adv`` set, the output of
    ``get_cxx_advanced_generic`` is emitted; operators that also have a C++
    operator and whose ``args`` are ['v', 'v'] or ['v', 'p'] additionally get
    the output of ``gen_assignment_operators``. The file is passed to
    ``common.clang_format`` once written.
    """
    common.myprint(opts, 'Generating advanced C++ API')
    filename = os.path.join(opts.include_dir, 'cxx_adv_api_functions.hpp')
    if not common.can_create_filename(opts, filename):
        return
    with common.open_utf8(opts, filename) as out:
        # Include guard and namespace opening.
        out.write('''#ifndef NSIMD_CXX_ADV_API_FUNCTIONS_HPP
                     #define NSIMD_CXX_ADV_API_FUNCTIONS_HPP

                     namespace nsimd {{

                     '''.format(year=date.today().year))

        for operator in operators.operators.values():
            if not operator.autogen_cxx_adv:
                continue

            generic = get_cxx_advanced_generic(operator)
            out.write('''{hbar}

                         {code}

                         '''.format(hbar=common.hbar, code=generic))

            binary_forms = (['v', 'v'], ['v', 'p'])
            if operator.cxx_operator and operator.args in binary_forms:
                assignments = gen_assignment_operators(operator)
                out.write('{hbar}\n{code}'.format(hbar=common.hbar,
                                                  code=assignments))

        # Namespace closing and end of the include guard.
        out.write('''{hbar}

                     }} // namespace nsimd

                     #endif'''.format(hbar=common.hbar))
    common.clang_format(opts, filename)
Exemplo n.º 16
0
def doit(opts):
    """Generate the SIMD implementation headers of every known platform.

    ``opts.platforms`` is (re)assigned from ``common.get_platforms(opts)``;
    each platform gets its directory under ``opts.include_dir`` before
    ``gen_archis_platform`` is run for it.
    """
    common.myprint(opts, 'Generating SIMD implementations')
    opts.platforms = common.get_platforms(opts)
    for name in opts.platforms:
        platform_dir = os.path.join(opts.include_dir, name)
        common.mkdir_p(platform_dir)
        gen_archis_platform(opts, name)
Exemplo n.º 17
0
def TODO(f):
    """Report, in verbose mode only, that `f` still has to be handled.

    Reads the options from the module-level ``_opts``.

    Args:
        f: object whose ``name`` attribute is appended to the message.
    """
    if _opts.verbose:
        # Pass `_opts`: this function has no `opts` name of its own, so the
        # previous `opts` reference could not resolve locally.
        common.myprint(_opts, '@@ TODO: ' + f.name)
Exemplo n.º 18
0
Arquivo: hatch.py Projeto: zoq/nsimd
def doit(opts):
    """Write the overview markdown page of the memory_management module.

    Nothing is written unless ``opts.doc`` is set and
    ``common.can_create_filename`` accepts the target file.
    """
    common.myprint(opts, 'Generating module memory_management')
    if not opts.doc:
        return
    target = common.get_markdown_file(opts, 'overview', 'memory_management')
    if not common.can_create_filename(opts, target):
        return

    # `{br}` expands to two spaces (presumably a markdown hard line break);
    # doubled braces in the template are literal braces.
    line_break = '  '
    overview = '''# Overview

This module provides C-style memory managmenent functions. Its purpose is not
to become a fully feature container library. It is to provide portable
malloc, memcpy and free functions with a little helpers to copy data from and
to the devices.

# API reference

## Equivalents of malloc, calloc, memcpy and free for devices

Note that the below functions simply wraps the corresponding C functions
when targeting a CPU.

- `template <typename T> T *device_malloc(size_t sz)`{br}
  Allocates `sz * sizeof(T)` bytes of memory on the device.
  On error NULL is returned.

- `template <typename T> T *device_calloc(size_t sz)`{br}
  Allocates `sz * sizeof(T)` bytes of memory on the device and set the
  allocated memory to zero.
  On error NULL is returned.

- `template <typename T> void device_free(T *ptr)`{br}
  Free the memory pointed to by the given pointer.

- `template <typename T> void copy_to_device(T *device_ptr, T *host_ptr,
  size_t sz)`{br}
  Copy data to from host to device.

- `template <typename T> void copy_to_host(T *host_ptr, T *device_ptr,
  size_t sz)`{br}
  Copy data to from device to host.

- `#define nsimd_fill_dev_mem_func(func_name, expr)`{br}
  Create a device function that will fill data with `expr`. To call the created
  function one simply does `func_name(ptr, sz)`. The `expr` argument represents
  some simple C++ expression that can depend only on `i` the i-th element in
  the vector as shown in the example below.

  ```c++
  nsimd_fill_dev_mem_func(prng, ((i * 1103515245 + 12345) / 65536) % 32768)

  int main() {{
    prng(ptr, 1000);
    return 0;
  }}
  ```

## Pairs of pointers

It is often useful to allocate a pair of data buffers: one on the host and
one on the devices to perform data transfers. The below functions provides
quick ways to malloc, calloc, free and memcpy pointers on host and devices at
once. Note that when targeting CPUs the pair of pointers is reduced to one
pointer that ponit the a single data buffer in which case memcpy's are not
performed. Note also that there is no implicit synchronization of data
between both data buffers. It is up to the programmer to triggers memcpy's.

```c++
template <typename T>
struct paired_pointers_t {{
  T *device_ptr, *host_ptr;
  size_t sz;
}};
```

Members of the above structure are not to be modified but can be passed as
arguments for reading/writing data from/to memory they point to.

- `template <typename T> paired_pointers_t<T> pair_malloc(size_t sz)`{br}
  Allocate `sz * sizeof(T)` bytes of memory on the host and on the device.
  If an error occurs both pointers are NULL.

- `template <typename T> paired_pointers_t<T> pair_malloc_or_exit(size_t
  sz)`{br}
  Allocate `sz * sizeof(T)` bytes of memory on the host and on the device.
  If an error occurs, prints an error message on stderr and exit(3).

- `template <typename T> paired_pointers_t<T> pair_calloc(size_t sz)`{br}
  Allocate `sz * sizeof(T)` bytes of memory on the host and on the device.
  Write both data buffers with zeros.
  If an error occurs both pointers are NULL.

- `template <typename T> paired_pointers_t<T> pair_calloc_or_exit(size_t
  sz)`{br}
  Allocate `sz * sizeof(T)` bytes of memory on the host and on the device.
  Write both data buffers with zeros.
  If an error occurs, prints an error message on stderr and exit(3).

- `template <typename T> void pair_free(paired_pointers_t<T> p)`{br}
  Free data buffers on the host and the device.

- `template <typename T> void copy_to_device(paired_pointers_t<T> p)`{br}
  Copy data from the host buffer to its corresponding device buffer.

- `template <typename T> void copy_to_host(paired_pointers_t<T> p)`{br}
  Copy data from the device buffer to its corresponding host buffer.
'''.format(br=line_break)

    with common.open_utf8(opts, target) as fout:
        fout.write(overview)
Exemplo n.º 19
0
def doit(opts, op_list):
    """Generate the documentation of the fixed_point module.

    Runs ``gen_overview`` first, then ``gen_api`` and ``gen_doc`` which both
    also receive `op_list`.
    """
    common.myprint(opts, 'Generating doc for module fixed_point')
    gen_overview(opts)
    for generate in (gen_api, gen_doc):
        generate(opts, op_list)
Exemplo n.º 20
0
def gen_html(opts):
    """Generate the HTML documentation under the title 'NSIMD documentation'."""
    common.myprint(opts, 'Generating HTML documentation')
    page_title = 'NSIMD documentation'
    gen_doc_html(opts, page_title)
Exemplo n.º 21
0
def doit(opts):
    """Download Sleef, copy the needed sources into ``opts.src_dir``, patch them.

    Steps, in order:

    1. Download the zip archive of tag ``opts.sleef_version`` from GitHub
       into ``<script_dir>/../_deps-sleef`` and extract it there.
    2. Copy a fixed list of Sleef sources and headers into ``opts.src_dir``,
       keeping only their base names.
    3. Comment out the lines starting with ``DALIAS_`` in the two SIMD C
       files and replace the ``void Sleef_x86CpuID`` line of the x86 helper
       headers by a stub.
    4. Append NSIMD-specific macro definitions to ``misc.h``.
    5. Parse ``src/libm/funcproto.h`` and write one ``rename<ext>.h`` header
       per Sleef SIMD flavour, mapping Sleef names to NSIMD names, then run
       ``common.clang_format`` on it.

    NOTE(review): the HTTP response status is not checked before the body is
    written to disk.
    """
    common.myprint(opts, 'Copy native Sleef version {}'. \
                         format(opts.sleef_version))

    # First download Sleef
    sleef_dir = os.path.join(opts.script_dir, '..', '_deps-sleef')
    common.mkdir_p(sleef_dir)
    url = 'https://github.com/shibatch/sleef/archive/refs/tags/{}.zip'. \
          format(opts.sleef_version)
    r = requests.get(url, allow_redirects=True)
    sleef_zip = os.path.join(sleef_dir, 'sleef.zip')
    with open(sleef_zip, 'wb') as fout:
        fout.write(r.content)

    # Unzip sleef
    with zipfile.ZipFile(sleef_zip, 'r') as fin:
        fin.extractall(path=sleef_dir)

    # Copy helper function: `filename` is relative to the extracted
    # 'sleef-<version>' directory; the destination keeps only its base name.
    def copy(filename):
        dst_filename = os.path.basename(filename)
        shutil.copyfile(
            os.path.join(sleef_dir, 'sleef-{}'.format(opts.sleef_version),
                         filename), os.path.join(opts.src_dir, dst_filename))

    # Copy files
    copy('src/libm/sleefsimddp.c')
    copy('src/libm/sleefsimdsp.c')
    copy('src/libm/sleefdp.c')
    copy('src/libm/sleefsp.c')
    copy('src/common/misc.h')
    copy('src/libm/estrin.h')
    copy('src/libm/dd.h')
    copy('src/libm/df.h')
    copy('src/libm/rempitab.c')
    copy('src/arch/helpersse2.h')
    copy('src/arch/helperavx.h')
    copy('src/arch/helperavx2.h')
    copy('src/arch/helperavx512f.h')
    copy('src/arch/helperneon32.h')
    copy('src/arch/helperadvsimd.h')
    copy('src/arch/helperpower_128.h')
    copy('src/arch/helpersve.h')

    # Sleef uses aliases but we don't need those so we comment them
    # The file is rewritten through the temporary 'tmp.c' in opts.src_dir,
    # which is copied back over the source and then removed.
    def comment_DALIAS_lines(filename):
        src = os.path.join(opts.src_dir, filename)
        dst = os.path.join(opts.src_dir, 'tmp.c')
        with open(src, 'r') as fin, open(dst, 'w') as fout:
            for line in fin:
                if line.startswith('DALIAS_'):
                    fout.write('/* {} */\n'.format(line.strip()))
                else:
                    fout.write(line)
        shutil.copyfile(dst, src)
        os.remove(dst)

    comment_DALIAS_lines('sleefsimdsp.c')
    comment_DALIAS_lines('sleefsimddp.c')

    # Sleef provides runtime SIMD detection via cpuid but we don't need it
    # Only the lines starting with 'void Sleef_x86CpuID' are replaced by the
    # stub text; every other line is copied unchanged. Same 'tmp.c' round
    # trip as above.
    def replace_x86_cpuid(filename):
        src = os.path.join(opts.src_dir, filename)
        dst = os.path.join(opts.src_dir, 'tmp.c')
        with open(src, 'r') as fin, open(dst, 'w') as fout:
            for line in fin:
                if line.startswith('void Sleef_x86CpuID'):
                    fout.write('''static inline
                       void Sleef_x86CpuID(int32_t out[4], uint32_t eax,
                                           uint32_t ecx) {
                         /* We don't care for cpuid detection */
                         out[0] = 0xFFFFFFFF;
                         out[1] = 0xFFFFFFFF;
                         out[2] = 0xFFFFFFFF;
                         out[3] = 0xFFFFFFFF;
                       }
                       ''')
                else:
                    fout.write(line)
        shutil.copyfile(dst, src)
        os.remove(dst)

    replace_x86_cpuid('helpersse2.h')
    replace_x86_cpuid('helperavx.h')
    replace_x86_cpuid('helperavx2.h')
    replace_x86_cpuid('helperavx512f.h')

    # Sleef uses force inline through its INLINE macro defined in misc.h
    # We modify it to avoid warnings and because force inline has been a pain
    # in the past. We also rename some exported symbols.
    # The text is appended ('a' mode) to the copied misc.h.
    with open(os.path.join(opts.src_dir, 'misc.h'), 'a') as fout:
        fout.write('''

        /* NSIMD specific */
        #ifndef NSIMD_SLEEF_MISC_H
        #define NSIMD_SLEEF_MISC_H

        #ifdef INLINE
        #undef INLINE
        #endif
        #define INLINE inline

        #define Sleef_rempitabdp nsimd_sleef_rempitab_f64
        #define Sleef_rempitabsp nsimd_sleef_rempitab_f32

        #endif

        ''')

    # Sleef functions must be renamed properly for each SIMD extensions.
    # Moreover their name must contain their precision (in ULPs). This
    # precision is not the same for all functions and some functions can have
    # differents flavours (or precisions). The "database" is contained within
    # src/libm/funcproto.h. So we parse it and produce names
    # in headers "rename[SIMD ext].h" to avoid modifying Sleef C files.
    funcproto = os.path.join(sleef_dir, 'sleef-{}'.format(opts.sleef_version),
                             'src', 'libm', 'funcproto.h')
    defines = []
    # Suffix appended to the Sleef function name, keyed by the third field
    # (items[2]) of a funcproto.h entry.
    ulp_suffix = {
        '0': '',
        '1': '_u1',
        '2': '_u05',
        '3': '_u35',
        '4': '_u15',
        '5': '_u3500'
    }
    with open(funcproto, 'r') as fin:
        for line in fin:
            # Only lines holding both '{' and '}' are treated as entries.
            if (line.find('{') != -1 and line.find('}') != -1):
                items = [item.strip() \
                         for item in line.strip(' \n\r{},').split(',')]
                items[0] = items[0].strip('"')
                # An entry whose first field is NULL ends the parsing.
                if items[0] == 'NULL':
                    break
                sleef_name_f64 = items[0] + ulp_suffix[items[2]]
                sleef_name_f32 = items[0] + 'f' + ulp_suffix[items[2]]
                # Second field: '5' is spelled '05' in NSIMD names and '-1'
                # yields a name without any '_u<...>' part.
                items[1] = items[1] if items[1] != '5' else '05'
                if items[1] == '-1':
                    nsimd_name_f64 = 'nsimd_sleef_{}_{{nsimd_ext}}_f64'. \
                                     format(items[0])
                    nsimd_name_f32 = 'nsimd_sleef_{}_{{nsimd_ext}}_f32'. \
                                     format(items[0])
                else:
                    nsimd_name_f64 = \
                    'nsimd_sleef_{}_u{}{{det}}_{{nsimd_ext}}_f64'. \
                    format(items[0], items[1])
                    nsimd_name_f32 = \
                    'nsimd_sleef_{}_u{}{{det}}_{{nsimd_ext}}_f32'. \
                    format(items[0], items[1])
                defines.append('#define x{} {}'.format(sleef_name_f64,
                                                       nsimd_name_f64))
                defines.append('#define x{} {}'.format(sleef_name_f32,
                                                       nsimd_name_f32))
    # `defines` becomes one template string still holding the {det} and
    # {nsimd_ext} fields, filled in per NSIMD extension below.
    defines = '\n'.join(defines)

    # Sleef SIMD flavour -> NSIMD extensions it is used for.
    sleef_to_nsimd = {
        '': ['scalar'],
        'sse2': ['sse2'],
        'sse4': ['sse42'],
        'avx': ['avx'],
        'avx2': ['avx2'],
        'avx512f': ['avx512_knl', 'avx512_skylake'],
        'neon32': ['neon128'],
        'advsimd': ['aarch64'],
        'sve': ['sve128', 'sve256', 'sve512', 'sve1024', 'sve2048'],
        'vsx': ['vmx', 'vsx']
    }

    for simd_ext in [
            '', 'sse2', 'sse4', 'avx', 'avx2', 'avx512f', 'neon32', 'advsimd',
            'sve', 'vsx'
    ]:
        renameheader = os.path.join(opts.src_dir,
                                    'rename{}.h'.format(simd_ext))
        # The scalar flavour has an empty Sleef name: the file is 'rename.h'
        # but its include guard uses SCALAR.
        se = simd_ext if simd_ext != '' else 'scalar'
        with open(renameheader, 'w') as fout:
            fout.write('''#ifndef RENAME{SIMD_EXT}_H
               #define RENAME{SIMD_EXT}_H

               '''.format(SIMD_EXT=se.upper()))
            for nse in sleef_to_nsimd[simd_ext]:
                # No #ifdef/#endif wrapping for the scalar flavour.
                ifdef = '' if simd_ext == '' \
                           else '#ifdef NSIMD_{}'.format(nse.upper())
                endif = '' if simd_ext == '' else '#endif'
                # NOTE(review): the template below only references
                # {defines_det_f32} and {defines_nondet_f32}; `defines`
                # already holds both the f32 and f64 lines, so the NSIMD_EXT
                # and *_f64 keyword arguments are not used by it.
                fout.write('''{hbar}
                   /* Naming of functions {nsimd_ext} */

                   {ifdef}

                   #ifdef DETERMINISTIC

                   {defines_det_f32}

                   #else

                   {defines_nondet_f32}

                   #endif

                   #define rempi nsimd_sleef_rempi_{nsimd_ext}
                   #define rempif nsimd_sleef_rempif_{nsimd_ext}
                   #define rempisub nsimd_sleef_rempisub_{nsimd_ext}
                   #define rempisubf nsimd_sleef_rempisubf_{nsimd_ext}
                   #define gammak nsimd_gammak_{nsimd_ext}
                   #define gammafk nsimd_gammafk_{nsimd_ext}

                   {endif}

                   '''.format(NSIMD_EXT=nse.upper(),
                              nsimd_ext=nse,
                              hbar=common.hbar,
                              ifdef=ifdef,
                              endif=endif,
                              defines_det_f32=defines.format(det='d',
                                                             nsimd_ext=nse),
                              defines_nondet_f32=defines.format(det='',
                                                                nsimd_ext=nse),
                              defines_det_f64=defines.format(det='d',
                                                             nsimd_ext=nse),
                              defines_nondet_f64=defines.format(
                                  det='', nsimd_ext=nse)))

            fout.write('\n\n#endif\n\n')

            # NOTE(review): clang_format is called while `fout` is still
            # open, so the trailing '#endif' may not be flushed yet.
            common.clang_format(opts, renameheader)
Exemplo n.º 22
0
def gen_what_is_wrapped(opts):
    """Generate the markdown pages listing which intrinsics are wrapped.

    The helper executable ``what_is_wrapped`` is built and then run four
    times per supported SIMD extension (same, same_size, bigger_size and
    lesser_size outputs), each run appending to
    ``wrapped_intrinsics_for_<ext>.md``. An index page
    ``wrapped_intrinsics.md`` linking to every per-extension page is written
    last. The function returns early when the executable is not found.
    """
    common.myprint(opts, 'Generating "which intrinsics are wrapped"')
    build_exe_for_doc(opts)
    if platform.system() == 'Windows':
        wrapped = 'what_is_wrapped.exe'
    else:
        wrapped = 'what_is_wrapped'
    full_path_wrapped = os.path.join(opts.script_dir, '..', 'doc', wrapped)
    if not os.path.isfile(full_path_wrapped):
        common.myprint(opts, '{} not found'.format(wrapped))
        return

    # Content for indexing files created in this function
    index = '# Intrinsics that are wrapped\n'

    # Common prefix of every command line
    cmd0 = '{} {},{},{},{},{},{}'.format(full_path_wrapped, common.in0,
                                         common.in1, common.in2, common.in3,
                                         common.in4, common.in5)

    # One entry per run of the executable: command template, index of the
    # operator bucket passed to it, message printed when the run fails.
    passes = (
        ('{} {} same {} >> "{}"', common.OUTPUT_TO_SAME_TYPE,
         'Unable to generate markdown for "same"'),
        ('{} {} same_size {} >> "{}"', common.OUTPUT_TO_SAME_SIZE_TYPES,
         'Unable to generate markdown for "same_size"'),
        ('{} {} bigger_size {} >> "{}"', common.OUTPUT_TO_UP_TYPES,
         'Unable to generate markdown for "bigger_size"'),
        ('{} {} lesser_size {} >> "{}"', common.OUTPUT_TO_DOWN_TYPES,
         'Unable to generate markdown for "lesser_size"'),
    )

    # For now we only list Intel, Arm and POWERPC intrinsics
    simd_exts = common.x86_simds + common.arm_simds + common.ppc_simds
    for p in common.get_platforms(opts):
        index_entries = []
        for simd_ext in opts.platforms_list[p].get_simd_exts():
            if simd_ext not in simd_exts:
                continue
            md = os.path.join(common.get_markdown_dir(opts),
                              'wrapped_intrinsics_for_{}.md'.format(simd_ext))
            index_entries.append('- [{}](wrapped_intrinsics_for_{}.md)\n'.
                                 format(simd_ext.upper(), simd_ext))

            # Operators are bucketed by their `output_to` value.
            ops = [[], [], [], []]
            for op_name, operator in operators.items():
                if operator.src:
                    continue
                c_src = os.path.join(opts.include_dir, p, simd_ext,
                                     '{}.h'.format(op_name))
                ops[operator.output_to].append('{} "{}"'.format(op_name,
                                                                c_src))

            if not common.can_create_filename(opts, md):
                continue
            with common.open_utf8(opts, md) as fout:
                fout.write('# Intrinsics wrapped for {}\n\n'.format(
                    simd_ext.upper()))
                fout.write('Notations are as follows:\n'
                           '- `T` for trick usually using other intrinsics\n'
                           '- `E` for scalar emulation\n'
                           '- `NOOP` for no operation\n'
                           '- `NA` means the operator does not exist for '
                           'the given type\n'
                           '- `intrinsic` for the actual wrapped intrinsic\n'
                           '\n')

            # The first failing run ends the work on this extension.
            for template, bucket, failure in passes:
                cmd = template.format(cmd0, simd_ext, ' '.join(ops[bucket]),
                                      md)
                if os.system(cmd) != 0:
                    common.myprint(opts, failure)
                    break

        if index_entries:
            index += '\n## Platform {}\n\n'.format(p)
            index += ''.join(index_entries)

    md = os.path.join(common.get_markdown_dir(opts), 'wrapped_intrinsics.md')
    if common.can_create_filename(opts, md):
        with common.open_utf8(opts, md) as fout:
            fout.write(index)
Exemplo n.º 23
0
def doit(opts):
    """Generate the ``scalar_utilities.h`` header.

    The header is written into ``opts.include_dir``. It contains, in order:
    the include guard and standard includes, forward declarations of every
    scalar reinterpret (CPU, then GPU inside ``namespace nsimd``), and for
    each operator flagged ``has_scalar_impl`` a C function, its C++ wrapper
    and the GPU implementation. The file is finally passed to
    ``common.clang_format``.

    Nothing is generated when ``common.can_create_filename`` refuses the
    target file.
    """
    common.myprint(opts, 'Generating scalar implementation for CPU and GPU')
    filename = os.path.join(opts.include_dir, 'scalar_utilities.h')
    if not common.can_create_filename(opts, filename):
        return

    # Text emitted verbatim (after substitution); the indentation inside
    # these literals is part of the output and must not be touched.
    header_tpl = '''#ifndef NSIMD_SCALAR_UTILITIES_H
           #define NSIMD_SCALAR_UTILITIES_H

           #if NSIMD_CXX > 0
           #include <cmath>
           #include <cstring>
           #else
           #include <math.h>
           #include <string.h>
           #endif

           #ifdef NSIMD_NATIVE_FP16
             #if defined(NSIMD_IS_GCC)
               #pragma GCC diagnostic push
               #pragma GCC diagnostic ignored "-Wdouble-promotion"
             #elif defined(NSIMD_IS_CLANG)
               #pragma clang diagnostic push
               #pragma clang diagnostic ignored "-Wdouble-promotion"
             #endif
           #endif

           {scalar_reinterpret_decls}

           #if defined(NSIMD_CUDA) || defined(NSIMD_ROCM)

           namespace nsimd {{

           {gpu_reinterpret_decls}

           }} // namespace nsimd

           #endif
           '''

    # Operators whose parameters are all logicals: no type suffix.
    logical_tpl = '''NSIMD_INLINE {c_sig} {{
                  {scalar_impl}
                }}

                #if NSIMD_CXX > 0

                namespace nsimd {{

                NSIMD_INLINE {cxx_sig} {{
                  return nsimd_scalar_{op_name}({c_args});
                }}

                {gpu_impl}

                }} // namespace nsimd

                #endif'''

    # Every other operator: one instance per (input type, output type).
    typed_tpl = '''NSIMD_INLINE {c_sig} {{
                      {scalar_impl}
                    }}

                    #if NSIMD_CXX > 0

                    namespace nsimd {{

                    NSIMD_INLINE {cxx_sig} {{
                      return nsimd_scalar_{op_name}_{suffix}({c_args});
                    }}

                    {gpu_impl}

                    }} // namespace nsimd

                    #endif'''

    footer_tpl = '''

                  {hbar}

                  #ifdef NSIMD_NATIVE_FP16
                    #if defined(NSIMD_IS_GCC)
                      #pragma GCC diagnostic pop
                    #elif defined(NSIMD_IS_CLANG)
                      #pragma clang diagnostic pop
                    #endif
                  #endif

                  #endif'''

    def forwarded_args(op):
        # Arguments handed from the C++ wrapper to the C function:
        # a0, a1, ... one fewer than len(op.params).
        return ', '.join('a{}'.format(k) for k in range(len(op.params) - 1))

    with common.open_utf8(opts, filename) as out:
        # Reinterprets are declared up front because later code needs them.
        cpu_sigs = []
        gpu_sigs = []
        # NOTE: `t` and `tt` must stay ordinary loop variables of this
        # function: the all-logical branch below reads whatever values they
        # were last bound to.
        for t in operators.Reinterpret.types:
            for tt in common.get_output_types(
                    t, operators.Reinterpret.output_to):
                cpu_sigs.append(
                    operators.Reinterpret().get_scalar_signature(
                        'cpu', t, tt, 'c'))
                gpu_sigs.append(
                    operators.Reinterpret().get_scalar_signature(
                        'gpu', t, tt, 'cxx'))

        out.write(header_tpl.format(
            scalar_reinterpret_decls='\n'.join(
                'NSIMD_INLINE ' + sig + ';' for sig in cpu_sigs),
            gpu_reinterpret_decls='\n'.join(
                'inline ' + sig + ';' for sig in gpu_sigs)))

        for op_name, operator in operators.operators.items():
            if not operator.has_scalar_impl:
                continue

            if operator.params == ['l'] * len(operator.params):
                out.write('\n\n' + common.hbar + '\n\n')
                c_sig = operator.get_scalar_signature('cpu', '', '', 'c')
                cxx_sig = operator.get_scalar_signature('cpu', '', '', 'cxx')
                c_args = forwarded_args(operator)
                scalar_impl = scalar.get_impl(operator, tt, t)
                gpu_sig = operator.get_scalar_signature('gpu', t, tt, 'cxx')
                cuda_impl = cuda.get_impl(operator, tt, t)
                rocm_impl = rocm.get_impl(operator, tt, t)
                out.write(logical_tpl.format(
                    c_sig=c_sig,
                    cxx_sig=cxx_sig,
                    op_name=op_name,
                    c_args=c_args,
                    scalar_impl=scalar_impl,
                    gpu_impl=get_gpu_impl(gpu_sig, cuda_impl,
                                          rocm_impl=rocm_impl)))
                continue

            for t in operator.types:
                for tt in common.get_output_types(t, operator.output_to):
                    out.write('\n\n' + common.hbar + '\n\n')
                    c_sig = operator.get_scalar_signature('cpu', t, tt, 'c')
                    cxx_sig = operator.get_scalar_signature(
                        'cpu', t, tt, 'cxx')
                    if operator.closed:
                        suffix = t
                    else:
                        suffix = '{}_{}'.format(tt, t)
                    c_args = forwarded_args(operator)
                    scalar_impl = scalar.get_impl(operator, tt, t)
                    gpu_sig = operator.get_scalar_signature(
                        'gpu', t, tt, 'cxx')
                    cuda_impl = cuda.get_impl(operator, tt, t)
                    rocm_impl = rocm.get_impl(operator, tt, t)
                    out.write(typed_tpl.format(
                        c_sig=c_sig,
                        cxx_sig=cxx_sig,
                        op_name=op_name,
                        suffix=suffix,
                        c_args=c_args,
                        scalar_impl=scalar_impl,
                        gpu_impl=get_gpu_impl(gpu_sig, cuda_impl,
                                              rocm_impl=rocm_impl)))

        out.write(footer_tpl.format(hbar=common.hbar))

    common.clang_format(opts, filename)
Exemplo n.º 24
0
def gen_doc(opts):
    """Generate the markdown API documentation.

    Two kinds of files are written:

    - the API index (the 'api' markdown file), listing fixed links to the
      general pages followed by every operator grouped by category;
    - one page per operator with its description, its generic signatures
      (C base, C advanced, C++ base, C++ advanced) and, for each SIMD
      extension in ``opts.simd``, its C and C++ base signatures.

    Args:
        opts: generator options. ``opts.match``, when truthy, restricts the
            per-operator pages to operators whose name it matches
            (presumably a compiled regex -- only ``.match()`` is used).
            ``opts.simd`` is iterated as the list of SIMD extension names.
            ``opts`` is also forwarded to the ``common`` helpers that
            resolve, check and open output files.
    """
    common.myprint(opts, 'Generating doc for each function')

    # Build tree for api.md: category -> operators belonging to it
    api = dict()
    for _, operator in operators.items():
        for c in operator.categories:
            api.setdefault(c, []).append(operator)

    # api.md
    filename = common.get_markdown_file(opts, 'api')
    if common.can_create_filename(opts, filename):
        with common.open_utf8(opts, filename) as fout:
            fout.write('# General API\n\n')
            fout.write('- [Memory function](memory.md)\n')
            fout.write('- [Float16 related functions](fp16.md)\n')
            fout.write('- [Defines provided by NSIMD](defines.md)\n')
            # Single newline here: a blank line would split the bullet list
            # in two right before the last entry.
            fout.write('- [NSIMD pack and related functions](pack.md)\n')
            fout.write('- [NSIMD C++20 concepts](concepts.md)\n\n')
            fout.write('# SIMD operators\n')
            for c, ops in api.items():
                if not ops:
                    continue
                # `title` is read as an attribute of the category object.
                fout.write('\n## {}\n\n'.format(c.title))
                for op in ops:
                    Full_name = op.full_name[:1].upper() + op.full_name[1:]
                    fout.write('- [{} ({})](api_{}.md)\n'.format(
                        Full_name, op.name, common.to_filename(op.name)))

    # helper to get list of function signatures
    def to_string(var):
        """Return the signature(s) in `var` as one signature per line.

        `var` is either a single signature or an iterable of signatures.
        Inside each signature every run of spaces/newlines/tabs/carriage
        returns is collapsed into a single space.
        """
        # isinstance (rather than an exact type comparison) so that str
        # subclasses are treated as one signature, not split into chars.
        if isinstance(var, str) or not hasattr(var, '__iter__'):
            sigs = [var]
        else:
            sigs = list(var)
        return '\n'.join(re.sub('[ \n\t\r]+', ' ', sig) for sig in sigs)

    # (heading, API kind passed to get_signature, opening code fence) for
    # the two architecture-specific sections of each operator page.
    arch_sections = (
        ('## C base API (architecture specifics)', 'c_base', '```c\n'),
        ('\n\n## C++ base API (architecture specifics)', 'cxx_base',
         '```c++\n'),
    )

    # Operators (one file per operator)
    dirname = common.get_markdown_dir(opts)
    common.mkdir_p(dirname)
    for op_name, operator in operators.items():
        # Skip non-matching doc
        if opts.match and not opts.match.match(op_name):
            continue
        filename = common.get_markdown_api_file(opts, operator.name)
        if not common.can_create_filename(opts, filename):
            continue
        # Upper-case only the first character; [:1] tolerates an empty name.
        Full_name = operator.full_name[:1].upper() + operator.full_name[1:]
        with common.open_utf8(opts, filename) as fout:
            fout.write('# {}\n\n'.format(Full_name))
            fout.write('## Description\n\n')
            fout.write(operator.desc)
            fout.write('\n\n## C base API (generic)\n\n')
            fout.write('```c\n')
            fout.write(to_string(operator.get_generic_signature('c_base')))
            fout.write('\n```\n\n')
            fout.write('\n\n## C advanced API (generic, requires C11)\n\n')
            fout.write('```c\n')
            fout.write(to_string(operator.get_generic_signature('c_adv')))
            fout.write('\n```\n\n')
            fout.write('## C++ base API (generic)\n\n')
            fout.write('```c++\n')
            fout.write(to_string(operator.get_generic_signature('cxx_base')))
            fout.write('\n```\n\n')
            fout.write('## C++ advanced API\n\n')
            fout.write('```c++\n')
            # For 'cxx_adv' the result exposes .values(); only the values
            # (the signatures) are printed.
            fout.write(to_string(operator.get_generic_signature('cxx_adv'). \
                                 values()))
            fout.write('\n```\n\n')
            for heading, api_kind, fence in arch_sections:
                fout.write(heading)
                for simd_ext in opts.simd:
                    fout.write('\n\n### {}\n\n'.format(simd_ext.upper()))
                    fout.write(fence)
                    for typ in operator.types:
                        fout.write(
                            operator.get_signature(typ, api_kind, simd_ext))
                        fout.write(';\n')
                    fout.write('```')