Example #1
    def __get_spec_lib(self, state, opts):
        build_dir = self.store.build_dir
        conp = state['conp']
        kgen = get_specrates_kernel(self.store.reacs,
                                    self.store.specs,
                                    opts,
                                    conp=conp)
        # generate
        kgen.generate(build_dir)
        # write header
        write_aux(build_dir, opts, self.store.specs, self.store.reacs)
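
The helper above consumes a per-case `state` dictionary (only the `conp` flag here) together with a codegen options object. The option loops used throughout these tests enumerate the Cartesian product of such option values; a minimal stand-in for that pattern, using only the standard library and purely illustrative option names, is sketched below.

# rough stand-in for the option-loop pattern that produces `state`;
# the option names and values are illustrative, not pyJac's defaults
from collections import OrderedDict
from itertools import product

options = OrderedDict([
    ('conp', [True, False]),   # constant pressure vs. constant volume
    ('order', ['C', 'F']),     # data ordering
])

for values in product(*options.values()):
    state = dict(zip(options, values))
    # each state would then drive a helper such as __get_spec_lib(state, opts)
    print(state)
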
Example #2
    def test_compile_jacobian(self, state):
        lang = state['lang']
        jac_type = state['jac_type']
        opts, oploop = self.__get_objs(lang=lang)
        build_dir = self.store.build_dir
        obj_dir = self.store.obj_dir
        lib_dir = self.store.lib_dir
        packages = {'c': 'pyjac_c', 'opencl': 'pyjac_ocl'}
        for state in oploop:
            # clean old
            self.__cleanup()
            # create / write files
            build_dir = self.store.build_dir
            conp = state['conp']
            method = get_jacobian_kernel
            if jac_type == 'finite_difference':
                method = finite_difference_jacobian
            kgen = method(self.store.reacs, self.store.specs, opts, conp=conp)
            # generate
            kgen.generate(build_dir)
            # write header
            write_aux(build_dir, opts, self.store.specs, self.store.reacs)
            # test wrapper generation
            generate_wrapper(opts.lang,
                             build_dir,
                             obj_dir=obj_dir,
                             out_dir=lib_dir,
                             btype=build_type.jacobian)

            # create the test importer, and run
            imp = test_utils.get_import_source()
            with open(os.path.join(lib_dir, 'test_import.py'), 'w') as file:
                file.write(imp.substitute(path=lib_dir,
                                          package=packages[lang]))

            python_str = 'python{}.{}'.format(sys.version_info[0],
                                              sys.version_info[1])
            subprocess.check_call(
                [python_str,
                 os.path.join(lib_dir, 'test_import.py')])
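
The loop above finishes by writing a small importer script from a template and running it under a `python{major}.{minor}` interpreter built from `sys.version_info`. A self-contained sketch of that substitute-and-run pattern follows; the template body is a dummy stand-in for `test_utils.get_import_source()`, and the stdlib `os` package stands in for the generated `pyjac_c` / `pyjac_ocl` module.

# substitute-and-run sketch with a dummy importer template
import os
import subprocess
import sys
import tempfile
from string import Template

imp = Template("import sys; sys.path.insert(0, '$path'); import $package\n")

with tempfile.TemporaryDirectory() as lib_dir:
    script = os.path.join(lib_dir, 'test_import.py')
    with open(script, 'w') as file:
        file.write(imp.substitute(path=lib_dir, package='os'))
    # the test constructs 'python{}.{}'.format(*sys.version_info[:2]);
    # sys.executable is the simplest equivalent here
    subprocess.check_call([sys.executable, script])
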
Example #3
    def test_lockstep_driver(self):
        # get rate info
        rate_info = determine_jac_inds(self.store.reacs, self.store.specs,
                                       RateSpecialization.fixed)
        mod_test = get_run_source()

        for kind, loopy_opts in OptionLoopWrapper.from_get_oploop(
                self, do_ratespec=False, langs=get_test_langs(),
                do_vector=True, yield_index=True):

            # make namestore
            namestore = arc.NameStore(loopy_opts, rate_info)

            # kernel 1 - need the jacobian reset kernel
            reset = reset_arrays(loopy_opts, namestore)
            # kernel 2 - incrementer
            # make mapstore, arrays and kernel info
            mapstore = arc.MapStore(loopy_opts, namestore.phi_inds, None)

            # use arrays of 2 & 3 dimensions to test the driver's copying
            base_phi_shape = namestore.n_arr.shape
            P_lp, P_str = mapstore.apply_maps(namestore.P_arr,
                                              arc.global_ind)
            phi_lp, phi_str = mapstore.apply_maps(namestore.n_arr,
                                                  arc.global_ind,
                                                  arc.var_name)
            inputs = [P_lp.name, phi_lp.name]
            base_jac_shape = namestore.jac.shape
            jac_lp, jac_str = mapstore.apply_maps(namestore.jac,
                                                  arc.global_ind,
                                                  arc.var_name,
                                                  arc.var_name)
            outputs = [jac_lp.name]
            kernel_data = [P_lp, phi_lp, jac_lp]
            kernel_data.extend(arc.initial_condition_dimension_vars(
                loopy_opts, None))
            instructions = Template("""
                ${phi_str} = ${phi_str} + ${P_str} {id=0, dep=*}
                ${jac_str} = ${jac_str} + ${phi_str} {id=1, dep=0, nosync=0}
            """).safe_substitute(**locals())

            # handle atomicity
            can_vec, vec_spec = ic.get_deep_specializer(
                loopy_opts, atomic_ids=['1'])
            barriers = []
            if loopy_opts.depth:
                # need a barrier between the reset & the kernel
                barriers = [(0, 1, 'global')]

            inner_kernel = k_gen.knl_info(
                name='inner',
                instructions=instructions,
                mapstore=mapstore,
                var_name=arc.var_name,
                kernel_data=kernel_data,
                silenced_warnings=['write_race(0)', 'write_race(1)'],
                can_vectorize=can_vec,
                vectorization_specializer=vec_spec)

            # put it in a generator
            generator = k_gen.make_kernel_generator(
                loopy_opts, kernel_type=KernelType.dummy,
                name='inner_kernel', kernels=[reset, inner_kernel],
                namestore=namestore,
                input_arrays=inputs[:],
                output_arrays=outputs[:],
                is_validation=True,
                driver_type=DriverType.lockstep,
                barriers=barriers)

            # use a "weird" test size (not evenly divisible by the vector width)
            # to properly test the copy-in / copy-out
            test_size = self.store.test_size - 37
            if test_size <= 0:
                test_size = self.store.test_size - 1
                assert test_size > 0
            # and make
            with temporary_build_dirs() as (build, obj, lib):

                numpy_arrays = []

                def __save(shape, name, zero=False):
                    data = np.zeros(shape)
                    if not zero:
                        # make it a simple range
                        data.flat[:] = np.arange(np.prod(shape))
                    # save
                    myname = pjoin(lib, name + '.npy')
                    # need to split inputs / answer
                    np.save(myname, data.flatten('K'))
                    numpy_arrays.append(data.flatten('K'))

                # write 'data'
                import loopy as lp
                for arr in kernel_data:
                    if not isinstance(arr, lp.ValueArg):
                        __save((test_size,) + arr.shape[1:], arr.name,
                               arr.name in outputs)

                # and a parameter
                param = np.zeros((test_size,))
                param[:] = np.arange(test_size)

                # build code
                generator.generate(build,
                                   data_order=loopy_opts.order,
                                   data_filename='data.bin',
                                   for_validation=True)

                # write header
                write_aux(build, loopy_opts, self.store.specs, self.store.reacs)

                # generate wrapper
                pywrap(loopy_opts.lang, build,
                       obj_dir=obj, out_dir=lib,
                       ktype=KernelType.dummy,
                       file_base=generator.name,
                       additional_inputs=inputs[:],
                       additional_outputs=outputs[:])

                # and calling script
                test = pjoin(lib, 'test.py')

                inputs = utils.stringify_args(
                    [pjoin(lib, inp + '.npy') for inp in inputs], use_quotes=True)
                str_outputs = utils.stringify_args(
                    [pjoin(lib, inp + '.npy') for inp in outputs], use_quotes=True)

                num_threads = _get_test_input(
                    'num_threads', psutil.cpu_count(logical=False))
                with open(test, 'w') as file:
                    file.write(mod_test.safe_substitute(
                        package='pyjac_{lang}'.format(
                            lang=utils.package_lang[loopy_opts.lang]),
                        input_args=inputs,
                        test_arrays=str_outputs,
                        output_files=str_outputs,
                        looser_tols='[]',
                        loose_rtol=0,
                        loose_atol=0,
                        rtol=0,
                        atol=0,
                        non_array_args='{}, {}'.format(
                            test_size, num_threads),
                        kernel_name=generator.name.title(),))

                try:
                    utils.run_with_our_python([test])
                except subprocess.CalledProcessError:
                    logger = logging.getLogger(__name__)
                    logger.debug(utils.stringify_args(vars(loopy_opts), kwd=True))
                    assert False, 'lockstep_driver error'

                # calculate answers
                ns = base_jac_shape[1]
                # pressure is added to phi
                phi = numpy_arrays[1].reshape((test_size, ns),
                                              order=loopy_opts.order)
                p_arr = numpy_arrays[0]
                phi = phi + p_arr[:, np.newaxis]
                jac = numpy_arrays[2].reshape((test_size, ns, ns),
                                              order=loopy_opts.order)
                # and the diagonal of the jacobian has the updated pressure added
                jac[:, range(ns), range(ns)] += phi[:, range(ns)]
                # and read in outputs
                test = np.load(pjoin(lib, outputs[0] + '.npy')).reshape(
                    jac.shape, order=loopy_opts.order)
                assert np.array_equal(test, jac)
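
The verification step above saves every array with `flatten('K')` (i.e., in memory order) and reshapes the results back using the option loop's `order`. That round trip only works because the reshape order matches the memory layout; a small, self-contained illustration:

# flatten('K') follows the array's memory layout, so the reshape
# must use the matching order to recover the original values
import numpy as np

a_c = np.arange(6).reshape(2, 3)      # C-ordered memory
a_f = np.asfortranarray(a_c)          # same values, F-ordered memory

flat_c = a_c.flatten('K')             # [0 1 2 3 4 5]
flat_f = a_f.flatten('K')             # [0 3 1 4 2 5]

assert np.array_equal(flat_c.reshape(2, 3, order='C'), a_c)
assert np.array_equal(flat_f.reshape(2, 3, order='F'), a_f)
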
Example #4
    def __run_test(self,
                   method,
                   test_python_wrapper=True,
                   ktype=KernelType.species_rates,
                   **oploop_keywords):
        kwargs = {}
        if not test_python_wrapper:
            kwargs['shared'] = [True, False]
        oploop_keywords.update(kwargs)
        ignored_state_vals = ['conp'] + list(kwargs.keys())

        wrapper = OptionLoopWrapper.from_get_oploop(
            self,
            ignored_state_vals=ignored_state_vals,
            do_conp=False,
            **oploop_keywords)
        for opts in wrapper:
            with temporary_build_dirs() as (build_dir, obj_dir, lib_dir):
                # write files
                conp = wrapper.state['conp']
                kgen = method(self.store.reacs,
                              self.store.specs,
                              opts,
                              conp=conp)
                # generate
                kgen.generate(build_dir,
                              species_names=[x.name for x in self.store.specs],
                              rxn_strings=[str(x) for x in self.store.reacs])
                # write header
                write_aux(build_dir, opts, self.store.specs, self.store.reacs)
                if test_python_wrapper:
                    package = 'pyjac_{}'.format(utils.package_lang[opts.lang])
                    # test wrapper generation
                    pywrap(opts.lang,
                           build_dir,
                           obj_dir=obj_dir,
                           out_dir=lib_dir,
                           ktype=ktype)

                    imp = test_utils.get_import_source()
                    with open(os.path.join(lib_dir, 'test_import.py'),
                              'w') as file:
                        file.write(
                            imp.substitute(
                                path=lib_dir,
                                package=package,
                                kernel=utils.enum_to_string(ktype).title(),
                                nsp=len(self.store.specs),
                                nrxn=len(self.store.reacs)))

                    utils.run_with_our_python(
                        [os.path.join(lib_dir, 'test_import.py')])
                else:
                    # compile
                    generate_library(opts.lang,
                                     build_dir,
                                     obj_dir=obj_dir,
                                     out_dir=lib_dir,
                                     shared=wrapper.state['shared'],
                                     ktype=ktype)
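
Several of these tests rely on `temporary_build_dirs()` yielding a `(build, obj, lib)` triple of scratch directories that is cleaned up on exit. The sketch below shows a context manager with that shape; it is an assumption about the observed behavior, not pyJac's actual implementation.

# illustrative context manager mirroring how temporary_build_dirs() is used
import os
import shutil
import tempfile
from contextlib import contextmanager

@contextmanager
def temp_build_dirs():
    root = tempfile.mkdtemp()
    try:
        dirs = tuple(os.path.join(root, d) for d in ('build', 'obj', 'lib'))
        for d in dirs:
            os.makedirs(d)
        yield dirs
    finally:
        shutil.rmtree(root)

# usage mirrors the tests above
with temp_build_dirs() as (build_dir, obj_dir, lib_dir):
    assert all(os.path.isdir(d) for d in (build_dir, obj_dir, lib_dir))
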
Example #5
    def test_read_initial_conditions(self):
        setup = test_utils.get_read_ics_source()
        wrapper = OptionLoopWrapper.from_get_oploop(self, do_conp=True)
        for opts in wrapper:
            with temporary_build_dirs() as (build_dir, obj_dir, lib_dir):
                conp = wrapper.state['conp']

                # make a dummy generator
                insns = ("""
                        {spec} = {param} {{id=0}}
                    """)
                domain = arc.creator('domain',
                                     arc.kint_type, (10, ),
                                     'C',
                                     initializer=np.arange(
                                         10, dtype=arc.kint_type))
                mapstore = arc.MapStore(opts, domain, None)
                # create global args
                param = arc.creator(arc.pressure_array, np.float64,
                                    (arc.problem_size.name, 10), opts.order)
                spec = arc.creator(arc.state_vector, np.float64,
                                   (arc.problem_size.name, 10), opts.order)
                namestore = type('', (object, ), {'jac': ''})
                # create array / array strings
                param_lp, param_str = mapstore.apply_maps(param, 'j', 'i')
                spec_lp, spec_str = mapstore.apply_maps(spec, 'j', 'i')

                # create kernel infos
                info = knl_info('spec_eval',
                                insns.format(param=param_str, spec=spec_str),
                                mapstore,
                                kernel_data=[spec_lp, param_lp, arc.work_size],
                                silenced_warnings=['write_race(0)'])
                # create generators
                kgen = make_kernel_generator(
                    opts,
                    KernelType.dummy, [info],
                    namestore,
                    input_arrays=[param.name, spec.name],
                    output_arrays=[spec.name],
                    name='ric_tester')
                # make kernels
                kgen._make_kernels()
                # and generate RIC
                _, record, _ = kgen._generate_wrapping_kernel(build_dir)
                kgen._generate_common(build_dir, record)
                ric = os.path.join(
                    build_dir,
                    'read_initial_conditions' + utils.file_ext[opts.lang])

                # write header
                write_aux(build_dir, opts, self.store.specs, self.store.reacs)
                with open(os.path.join(build_dir, 'setup.py'), 'w') as file:
                    file.write(
                        setup.safe_substitute(buildpath=build_dir,
                                              obj_dir=obj_dir))

                # and compile
                from pyjac.libgen import compile, get_toolchain
                toolchain = get_toolchain(opts.lang)
                compile(opts.lang, toolchain, [ric], obj_dir=obj_dir)

                # write wrapper
                self.__write_with_subs('read_ic_wrapper.pyx',
                                       os.path.join(self.store.script_dir,
                                                    'test_utils'),
                                       build_dir,
                                       header_ext=utils.header_ext[opts.lang])
                # setup
                utils.run_with_our_python([
                    os.path.join(build_dir, 'setup.py'), 'build_ext',
                    '--build-lib', lib_dir
                ])

                infile = os.path.join(self.store.script_dir, 'test_utils',
                                      'ric_tester.py.in')
                outfile = os.path.join(lib_dir, 'ric_tester.py')
                # cogify
                try:
                    Cog().callableMain([
                        'cogapp', '-e', '-d', '-Dconp={}'.format(conp), '-o',
                        outfile, infile
                    ])
                except Exception:
                    import logging
                    logger = logging.getLogger(__name__)
                    logger.error('Error generating initial conditions reader:'
                                 ' {}'.format(outfile))
                    raise

                # save phi, param in correct order
                phi = (self.store.phi_cp if conp else self.store.phi_cv)
                savephi = phi.flatten(opts.order)
                param = self.store.P if conp else self.store.V
                savephi.tofile(os.path.join(lib_dir, 'phi_test.npy'))
                param.tofile(os.path.join(lib_dir, 'param_test.npy'))

                # save bin file
                out_file = np.concatenate(
                    (
                        np.reshape(phi[:, 0], (-1, 1)),  # temperature
                        np.reshape(param, (-1, 1)),  # param
                        phi[:, 1:]),
                    axis=1  # species
                )
                out_file = out_file.flatten('K')
                with open(os.path.join(lib_dir, 'data.bin'), 'wb') as file:
                    out_file.tofile(file)

                # and run
                utils.run_with_our_python(
                    [outfile, opts.order,
                     str(self.store.test_size)])
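
The `data.bin` written above packs, per initial condition, the temperature, the fixed parameter, and the remaining state-vector entries into one row via `np.concatenate` before flattening. A small illustration of that layout with made-up values (nsp species):

# layout illustration for the concatenated initial-conditions data
import numpy as np

n, nsp = 4, 3
phi = np.arange(n * (nsp + 1), dtype=np.float64).reshape(n, nsp + 1)  # [T, species...]
param = np.full(n, 101325.0)                                          # pressure (or volume)

out = np.concatenate((phi[:, :1],             # temperature column
                      param.reshape(-1, 1),   # fixed-parameter column
                      phi[:, 1:]), axis=1)    # species columns

# each row of the flattened buffer is then [T, param, species...]
flat = out.flatten('K')
assert np.array_equal(flat.reshape(n, nsp + 2), out)
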
Example #6
def test_strided_copy():
    wrapper = __test_cases()
    for opts in wrapper:
        lang = opts.lang
        order = opts.order
        depth = opts.depth
        width = opts.width

        with temporary_build_dirs() as (build_dir, obj_dir, lib_dir):
            vec_size = depth if depth else (width if width else 0)
            # set max per run such that we will have a non-full final run;
            # this should also be evenly divisible by depth and width
            # (as should the non-full run)
            max_per_run = 16
            # number of ics should be divisible by depth and width
            ics = max_per_run * 8 + vec_size
            if vec_size:
                assert ics % vec_size == 0
                assert max_per_run % vec_size == 0
                assert int(np.floor(ics / max_per_run) * max_per_run) % vec_size == 0

            # build initial callgen
            callgen = CallgenResult(
                order=opts.order, lang=opts.lang,
                dev_mem_type=wrapper.state['dev_mem_type'],
                type_map=type_map(opts.lang))

            # set type
            dtype = np.dtype('float64')

            # create test arrays
            def __create(shape):
                if not isinstance(shape, tuple):
                    shape = (shape,)
                shape = (ics,) + shape
                arr = np.zeros(shape, dtype=dtype, order=order)
                arr.flat[:] = np.arange(np.prod(shape))
                return arr
            arrays = [__create(16), __create(10), __create(20), __create((20, 20)),
                      __create(())]
            const = [np.arange(10, dtype=dtype)]

            # max size for initialization in kernel
            max_size = max([x.size for x in arrays])

            def _get_dtype(dtype):
                return lp.to_loopy_type(
                    dtype, target=get_target(opts.lang))

            lp_arrays = [lp.GlobalArg('a{}'.format(i),
                                      shape=(arc.problem_size.name,) + a.shape[1:],
                                      order=order,
                                      dtype=_get_dtype(arrays[i].dtype))
                         for i, a in enumerate(arrays)] + \
                        [lp.TemporaryVariable(
                            'a{}'.format(i + len(arrays)),
                            dtype=_get_dtype(dtype), order=order,
                            initializer=const[i],
                            read_only=True, shape=const[i].shape)
                         for i in range(len(const))]
            const = lp_arrays[len(arrays):]

            # now update args
            callgen = callgen.copy(name='test',
                                   input_args={'test': [x for x in lp_arrays
                                               if x not in const]},
                                   output_args={'test' : []},
                                   host_constants={'test': const})

            temp_fname = os.path.join(build_dir, 'in' + utils.file_ext[lang])
            fname = os.path.join(build_dir, 'test' + utils.file_ext[lang])
            with open(temp_fname, 'w') as file:
                file.write(dedent("""
       /*[[[cog
            # expected globals:
            #   callgen      - path to serialized callgen object
            #   lang         - the language to use
            #   problem_size - the problem size
            #   max_per_run  - the run-size
            #   max_size     - the maximum array size
            #   order        - The data ordering

            import cog
            import os
            import numpy as np
            from six.moves import cPickle as pickle

            # unserialize the callgen
            with open(callgen, 'rb') as file:
                callgen = pickle.load(file)

            # determine the headers to include
            lang_headers = []
            if lang == 'opencl':
                lang_headers.extend([
                                '#include "memcpy_2d.oclh"',
                                '#include "vectorization.oclh"',
                                '#include <CL/cl.h>',
                                '#include "error_check.oclh"'])
            elif lang == 'c':
                lang_headers.extend([
                    '#include "memcpy_2d.hpp"',
                    '#include "error_check.hpp"'])
            cog.outl('\\n'.join(lang_headers))
            ]]]
            [[[end]]]*/

            // normal headers
            #include <stdlib.h>
            #include <string.h>
            #include <assert.h>


            int main()
            {
                /*[[[cog
                    if lang == 'opencl':
                        cog.outl(
                    'double* h_temp_d;\\n'
                    'int* h_temp_i;\\n'
                    '// create a context / queue\\n'
                    'int lim = 10;\\n'
                    'cl_uint num_platforms;\\n'
                    'cl_uint num_devices;\\n'
                    'cl_platform_id platform [lim];\\n'
                    'cl_device_id device [lim];\\n'
                    'cl_int return_code;\\n'
                    'cl_context context;\\n'
                    'cl_command_queue queue;\\n'
                    'check_err(clGetPlatformIDs(lim, platform, &num_platforms));\\n'
                    'for (int i = 0; i < num_platforms; ++i)\\n'
                    '{\\n'
                    '    check_err(clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_ALL, '
                    '    lim, device, &num_devices));\\n'
                    '    if(num_devices > 0)\\n'
                    '        break;\\n'
                    '}\\n'
                    'context = clCreateContext(NULL, 1, &device[0], NULL, NULL, '
                    '&return_code);\\n'
                    'check_err(return_code);\\n'
                    '//create queue\\n'
                    'queue = clCreateCommandQueue(context, device[0], 0, '
                    '&return_code);\\n'
                    'check_err(return_code);\\n')
                ]]]
                [[[end]]]*/

                /*[[[cog

                    # determine maximum array size
                    cog.outl('double zero [{max_size}] = {{0}};'.format(
                        max_size=max_size))

                    # init variables
                    cog.outl('int problem_size = {};'.format(problem_size))
                    cog.outl('int per_run = {};'.format(max_per_run))
                  ]]]
                  [[[end]]]*/

                /*[[[cog
                    # create memory tool
                    from string import Template
                    import loopy as lp
                    from pyjac.kernel_utils.memory_tools import get_memory
                    from pyjac.kernel_utils.memory_tools import HostNamer
                    from pyjac.kernel_utils.memory_tools import DeviceNamer
                    mem = get_memory(callgen, host_namer=HostNamer(),
                                     device_namer=DeviceNamer())

                    # declare host and device arrays
                    for arr in callgen.kernel_args['test'] + callgen.work_arrays:
                        if not isinstance(arr, lp.ValueArg):
                            cog.outl(mem.define(False, arr))
                            cog.outl(mem.define(True, arr))
                    # define host constants
                    for arr in callgen.host_constants['test']:
                        cog.outl(mem.define(False, arr, host_constant=True,
                                            force_no_const=True))
                        cog.outl(mem.define(True, arr))

                    # and declare the temporary array
                    cog.outl(mem.define(True, lp.GlobalArg(
                        'temp_d', dtype=lp.to_loopy_type(np.float64))))

                    # allocate host and device arrays
                    for arr in callgen.kernel_args['test'] + callgen.work_arrays:
                        if not isinstance(arr, lp.ValueArg):
                            cog.outl(mem.alloc(False, arr))
                            cog.outl(mem.alloc(True, arr))
                    for arr in callgen.host_constants['test']:
                        # alloc device version of host constant
                        cog.outl(mem.alloc(True, arr))
                        # copy host constants
                        cog.outl(mem.copy(True, arr, host_constant=True))

                    def _get_size(arr):
                        size = 1
                        for x in arr.shape:
                            if not isinstance(x, int):
                                assert x.name == 'problem_size'
                                size *= int(problem_size)
                            else:
                                size *= x
                        return size

                    # save copies of host arrays
                    host_copies = [Template(
                        '${type} ${save} [${size}] = {${vals}};\\n'
                        'memset(${host}, 0, ${size} * sizeof(${type}));'
                        ).safe_substitute(
                            save='h_' + arr.name + '_save',
                            host='h_' + arr.name,
                            size=_get_size(arr),
                            vals=', '.join([str(x) for x in np.arange(
                                _get_size(arr)).flatten(order)]),
                            type=callgen.type_map[arr.dtype])
                            for arr in callgen.kernel_args['test'] +
                                       callgen.host_constants['test']]
                    for hc in host_copies:
                        cog.outl(hc)
                  ]]]
                  [[[end]]]*/

            // kernel
            for (size_t offset = 0; offset < problem_size; offset += per_run)
            {
                int this_run = problem_size - offset < per_run ? \
                    problem_size - offset : per_run;
                /* Memory Transfers into the kernel, if any */
                /*[[[cog
                  mem2 = get_memory(callgen, host_namer=HostNamer(postfix='_save'),
                                    device_namer=DeviceNamer())
                  for arr in callgen.kernel_args['test']:
                      cog.outl(mem2.copy(True, arr))
                  ]]]
                  [[[end]]]*/

                /* Memory Transfers out */
                /*[[[cog
                  for arr in callgen.kernel_args['test']:
                      cog.outl(mem.copy(False, arr))
                  ]]]
                  [[[end]]]*/
            }

                /*[[[cog
                    # and finally check
                    check_template = Template(
                        'for(int i = 0; i < ${size}; ++i)\\n'
                        '{\\n'
                        '    assert(${host}[i] == ${save}[i]);\\n'
                        '}\\n')
                    checks = [check_template.safe_substitute(
                        host=mem.get_name(False, arr),
                        save=mem2.get_name(False, arr),
                        size=_get_size(arr))
                              for arr in callgen.kernel_args['test']]
                    for check in checks:
                        cog.outl(check)
                  ]]]
                  [[[end]]]*/

                /*[[[cog
                    if lang == 'opencl':
                        cog.outl('check_err(clFlush(queue));')
                        cog.outl('check_err(clReleaseCommandQueue(queue));')
                        cog.outl('check_err(clReleaseContext(context));')
                  ]]]
                  [[[end]]]*/
                return 0;
            }
            """.strip()))

            # serialize callgen
            with open(os.path.join(build_dir, 'callgen.pickle'), 'wb') as file:
                pickle.dump(callgen, file)

            # cogify
            from cogapp import Cog
            cmd = [
                'cog', '-e', '-d', '-Dcallgen={}'.format(
                    os.path.join(build_dir, 'callgen.pickle')),
                '-Dmax_per_run={}'.format(max_per_run),
                '-Dproblem_size={}'.format(ics),
                '-Dmax_size={}'.format(max_size),
                '-Dlang={}'.format(lang),
                '-Dorder={}'.format(order),
                '-o', fname, temp_fname]
            Cog().callableMain(cmd)

            files = [fname]
            # write aux
            write_aux(build_dir, opts, [], [])

            # copy any deps
            def __copy_deps(lang, scan_path, out_path, change_extension=True,
                            ffilt=None, nfilt=None):
                deps = [x for x in os.listdir(scan_path) if os.path.isfile(
                    os.path.join(scan_path, x)) and not x.endswith('.in')]
                if ffilt is not None:
                    deps = [x for x in deps if ffilt in x]
                if nfilt is not None:
                    deps = [x for x in deps if nfilt not in x]
                files = []
                for dep in deps:
                    dep_dest = dep
                    dep_is_header = dep.endswith(utils.header_ext[lang])
                    ext = (utils.file_ext[lang] if not dep_is_header
                           else utils.header_ext[lang])
                    if change_extension and not dep.endswith(ext):
                        dep_dest = dep[:dep.rfind('.')] + ext
                    shutil.copyfile(os.path.join(scan_path, dep),
                                    os.path.join(out_path, dep_dest))
                    if not dep_is_header:
                        files.append(os.path.join(out_path, dep_dest))
                return files

            scan = os.path.join(script_dir, os.pardir, 'kernel_utils', lang)
            files += __copy_deps(lang, scan, build_dir, nfilt='.py')
            scan = os.path.join(script_dir, os.pardir, 'kernel_utils', 'common')
            files += __copy_deps(host_langs[lang], scan, build_dir,
                                 change_extension=False, ffilt='memcpy_2d')

            # build
            toolchain = get_toolchain(lang)
            obj_files = compile(
                lang, toolchain, files, source_dir=build_dir, obj_dir=obj_dir)
            lib = link(toolchain, obj_files, 'memory_test', lib_dir=lib_dir)
            # and run
            subprocess.check_call(lib)
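
The C source above is produced by embedding Python generator blocks between `/*[[[cog` and `]]]` markers and expanding them with cog; the `-D` flags make test parameters available as string-valued globals inside those blocks, and `-d` strips the generator code from the output. A minimal, self-contained illustration of the same mechanism (a toy template rather than the memory test above):

# minimal cog expansion sketch mirroring the flags used above
import os
import tempfile
from cogapp import Cog

template = """/*[[[cog
import cog
cog.outl('int per_run = {};'.format(max_per_run))
]]]
[[[end]]]*/
"""

with tempfile.TemporaryDirectory() as d:
    infile = os.path.join(d, 'test.c.in')
    outfile = os.path.join(d, 'test.c')
    with open(infile, 'w') as f:
        f.write(template)
    Cog().callableMain(['cog', '-e', '-d', '-Dmax_per_run=16',
                        '-o', outfile, infile])
    with open(outfile) as f:
        print(f.read())   # contains the generated line: int per_run = 16;
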
Example #7
def test_strided_copy(state):
    lang = state['lang']
    order = state['order']
    depth = state['depth']
    width = state['width']

    # cleanup
    clean_dir(build_dir)
    clean_dir(obj_dir)
    clean_dir(lib_dir)

    # create
    utils.create_dir(build_dir)
    utils.create_dir(obj_dir)
    utils.create_dir(lib_dir)

    vec_size = depth if depth else (width if width else 0)
    # set max per run such that we will have a non-full final run;
    # this should also be evenly divisible by depth and width
    # (as should the non-full run)
    max_per_run = 16
    # number of ics should be divisible by depth and width
    ics = max_per_run * 8 + vec_size
    if vec_size:
        assert ics % vec_size == 0
        assert max_per_run % vec_size == 0
        assert int(np.floor(ics / max_per_run) * max_per_run) % vec_size == 0
    dtype = np.dtype('float64')

    # create test arrays
    def __create(shape):
        if not isinstance(shape, tuple):
            shape = (shape, )
        shape = (ics, ) + shape
        arr = np.zeros(shape, dtype=dtype, order=order)
        arr.flat[:] = np.arange(np.prod(shape))
        return arr

    arrays = [
        __create(16),
        __create(10),
        __create(20),
        __create((20, 20)),
        __create(())
    ]
    const = [np.arange(10, dtype=dtype)]
    lp_arrays = [lp.GlobalArg('a{}'.format(i), shape=('problem_size',) + a.shape[1:],
                              order=order, dtype=(arrays + const)[i].dtype)
                 for i, a in enumerate(arrays)] + \
                [lp.TemporaryVariable('a{}'.format(i + len(arrays)), dtype=dtype,
                 order=order, initializer=const[i], read_only=True,
                 shape=const[i].shape) for i in range(len(const))]
    const = lp_arrays[len(arrays):]

    dtype = 'double'

    # create array splitter
    opts = type('', (object, ), {
        'width': width,
        'depth': depth,
        'order': order,
        'lang': lang
    })
    asplit = array_splitter(opts)

    # split numpy
    arrays = asplit.split_numpy_arrays(arrays)
    # make dummy knl
    knl = lp.make_kernel(
        '{[i]: 0 <= i <= 1}', """
                            if i > 1
                                a0[i, i] = 0
                                a1[i, i] = 0
                                a2[i, i] = 0
                                a3[i, i, i] = 0
                                a4[i] = 0
                                <> k = a5[i]
                            end
                         """, lp_arrays)
    # split loopy
    lp_arrays = asplit.split_loopy_arrays(knl).args

    # now create a simple library
    mem = memory_manager(opts.lang,
                         opts.order,
                         asplit._have_split(),
                         dev_type=state['device_type'],
                         strided_c_copy=lang == 'c')
    mem.add_arrays([x for x in lp_arrays],
                   in_arrays=[x.name for x in lp_arrays if x not in const],
                   out_arrays=[x.name for x in lp_arrays if x not in const],
                   host_constants=const)

    # create "kernel"
    size_type = 'int'
    lang_headers = []
    if lang == 'opencl':
        lang_headers.extend([
            '#include "memcpy_2d.oclh"', '#include "vectorization.oclh"',
            '#include <CL/cl.h>', '#include "ocl_errorcheck.oclh"'
        ])
        size_type = 'cl_uint'
    elif lang == 'c':
        lang_headers.extend(
            ['#include "memcpy_2d.h"', '#include "error_check.h"'])

    # kernel must copy in and out, using the mem_manager's format
    knl = Template("""
    for (size_t offset = 0; offset < problem_size; offset += per_run)
    {
        ${type} this_run = problem_size - offset < per_run ? \
            problem_size - offset : per_run;
        /* Memory Transfers into the kernel, if any */
        ${mem_transfers_in}

        /* Memory Transfers out */
        ${mem_transfers_out}
    }
    """).safe_substitute(type=size_type,
                         mem_transfers_in=mem._mem_transfers(
                             to_device=True, host_postfix='_save'),
                         mem_transfers_out=mem.get_mem_transfers_out(),
                         problem_size=ics)

    # create the host memory allocations
    host_names = ['h_' + arr.name for arr in lp_arrays]
    host_allocs = mem.get_mem_allocs(True, host_postfix='')

    # device memory allocations
    device_allocs = mem.get_mem_allocs(False)

    # copy to save for test
    host_name_saves = ['h_' + a.name + '_save' for a in lp_arrays]
    host_const_allocs = mem.get_host_constants()
    host_copies = [
        Template("""
        ${type} ${save} [${size}] = {${vals}};
        memset(${host}, 0, ${size} * sizeof(${type}));
        """).safe_substitute(save='h_' + lp_arrays[i].name + '_save',
                             host='h_' + lp_arrays[i].name,
                             size=arrays[i].size,
                             vals=', '.join(
                                 [str(x) for x in arrays[i].flatten()]),
                             type=dtype) for i in range(len(arrays))
    ]

    # and finally checks
    check_template = Template("""
        for(int i = 0; i < ${size}; ++i)
        {
            assert(${host}[i] == ${save}[i]);
        }
    """)
    checks = [
        check_template.safe_substitute(host=host_names[i],
                                       save=host_name_saves[i],
                                       size=arrays[i].size)
        for i in range(len(arrays))
    ]

    # and preambles
    ocl_preamble = """
    double* temp_d;
    int* temp_i;
    // create a context / queue
    int lim = 10;
    cl_uint num_platforms;
    cl_uint num_devices;
    cl_platform_id platform [lim];
    cl_device_id device [lim];
    cl_int return_code;
    cl_context context;
    cl_command_queue queue;
    check_err(clGetPlatformIDs(lim, platform, &num_platforms));
    for (int i = 0; i < num_platforms; ++i)
    {
        check_err(clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_ALL, lim, device,
            &num_devices));
        if(num_devices > 0)
            break;
    }
    context = clCreateContext(NULL, 1, &device[0], NULL, NULL, &return_code);
    check_err(return_code);

    //create queue
    queue = clCreateCommandQueue(context, device[0], 0, &return_code);
    check_err(return_code);
    """
    preamble = ''
    if lang == 'opencl':
        preamble = ocl_preamble

    end = ''
    if lang == 'opencl':
        end = """
        check_err(clFlush(queue));
        check_err(clReleaseCommandQueue(queue));
        check_err(clReleaseContext(context));
    """

    file_src = Template("""
${lang_headers}
#include <stdlib.h>
#include <string.h>
#include <assert.h>


int main()
{
    ${preamble}

    double zero [${max_dim}] = {0};

    ${size_type} problem_size = ${problem_size};
    ${size_type} per_run = ${max_per_run};

    ${host_allocs}
    ${host_const_allocs}
    ${mem_declares}
    ${device_allocs}

    ${mem_saves}

    ${host_constant_copy}

    ${knl}

    ${checks}

    ${end}

    exit(0);
}
    """).safe_substitute(lang_headers='\n'.join(lang_headers),
                         mem_declares=mem.get_defns(),
                         host_allocs=host_allocs,
                         host_const_allocs=host_const_allocs,
                         device_allocs=device_allocs,
                         mem_saves='\n'.join(host_copies),
                         host_constant_copy=mem.get_host_constants_in(),
                         checks='\n'.join(checks),
                         knl=knl,
                         preamble=preamble,
                         end=end,
                         size_type=size_type,
                         max_per_run=max_per_run,
                         problem_size=ics,
                         max_dim=max([x.size for x in arrays]))

    # write file
    fname = os.path.join(build_dir, 'test' + utils.file_ext[lang])
    with open(fname, 'w') as file:
        file.write(file_src)
    files = [fname]

    # write aux
    write_aux(build_dir, opts, [], [])

    # copy any deps
    def __copy_deps(lang,
                    scan_path,
                    out_path,
                    change_extension=True,
                    ffilt=None):
        deps = [
            x for x in os.listdir(scan_path)
            if os.path.isfile(os.path.join(scan_path, x))
            and not x.endswith('.in')
        ]
        if ffilt is not None:
            deps = [x for x in deps if ffilt in x]
        files = []
        for dep in deps:
            dep_dest = dep
            dep_is_header = dep.endswith(utils.header_ext[lang])
            ext = (utils.file_ext[lang]
                   if not dep_is_header else utils.header_ext[lang])
            if change_extension and not dep.endswith(ext):
                dep_dest = dep[:dep.rfind('.')] + ext
            shutil.copyfile(os.path.join(scan_path, dep),
                            os.path.join(out_path, dep_dest))
            if not dep_is_header:
                files.append(os.path.join(out_path, dep_dest))
        return files

    scan = os.path.join(script_dir, os.pardir, 'kernel_utils', lang)
    files += __copy_deps(lang, scan, build_dir)
    scan = os.path.join(script_dir, os.pardir, 'kernel_utils', 'common')
    files += __copy_deps(host_langs[lang],
                         scan,
                         build_dir,
                         change_extension=False,
                         ffilt='memcpy_2d')

    # build
    files = [
        file_struct(lang, lang, f[:f.rindex('.')], [build_dir], [], build_dir,
                    obj_dir, True, True) for f in files
    ]
    assert not any(compiler(x) for x in files)
    lib = libgen(lang, obj_dir, lib_dir, [x.filename for x in files], True,
                 False, True)
    lib = os.path.join(lib_dir, lib)
    # and run
    subprocess.check_call(lib)
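
Both this example and the next fabricate a throw-away options object with `type('', (object, ), ...)`, which simply turns a dict of option values into attribute-style access. For comparison, the standard library offers the same effect more explicitly; a brief sketch with illustrative option values:

# equivalent of the type('', (object,), state)() trick used in these tests
from types import SimpleNamespace

state = {'width': 4, 'depth': None, 'order': 'C', 'lang': 'c'}

opts_dynamic = type('', (object,), state)()   # pattern used in the tests
opts_ns = SimpleNamespace(**state)            # stdlib equivalent

assert opts_dynamic.order == opts_ns.order == 'C'
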
Example #8
    def test_read_initial_conditions(self):
        build_dir = self.store.build_dir
        obj_dir = self.store.obj_dir
        lib_dir = self.store.lib_dir
        setup = test_utils.get_read_ics_source()
        utils.create_dir(build_dir)
        utils.create_dir(obj_dir)
        utils.create_dir(lib_dir)
        oploop = OptionLoop(
            OrderedDict([
                # no need to test conv (constant volume)
                ('conp', [True]),
                ('order', ['C', 'F']),
                ('depth', [4, None]),
                ('width', [4, None]),
                ('lang', ['c'])
            ]))
        for state in oploop:
            if state['depth'] and state['width']:
                continue
            self.__cleanup(False)
            # create dummy loopy opts
            opts = type('', (object, ), state)()
            asplit = array_splitter(opts)

            # get source
            path = os.path.realpath(
                os.path.join(self.store.script_dir, os.pardir, 'kernel_utils',
                             'common', 'read_initial_conditions.c.in'))

            with open(path, 'r') as file:
                ric = Template(file.read())
            # subs
            ric = ric.safe_substitute(mechanism='mechanism.h',
                                      vectorization='vectorization.h')
            # write
            with open(os.path.join(build_dir, 'read_initial_conditions.c'),
                      'w') as file:
                file.write(ric)
            # write header
            write_aux(build_dir, opts, self.store.specs, self.store.reacs)
            # write setup
            with open(os.path.join(build_dir, 'setup.py'), 'w') as file:
                file.write(setup.safe_substitute(buildpath=build_dir))
            # copy read ics header to final dest
            shutil.copyfile(
                os.path.join(self.store.script_dir, os.pardir, 'kernel_utils',
                             'common', 'read_initial_conditions.h'),
                os.path.join(build_dir, 'read_initial_conditions.h'))
            # copy wrapper
            shutil.copyfile(
                os.path.join(self.store.script_dir, 'test_utils',
                             'read_ic_wrapper.pyx'),
                os.path.join(build_dir, 'read_ic_wrapper.pyx'))
            # setup
            python_str = 'python{}.{}'.format(sys.version_info[0],
                                              sys.version_info[1])
            call = [
                python_str,
                os.path.join(build_dir, 'setup.py'), 'build_ext',
                '--build-lib', lib_dir
            ]
            subprocess.check_call(call)
            # copy in tester
            shutil.copyfile(
                os.path.join(self.store.script_dir, 'test_utils',
                             'ric_tester.py'),
                os.path.join(lib_dir, 'ric_tester.py'))

            # For simplicity (and really, lack of need) we test CONP only
            # hence, the extra variable is the volume, while the fixed parameter
            # is the pressure

            # save phi, param in correct order
            phi = (self.store.phi_cp if opts.conp else self.store.phi_cv)
            save_phi, = asplit.split_numpy_arrays(phi)
            save_phi = save_phi.flatten(opts.order)
            param = self.store.P if opts.conp else self.store.V
            save_phi.tofile(os.path.join(lib_dir, 'phi_test.npy'))
            param.tofile(os.path.join(lib_dir, 'param_test.npy'))

            # save bin file
            out_file = np.concatenate(
                (
                    np.reshape(phi[:, 0], (-1, 1)),  # temperature
                    np.reshape(param, (-1, 1)),  # param
                    phi[:, 1:]),
                axis=1  # species
            )
            out_file = out_file.flatten('K')
            with open(os.path.join(lib_dir, 'data.bin'), 'wb') as file:
                out_file.tofile(file)

            # and run
            subprocess.check_call([
                python_str,
                os.path.join(lib_dir, 'ric_tester.py'), opts.order,
                str(self.store.test_size)
            ])
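
As in the earlier read-initial-conditions test, `phi_test.npy` and `param_test.npy` above are written with `ndarray.tofile`, which dumps raw bytes with no `.npy` header despite the file extension; the reader (presumably `ric_tester.py`) therefore has to use `np.fromfile` with the known dtype rather than `np.load`. A short illustration of the difference:

# tofile() writes raw bytes; np.save() writes the self-describing .npy format
import os
import tempfile

import numpy as np

a = np.arange(6, dtype=np.float64)
with tempfile.TemporaryDirectory() as d:
    raw = os.path.join(d, 'raw.npy')          # '.npy' name, but raw contents
    a.tofile(raw)                             # no header: just 48 bytes of float64
    assert np.array_equal(np.fromfile(raw, dtype=np.float64), a)

    real = os.path.join(d, 'real.npy')
    np.save(real, a)                          # header records dtype and shape
    assert np.array_equal(np.load(real), a)
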