Example #1
def override_log(key, old, new):
    # note: ttype, test, jtype and stype come from the enclosing scope
    # (see the full context in Example #9 below)
    logging.info(
        'Replacing {} for test type: {}. Old value:'
        ' ({}), New value: ({})'.format(
            key,
            stringify_args(
                [ttype, test['eval-type'], jtype, stype],
                joiner='.'),
            stringify_args(listify(old)),
            stringify_args(listify(new))))
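
For reference, a minimal sketch of what pyjac.utils.listify and stringify_args plausibly do, inferred from their use throughout these examples (the real pyJac implementations may differ in details such as tuple or string handling):

def listify(value):
    # wrap a scalar in a list; pass lists through unchanged
    if isinstance(value, list):
        return value
    return [value]

def stringify_args(arglist, joiner=', '):
    # join the string form of each argument, e.g. for log messages
    return joiner.join(str(x) for x in arglist)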
Example #2
def __init__(self, bad_inputs):
    # constructor for InvalidInputSpecificationException
    from pyjac.utils import stringify_args, listify
    self.message = (
        'Inputs: ({}) were incorrectly or conflictingly specified. '
        'See debug output for more information'.format(
            stringify_args(listify(bad_inputs))))
    super(InvalidInputSpecificationException, self).__init__(self.message)
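
A usage sketch, raising the exception with a list of conflicting input names (the names below are illustrative, not taken from pyJac):

# hypothetical input names, for illustration only
raise InvalidInputSpecificationException(['width', 'depth'])
# message: "Inputs: (width, depth) were incorrectly or conflictingly
# specified. See debug output for more information"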
Example #3
def __test_limit(enumlist, limit):
    # walk the nested 'limits' dict of the test mechanism, checking that the
    # innermost entry matches the expected limit
    stypes = [enum_to_string(enum) for enum in listify(enumlist)]
    root = models['TestMech']['limits']
    for i, stype in enumerate(stypes):
        assert stype in root
        if i == len(stypes) - 1:
            assert root[stype] == limit
        else:
            root = root[stype]
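
The walk above expects a nested limits dictionary; a sketch of the shape it assumes (enum names and values illustrative):

# e.g. __test_limit([build_type.jacobian, JacobianFormat.sparse], 100000)
# would check a structure like:
models = {'TestMech': {'limits': {'jacobian': {'sparse': 100000}}}}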
Example #4
    def __internal_validator(self,
                             field,
                             valuelist,
                             valid,
                             message,
                             necessary=True):
        # "valid" may be either a predicate or a container of allowed values
        valuelist = listify(valuelist)
        if six.callable(valid):
            badvals = [x for x in valuelist if not valid(x)]
        else:
            badvals = [x for x in valuelist if x not in valid]
        if badvals and necessary:
            args = (badvals,)
            if not six.callable(valid):
                # include the allowed values in the error message
                args = (badvals, valid)

            self._error(
                field, message.format(*tuple(stringify_args(x) for x in args)))
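
A sketch of how the two branches behave when called from inside the validator (field names and messages illustrative):

# "valid" as a container of allowed values:
#   self.__internal_validator('order', ['C', 'X'], ['C', 'F'],
#                             'Bad value(s) ({}), allowed: ({})')
#   -> self._error('order', 'Bad value(s) (X), allowed: (C, F)')
# "valid" as a predicate:
#   self.__internal_validator('vecsize', [4, -1], lambda x: x > 0,
#                             'Bad value(s) ({})')
#   -> self._error('vecsize', 'Bad value(s) (-1)')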
Example #5
def build_and_validate(schema,
                       source,
                       validator=CustomValidator,
                       includes=None,
                       allow_unknown=False,
                       update=False):
    """
    Builds schema from file, validates source from file and returns results.
    Convenience method for :func:`build_schema` and :func:`validate`

    Parameters
    ----------
    schema: str
        The schema to parse
    source: str
        Path to the source file
    validator: :class:`Validator` [:class:`CustomValidator`]
        The validator class to use
    includes: list of str
        Additional schema to use for includes, if any
    allow_unknown: bool [False]
        Allow unknown keys
    update: bool [False]
        Allow partial specification of data, useful for testing specific parts
        of schemas

    Returns
    -------
    data: dict
        The validated data
    """
    # avoid the mutable default-argument pitfall: listify() may return the
    # passed-in list itself, so the append below could mutate the default
    includes = listify(includes) if includes is not None else []
    if 'common_schema.yaml' != source:
        includes.append('common_schema.yaml')
    built = build_schema(schema,
                         validatorclass=validator,
                         includes=includes,
                         allow_unknown=allow_unknown,
                         update=update)
    return validate(built, source, filename=schema)
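
A usage sketch; 'test_matrix_schema.yaml' is the schema name used elsewhere in these examples, while 'my_matrix.yaml' is a stand-in for a user-supplied source file:

data = build_and_validate('test_matrix_schema.yaml', 'my_matrix.yaml',
                          allow_unknown=True)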
Example #6
def add_headers(self, headers):
    # accept a single header or a list of headers
    headers = utils.listify(headers)
    self.headers.extend(headers)
Example #7
def _dummy_opts(order='C'):
    # return the first option set generated by the loop
    for opts in opts_loop(order=listify(order), lang=['c']):
        return opts
Example #8
def load_platforms(matrix, langs=get_test_langs(), raise_on_empty=False):
    """
    Loads test platforms from the :param:`matrix` file for the given :param:`langs`

    Parameters
    ----------
    matrix: dict
        A loaded test matrix from :func:`get_test_matrix`
    langs: list of str
        The allowed languages, modifiable by the :envvar:`TEST_LANGS` or test_langs
        in :file:`test_setup.py`
    raise_on_empty: bool [False]
        If True and the supplied matrix has no platforms, raise an exception

    Returns
    -------
    pre-loop: list of tuples
        The parameters that may be converted into a :class:`optionloop.OptionLoop`
    """

    oploop = []
    try:
        # try to use user-specified platforms
        platforms = matrix[platform_list_key]
        # put into oploop form, and make repeatable
        for p in sorted(platforms, key=lambda x: x['name']):
            # limit to supplied languages
            inner_loop = []
            allowed_langs = langs[:]
            if 'lang' in p:
                # pull from platform languages if possible (assumes the
                # platform specifies a single language string)
                allowed_langs = p['lang'] if p['lang'] in allowed_langs else []
            if not allowed_langs:
                # can't use language
                continue

            # set lang
            inner_loop.append(('lang', allowed_langs))

            # get vectorization type and size
            vectype = listify('par' if (
                'vectype' not in p or not can_vectorize_lang[allowed_langs]
            ) else p['vectype'])
            # check if we have a vectorization
            if not (len(vectype) == 1 and vectype[0] == 'par'):
                # try to load the vecsize; fail on a missing or malformed entry
                try:
                    vecsize = [x for x in listify(p['vecsize'])]
                except (KeyError, TypeError):
                    raise Exception(
                        'Platform {} has non-parallel vectype(s) {} but no supplied '
                        'vector size.'.format(
                            p['name'], [x for x in vectype if x != 'par']))

                add_none = 'par' in vectype
                for v in [x.lower() for x in vectype]:

                    def _get(add_none):
                        if add_none:
                            return vecsize + [None]
                        return vecsize

                    if v == 'wide':
                        inner_loop.append(('width', _get(add_none)))
                        add_none = False
                    elif v == 'deep':
                        inner_loop.append(('depth', _get(add_none)))
                        add_none = False
                    elif v != 'par':
                        raise Exception(
                            'Platform {} has invalid supplied vectype '
                            '{}'.format(p['name'], v))

            # fill in missing vectypes
            for x in ['width', 'depth']:
                if next((y for y in inner_loop if y[0] == x), None) is None:
                    inner_loop.append((x, None))

            # check for atomics
            if 'atomics' in p:
                inner_loop.append(('use_atomics', p['atomics']))

            # and store platform
            inner_loop.append(('platform', p['name']))

            # finally check for seperate_kernels
            sep_knl = True
            if 'seperate_kernels' in p and not p['seperate_kernels']:
                sep_knl = False
            inner_loop.append(('seperate_kernels', sep_knl))

            # create option loop and add
            oploop += [inner_loop]
    except (TypeError, KeyError):
        if raise_on_empty:
            raise Exception('Supplied test matrix has no platforms.')

    finally:
        if not oploop:
            # file not found, or no appropriate targets for specified languages
            for lang in langs:
                vecsizes = [4, None] if can_vectorize_lang[lang] else [None]
                inner_loop = [('lang', lang)]
                if lang == 'opencl':
                    import pyopencl as cl
                    inner_loop += [('width', vecsizes[:]),
                                   ('depth', vecsizes[:])]
                    # add all devices
                    device_types = [
                        cl.device_type.CPU, cl.device_type.GPU,
                        cl.device_type.ACCELERATOR
                    ]
                    platforms = cl.get_platforms()
                    platform_list = []
                    for p in platforms:
                        for dev_type in device_types:
                            devices = p.get_devices(dev_type)
                            if devices:
                                plist = [('platform', p.name)]
                                use_atomics = False
                                if 'cl_khr_int64_base_atomics' in \
                                        devices[0].extensions:
                                    use_atomics = True
                                plist.append(('use_atomics', use_atomics))
                                platform_list.append(plist)
                    for p in platform_list:
                        # create option loop and add
                        oploop += [inner_loop + p]
                elif lang == 'c':
                    inner_loop += [('platform', 'OpenMP')]
                    oploop += [inner_loop]
    return oploop
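
A sketch of the input and output shapes, with a hypothetical matrix entry (the real top-level key is whatever platform_list_key names in the source):

# hypothetical matrix; key name and values for illustration only
matrix = {'platform-list': [{'name': 'intel', 'lang': 'opencl',
                             'vectype': ['wide'], 'vecsize': [2, 4]}]}
# load_platforms(matrix, langs=['opencl']) would then yield one inner loop
# resembling:
# [('lang', 'opencl'), ('width', [2, 4]), ('depth', None),
#  ('platform', 'intel'), ('seperate_kernels', True)]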
Example #9
def get_test_matrix(work_dir,
                    test_type,
                    test_matrix,
                    for_validation,
                    raise_on_missing=True,
                    langs=get_test_langs()):
    """Runs a set of mechanisms and an ordered dictionary for
    performance and functional testing

    Parameters
    ----------
    work_dir : str
        Working directory with mechanisms and for data
    test_type: :class:`build_type.jacobian`
        Controls some testing options (e.g., whether to do a sparse matrix or not)
    test_matrix: str
        The test matrix file to load
    for_validation: bool
        Determines which test type to load from the test matrix:
        validation or performance
    raise_on_missing: bool
        Raise an exception if the specified :param:`test_matrix` file is not
        found
    langs: list of str
        The allowed languages, modifiable by the :envvar:`TEST_LANGS` or test_langs
        in :file:`test_setup.py`

    Returns
    -------
    mechanisms : dict
        A dictionary indicating which mechanisms are available for testing.
        The structure is as follows:
            mech_name : {'mech' : file path to the Cantera mechanism
                         'ns' : number of species in the mechanism
                         'limits' : {'full': XXX, 'sparse': XXX}}: a dictionary of
                            limits on the number of conditions that can be evaluated
                            for this mechanism (full & sparse jacobian respectively)
                            due to memory constraints
    params  : OrderedDict
        The parameters to put in an oploop
    max_vec_width : int
        The maximum vector width to test

    """
    work_dir = abspath(work_dir)

    # validate the test matrix
    matrix_name = test_matrix
    test_matrix = build_and_validate('test_matrix_schema.yaml', test_matrix)

    # check that we have the working directory
    if not exists(work_dir):
        raise Exception('Work directory {} for '.format(work_dir) +
                        'testing not found, exiting...')

    # load the models
    models = load_models(work_dir, test_matrix)
    assert isinstance(test_type, build_type)

    # load tests
    tests = load_tests(test_matrix, matrix_name)
    # filter those that match the test type
    valid_str = 'validation' if for_validation else 'performance'
    tests = [test for test in tests if test['test-type'] == valid_str]
    tests = [
        test for test in tests
        if test['eval-type'] == enum_to_string(test_type)
        or test['eval-type'] == 'both'
    ]
    # and dictify
    tests = [OrderedDict(test) for test in tests]
    if not tests:
        raise Exception('No tests found in matrix {} for {} test of {}, '
                        'exiting...'.format(matrix_name, valid_str,
                                            enum_to_string(test_type)))

    # get defaults we haven't migrated to schema yet
    rate_spec = ['fixed', 'hybrid'] if test_type != build_type.jacobian \
        else ['fixed']
    sparse = ([enum_to_string(JacobianFormat.sparse),
               enum_to_string(JacobianFormat.full)]
              if test_type == build_type.jacobian
              else [enum_to_string(JacobianFormat.full)])
    jac_types = ([enum_to_string(JacobianType.exact),
                  enum_to_string(JacobianType.finite_difference)]
                 if test_type == build_type.jacobian and not for_validation
                 else [enum_to_string(JacobianType.exact)])
    split_kernels = [False]

    # and default # of cores; this may be overridden
    default_num_cores, can_override_cores = num_cores_default()

    # load platforms
    platforms = load_platforms(test_matrix,
                               langs=langs,
                               raise_on_empty=raise_on_missing)
    platforms = [OrderedDict(platform) for platform in platforms]
    out_params = []
    logger = logging.getLogger(__name__)
    for test in tests:
        # filter platforms
        plats = [p.copy() for p in platforms]
        if 'platforms' in test:
            plats = [
                plat for plat in plats if plat['platform'] in test['platforms']
            ]
            if len(plats) < len(platforms):
                logger.info(
                    'Platforms ({}) filtered out for test type: {}'.format(
                        ', '.join([
                            p['platform'] for p in platforms if p not in plats
                        ]), ' - '.join([test['test-type'],
                                        test['eval-type']])))
        if not plats:
            logger.warning('No platforms found for test {}, skipping...'.format(
                ' - '.join([test['test-type'], test['eval-type']])))
            continue

        for plookup in plats:
            clean = plookup.copy()
            # get default number of cores
            cores = default_num_cores[:]
            # get default vector widths
            widths = plookup['width']
            is_wide = widths is not None
            depths = plookup['depth']
            is_deep = depths is not None
            if is_deep and not is_wide:
                widths = depths[:]
            # sanity check
            if is_wide or is_deep:
                assert widths is not None
            # special gpu handling for cores
            is_gpu = False
            # test platform type
            if platform_is_gpu(plookup['platform']):
                # set cores to 1
                is_gpu = True
                cores = [1]

            def apply_vectypes(lookup,
                               widths,
                               is_wide=is_wide,
                               is_deep=is_deep):
                if is_wide or is_deep:
                    # set vec widths
                    use_par = None in widths or (is_wide and is_deep)
                    lookup['vecsize'] = [x for x in widths[:] if x is not None]
                    base = [True] if not use_par else [True, False]
                    if is_wide:
                        lookup['wide'] = base[:]
                        base.pop()
                    if is_deep:
                        lookup['deep'] = base[:]
                else:
                    lookup['vecsize'] = [None]
                    lookup['wide'] = [False]
                    lookup['deep'] = [False]
                del lookup['width']
                del lookup['depth']

            apply_vectypes(plookup, widths)

            # default is both conp / conv
            conp = [True, False]
            order = ['C', 'F']

            # loop over possible overrides
            oploop = OptionLoop(
                OrderedDict([('ttype', [enum_to_string(test_type)]),
                             ('jtype', jac_types), ('stype', sparse)]))
            for state in oploop:
                ttype = state['ttype']
                jtype = state['jtype']
                stype = state['stype']

                def override_log(key, old, new):
                    logging.info(
                        'Replacing {} for test type: {}. Old value:'
                        ' ({}), New value: ({})'.format(
                            key,
                            stringify_args(
                                [ttype, test['eval-type'], jtype, stype],
                                joiner='.'), stringify_args(listify(old)),
                            stringify_args(listify(new))))

                # copy defaults
                icores = cores[:]
                iorder = order[:]
                iconp = conp[:]
                ivecsizes = widths[:] if widths is not None else [None]
                imodels = tuple(models.keys())
                # load overrides
                overrides = get_overrides(test, ttype, jtype, stype)

                # check that we can apply
                if 'num_cores' in overrides and not can_override_cores:
                    raise InvalidTestEnivironmentException(
                        ttype, 'num_cores', matrix_name, 'num_threads')
                elif 'num_cores' in overrides and is_gpu:
                    logger.info(
                        'Discarding unused "num_cores" override for GPU '
                        'platform {}'.format(plookup['platform']))
                    del overrides['num_cores']

                # 'num_cores', 'order', 'conp', 'vecsize', 'vectype'
                # now apply overrides
                outplat = plookup.copy()
                # apply each override once; the original source looped over
                # the overrides twice here, but the outer loop variable was
                # never used
                ivectypes_override = None
                for override in overrides:
                    if override == 'num_cores':
                        override_log('num_cores', icores,
                                     overrides[override])
                        icores = overrides[override]
                    elif override == 'order' and not is_gpu:
                        override_log('order', iorder, overrides[override])
                        iorder = overrides[override]
                    elif override == 'gpuorder' and is_gpu:
                        override_log('order', iorder, overrides[override])
                        iorder = overrides[override]
                    elif override == 'conp':
                        iconp_save = iconp[:]
                        iconp = []
                        if 'conp' in overrides[override]:
                            iconp.append(True)
                        if 'conv' in overrides[override]:
                            iconp.append(False)
                        override_log('conp', iconp_save, iconp)
                    elif override == 'vecsize' and not is_gpu:
                        override_log('vecsize', ivecsizes,
                                     overrides[override])
                        outplat['vecsize'] = listify(overrides[override])
                    elif override == 'gpuvecsize' and is_gpu:
                        override_log('gpuvecsize', ivecsizes,
                                     overrides[override])
                        outplat['vecsize'] = listify(overrides[override])
                    elif override == 'vectype' and not is_gpu:
                        # applied at the end, once all other overrides are in
                        ivectypes_override = overrides[override]
                    elif override == 'gpuvectype' and is_gpu:
                        ivectypes_override = overrides[override]
                    elif override == 'models':
                        # check that all models are valid
                        for model in overrides[override]:
                            if model not in imodels:
                                raise InvalidOverrideException(
                                    override, model, imodels)
                        # and replace
                        override_log('models', stringify_args(imodels),
                                     stringify_args(overrides[override]))
                        imodels = tuple(overrides[override])

                if ivectypes_override is not None:
                    c = clean.copy()
                    apply_vectypes(c,
                                   outplat['vecsize'],
                                   is_wide='wide' in ivectypes_override,
                                   is_deep='deep' in ivectypes_override)
                    # and copy into working
                    outplat['wide'] = c['wide'] if 'wide' in c else [False]
                    outplat['deep'] = c['deep'] if 'deep' in c else [False]
                    outplat['vecsize'] = c['vecsize']
                    old = ['']
                    if is_wide:
                        old += ['wide']
                    if is_deep:
                        old += ['deep']
                    elif not is_wide:
                        old += ['par']
                    override_log('vecsize', old, ivectypes_override)

                # and finally, convert back to an option loop format
                out_params.append(
                    [('num_cores', icores), ('order', iorder),
                     ('rate_spec', rate_spec), ('split_kernels', split_kernels),
                     ('conp', iconp), ('sparse', [stype]),
                     ('jac_type', [jtype]), ('models', [imodels])] +
                    [(key, value) for key, value in six.iteritems(outplat)])

    max_vec_width = 1
    vector_params = [
        dict(p)['vecsize'] for p in out_params
        if 'vecsize' in dict(p) and dict(p)['vecsize'] != [None]
    ]
    if vector_params:
        max_vec_width = max(max_vec_width,
                            max([max(x) for x in vector_params]))
    from . import reduce_oploop
    loop = reduce_oploop(out_params)
    return models, loop, max_vec_width
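
A usage sketch; the working directory and matrix file are illustrative:

# run the performance tests for the exact Jacobian
models, loop, max_vec_width = get_test_matrix(
    '.', build_type.jacobian, 'test_matrix.yaml', for_validation=False)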
Example #10
def test_listify(value, expected):
    # relies on a pytest.mark.parametrize decorator supplying the cases
    # (stripped when this snippet was extracted)
    assert listify(value) == expected
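
A plausible reconstruction of the missing decorator, assuming the listify semantics sketched under Example #1:

import pytest
from pyjac.utils import listify

@pytest.mark.parametrize('value, expected', [
    ('a', ['a']),           # scalar -> single-element list
    (1, [1]),
    (['a', 1], ['a', 1]),   # lists pass through unchanged
])
def test_listify(value, expected):
    assert listify(value) == expected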
Example #11
def get_driver(loopy_opts, namestore, inputs, outputs, driven,
               test_size=None):
    """
    Implements a driver function for kernel evaluation.
    This allows pyJac to utilize a smaller working-buffer (sized to the
    global work size), and implements a static(like) scheduling algorithm

    Notes
    -----
    Currently Loopy doesn't have the machinery to enable native calling of other
    loopy kernels, so we have to fudge this a bit (and this can't be used for
    unit-tests).  Future versions will allow us to natively wrap test functions
    (i.e., once the new function calling interface is in place in Loopy)

    :see:`driver-function` for more information

    Parameters
    ----------
    loopy_opts: :class:`loopy_options`
        The loopy options specifying how to create this kernel
    namestore: :class:`NameStore`
        The namestore class that owns our arrays
    inputs: list of :class:`lp.KernelArgument`
        The arrays that should be copied into internal working buffers
        before calling subfunctions
    outputs: list of :class:`lp.KernelArgument`
        The arrays that should be copied back into global memory after calling
        subfunctions
    driven: :class:`kernel_generator`
        The kernel generator to wrap in the driver

    Returns
    -------
    knl_list : list of :class:`knl_info`
        The generated infos for feeding into the kernel generator

    """

    # we have to do some shenanigans here to get this to work in loopy:
    #
    # 1. Loopy currently doesn't allow you to alter the for-loop increment size,
    #    so for OpenCL where we must increment by the global work size, we have to
    #    put a dummy for-loop in, and teach the kernel generator to work around it
    #
    # 2. Additionally, the OpenMP target in Loopy is Coming Soon (TM), hence we need
    #    our own dynamic scheduling preamble for the driver loop (
    #    if we're operating in queue-form)
    #
    # 3. Finally, Loopy is just now supporting the ability to natively call other
    #    kernels, so for the moment we still need to utilize the dummy function
    #    calling we have set-up for the finite difference Jacobian

    # first, get our input / output arrays
    arrays = {}
    to_find = set(listify(inputs)) | set(listify(outputs))
    # create mapping of array names
    array_names = {v.name: v for k, v in six.iteritems(vars(namestore))
                   if isinstance(v, arc.creator) and not (
                    v.fixed_indicies or v.affine)}
    for arr in to_find:
        arr_creator = array_names.get(arr)
        if arr_creator is None:
            continue
        arrays[arr] = arr_creator

    if len(arrays) != len(to_find):
        missing = to_find - set(arrays.keys())
        logger = logging.getLogger(__name__)
        logger.debug('Input/output arrays for queue_driver kernel {} not found.'
                     .format(stringify_args(missing)))
        raise InvalidInputSpecificationException(missing)

    def arr_non_ic(array_input):
        return len(array_input.shape) > 1

    # ensure the inputs and outputs are all identically sized (among those
    # that have a non-initial-condition dimension)

    def __check(check_input):
        shape = ()

        def _raise(desc, inp, nameref, shape):
            logger = logging.getLogger(__name__)
            logger.debug('{} array for driver kernel {} does not '
                         'match expected shape (from array {}).  '
                         'Expected: ({}), got: ({})'.format(
                            desc, inp.name, nameref,
                            stringify_args(inp.shape),
                            stringify_args(shape))
                         )
            raise InvalidInputSpecificationException(inp.name)

        nameref = None
        desc = 'Input' if check_input else 'Output'
        for inp in [arrays[x] for x in (inputs if check_input else outputs)]:
            if not arr_non_ic(inp):
                # only the initial condition dimension, fine
                continue
            if shape:
                if inp.shape != shape and len(inp.shape) == len(shape):
                    # allow different shapes in the last index
                    if not all(x == y for x, y in zip(
                            inp.shape[:-1], shape[:-1])):
                        _raise(desc, inp, nameref, shape)
                    # otherwise, take the maximum of the shape entry
                    shape = shape[:-1] + (max(shape[-1], inp.shape[-1]),)

                elif inp.shape != shape:
                    _raise(desc, inp, nameref, shape)
            else:
                nameref = inp.name
                shape = inp.shape[:]
        if not shape:
            logger = logging.getLogger(__name__)
            logger.debug('No {} arrays supplied to driver that require '
                         'copying to working buffer!'.format(desc))
            raise InvalidInputSpecificationException('Driver ' + desc + ' arrays')
        return shape

    def create_interior_kernel(for_input):
        shape = __check(for_input)
        name = 'copy_{}'.format('in' if for_input else 'out')
        # get arrays
        arrs = [arrays[x] for x in (inputs if for_input else outputs)]

        # create a dummy map and store
        map_shape = np.arange(shape[1], dtype=arc.kint_type)
        mapper = arc.creator(name, arc.kint_type, map_shape.shape, 'C',
                             initializer=map_shape)
        mapstore = arc.MapStore(loopy_opts, mapper, test_size)

        # determine what other inames we need, if any
        namer = UniqueNameGenerator(set([mapstore.iname]))
        extra_inames = []
        for i in six.moves.range(2, len(shape)):
            iname = namer(mapstore.iname)
            extra_inames.append((iname, '0 <= {} < {}'.format(
                iname, shape[i])))

        indicies = [arc.global_ind, mapstore.iname] + [
            ex[0] for ex in extra_inames]
        global_indicies = indicies[:]
        global_indicies[0] += ' + ' + driver_offset.name

        # bake in SIMD pre-split
        vec_spec = None
        split_spec = None
        conditional_index = get_problem_index(loopy_opts)

        def __build(arr, local, **kwargs):
            inds = global_indicies if not local else indicies
            if isinstance(arr, arc.jac_creator) and arr.is_sparse:
                # this is a sparse Jacobian, hence we have to override the default
                # indexing (as we're doing a straight copy)
                kwargs['ignore_lookups'] = True
            if arr_non_ic(arr):
                return mapstore.apply_maps(arr, *inds, **kwargs)
            else:
                return mapstore.apply_maps(arr, inds[0], **kwargs)

        # create working buffer version of arrays
        working_buffers = []
        working_strs = []
        for arr in arrs:
            arr_lp, arr_str = __build(arr, True, use_local_name=True)
            working_buffers.append(arr_lp)
            working_strs.append(arr_str)

        # create global versions of arrays
        buffers = []
        strs = []
        for arr in arrs:
            arr_lp, arr_str = __build(arr, False, reshape_to_working_buffer=False)
            buffers.append(arr_lp)
            strs.append(arr_str)

        # now create the instructions
        instruction_template = Template("""
            if ${ind} < ${problem_size} ${shape_check}
                ${local_buffer} = ${global_buffer} {id=copy_${name}}
            end
        """) if for_input else Template("""
            if ${ind} < ${problem_size} ${shape_check}
                ${global_buffer} = ${local_buffer} {id=copy_${name}}
            end
        """)

        warnings = []
        instructions = []
        for i, arr in enumerate(arrs):
            # get shape check
            shape_check = ''
            if arr.shape[-1] != shape[-1] and len(arr.shape) == len(shape):
                shape_check = ' and {} < {}'.format(
                    indicies[-1], arr.shape[-1])

            instructions.append(instruction_template.substitute(
                local_buffer=working_strs[i],
                global_buffer=strs[i],
                ind=conditional_index,
                problem_size=arc.problem_size.name,
                name=arr.name,
                shape_check=shape_check))
            warnings.append('write_race(copy_{})'.format(arr.name))
        if loopy_opts.is_simd:
            warnings.append('vectorize_failed')
            warnings.append('unrolled_vector_iname_conditional')
        instructions = '\n'.join(instructions)

        kwargs = {}
        if loopy_opts.lang == 'c':
            # override the number of copies in this function to 1
            # (i.e., 1 per-thread)
            kwargs['iname_domain_override'] = [(arc.global_ind, '0 <= {} < 1'.format(
                arc.global_ind))]

        priorities = ([arc.global_ind + '_outer'] if loopy_opts.pre_split else [
            arc.global_ind]) + [arc.var_name]
        # and return the kernel info
        return knl_info(name=name,
                        instructions=instructions,
                        mapstore=mapstore,
                        var_name=arc.var_name,
                        extra_inames=extra_inames,
                        kernel_data=buffers + working_buffers + [
                          arc.work_size, arc.problem_size, driver_offset],
                        silenced_warnings=warnings,
                        vectorization_specializer=vec_spec,
                        split_specializer=split_spec,
                        unrolled_vector=True,
                        loop_priority=set([tuple(priorities + [
                          iname[0] for iname in extra_inames])]),
                        **kwargs)

    copy_in = create_interior_kernel(True)
    # create a dummy kernel info that simply calls our internal function
    instructions = driven.name + '()'
    # create mapstore
    call_name = driven.name
    repeats = 1
    if loopy_opts.depth:
        # we need 'var_name' to have a non-unity size
        repeats = loopy_opts.vector_width

    map_shape = np.arange(repeats, dtype=arc.kint_type)
    mapper = arc.creator(call_name, arc.kint_type, map_shape.shape, 'C',
                         initializer=map_shape)
    mapstore = arc.MapStore(loopy_opts, mapper, test_size)
    mangler = lp_pregen.MangleGen(call_name, tuple(), tuple())
    kwargs = {}
    if loopy_opts.lang == 'c':
        # override the number of calls to the driven function in the driver, this
        # is currently fixed to 1 (i.e., 1 per-thread)
        kwargs['iname_domain_override'] = [(arc.global_ind, '0 <= {} < 1'.format(
            arc.global_ind))]

    func_call = knl_info(name='driver',
                         instructions=instructions,
                         mapstore=mapstore,
                         kernel_data=[arc.work_size, arc.problem_size],
                         var_name=arc.var_name,
                         extra_inames=copy_in.extra_inames[:],
                         manglers=[mangler],
                         **kwargs)
    copy_out = create_interior_kernel(False)

    # and return
    return [copy_in, func_call, copy_out]
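
A calling sketch; the array and generator names below are illustrative, not confirmed against the pyJac sources:

# wrap a kernel generator in the driver; returns the copy-in kernel, the
# driver call, and the copy-out kernel, in call order
knls = get_driver(loopy_opts, namestore,
                  inputs=['phi', 'P_arr'], outputs=['dphi'],
                  driven=species_rates_generator)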
Example #12
def get_toolchain(lang, shared=True, executable=True, **kwargs):
    """
    Return a codepy :class:`Toolchain` to build / link pyJac files.

    Parameters
    ----------
    lang: str
        The language to build
    shared: bool [True]
        If true, build a shared library
    executable: bool [True]
        If true, build an _executable_ shared library; note: requires
        :param:`shared` to be True
    **kwargs:
        Additional arguments merged into the toolchain defaults below
    """

    # compilation flags: copy, so the in-place additions below don't mutate
    # the module-level flag lists
    compile_flags = opt_flags[:]
    from pyjac.utils import get_env_val
    # read debug flag from ENV or config
    if get_env_val('debug'):
        compile_flags = debug_flags[:]

    # link flags
    linkflags = ldflags[lang][:]
    if shared and not executable:
        linkflags += shared_flags[lang]
        compile_flags += shared_flags[lang]
    elif executable:
        if not shared:
            logger = logging.getLogger(__name__)
            logger.error('Cannot create an executable non-shared library!')
            raise LibraryGenerationError()

        compile_flags += shared_flags[lang]
        linkflags += shared_exec_flags[lang]
    if run_dirs[lang]:
        for rdir in utils.listify(run_dirs[lang]):
            linkflags += ['-Wl,-rpath,{}'.format(rdir)]
    so_ext = lib_ext(shared)

    toolchain_args = {'cc': cmd_compile[lang][:],
                      'cflags': (flags[lang] + compile_flags)[:],
                      'ldflags': linkflags[:],
                      'include_dirs': includes[lang][:],
                      'library_dirs': lib_dirs[lang][:],
                      'libraries': libs[lang][:],
                      'so_ext': so_ext,
                      'o_ext': '.o',
                      'defines': [],
                      'undefines': []}

    # merge in user kwargs
    for k, v in six.iteritems(kwargs):
        if k not in toolchain_args or not toolchain_args[k]:
            # empty or user supplied only
            toolchain_args[k] = v
        elif isinstance(toolchain_args[k], list):
            # may simply append to the list
            v = utils.listify(v)
            toolchain_args[k] += v[:]
        else:
            # else, replace
            toolchain_args[k] = v

    return GCCToolchain(**toolchain_args)
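
A usage sketch; the extra define is hypothetical and simply demonstrates the kwargs merge above:

toolchain = get_toolchain('c', shared=True, executable=True,
                          defines=['SOME_OPTIONAL_DEFINE'])
# list-valued kwargs are appended to the defaults, scalars replace them
print(toolchain.cflags)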