示例#1
0
def test_decltype_templates():
    ''' object-typed fields named '$<name>::<template>::<args...>' must be
    rendered as templated struct members, e.g. ``array<float64>foo;`` '''
    # NOTE: the builtin `object` is used instead of `np.object`; the latter
    # was only a deprecated alias of the builtin and no longer exists in
    # current NumPy releases, where accessing it raises AttributeError.
    expected_by_field = [
        ('$foo::array::f8', 'struct{array<float64>foo;}'),
        ('$foo::array::i4', 'struct{array<int32>foo;}'),
        ('$foo::array::i4::?', 'struct{array<int32,bool>foo;}'),
    ]
    for field_name, declaration in expected_by_field:
        t = np.dtype([(field_name, object)])
        assert decltype(t) == declaration
示例#2
0
    def gencode_probability(pfunc, name):
        ''' Render the device-side struct source for a starting probability.

        `pfunc.gen_expr()` supplies the value expression and the list of
        Jacobian component expressions; `decltype(pfunc)` supplies the
        parameter struct that the generated `${name}_t` inherits from.
        Returns the rendered source text. '''
        value_expr, jacobian_exprs = pfunc.gen_expr()

        skeleton = Template(r'''
        using ${name}_theta_t = ${theta_t};

        struct ${name}_t : ${name}_theta_t {

            constexpr static int jac_dims = ${jac_dims};

            template<class N>
            __device__ __inline__
            auto operator() (N const &n) const {
                return ${expr};
            }

            template<class N>
            __device__ __inline__
            auto _j_a_c_o_b_i_a_n_(N const &n) const {
                graphdot::array<float, jac_dims> j;
                ${jac;\n};
                return j;
            }
        };

        __constant__ ${name}_t ${name};
        ''')

        # One `j[k] = ...` assignment per Jacobian component.
        bindings = dict(
            name=name,
            jac_dims=len(jacobian_exprs),
            theta_t=decltype(pfunc),
            expr=value_expr,
            jac=[
                f'j[{k}] = {component}'
                for k, component in enumerate(jacobian_exprs)
            ],
        )
        return skeleton.render(**bindings)
示例#3
0
def test_decltype_order():
    ''' members must appear in the generated declaration in the same order
    as the fields of the originating dtype '''
    np.random.seed(0)
    scalar_types = [
        np.bool_,
        np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.float32, np.float64,
    ]
    for _ in range(1024):
        n_members = np.random.randint(1, 16)
        chosen_types = np.random.choice(scalar_types, n_members)

        # Keep drawing random identifiers until enough distinct ones exist.
        unique_names = []
        while len(unique_names) < n_members:
            candidate = exrex.getone('[_a-zA-Z][_0-9a-zA-Z]*', 16)
            if candidate in unique_names:
                continue
            unique_names.append(candidate)

        record = np.dtype(list(zip(unique_names, chosen_types)))
        struct_src = decltype(record)

        def member_decl(field):
            # Declaration text expected for a single member of `record`.
            return '%s;' % decltype(record.fields[field][0], field)

        fields = record.names
        for earlier, later in zip(fields[:-1], fields[1:]):
            earlier_decl = member_decl(earlier)
            later_decl = member_decl(later)
            earlier_pos = search(earlier_decl, struct_src).start()
            later_pos = search(later_decl, struct_src).start()
            assert earlier_pos <= later_pos
示例#4
0
def test_decltype_compose():
    ''' the declaration of a compound dtype must contain the declarations
    of the types it is composed of '''
    inner = np.dtype([('x', np.float32), ('y', np.int16)])
    outer = np.dtype([('x', inner), ('y', np.bool_)])

    # Scalar member types show up inside the flat struct ...
    for scalar in (np.float32, np.int16):
        assert decltype(scalar) in decltype(inner)

    # ... and the flat struct, declared as member 'x', inside the nested one.
    nested_member = decltype(inner, 'x')
    assert nested_member in decltype(outer)
示例#5
0
    def gencode_kernel(kernel, name):
        ''' Render the device-side struct source for a micro kernel.

        `kernel.gen_expr('x1', 'x2')` supplies the value expression and the
        list of Jacobian component expressions in terms of the arguments
        `x1` and `x2`; `decltype(kernel)` supplies the parameter struct that
        the generated `${name}_t` inherits from. Besides the kernel object
        itself, the source declares `${name}_diff_grid` and
        `${name}_flat_theta`, both sized from the number of flattened
        entries of `kernel.theta`. Returns the rendered source text. '''
        value_expr, jacobian_exprs = kernel.gen_expr('x1', 'x2')

        skeleton = Template(r'''
        using ${name}_theta_t = ${theta_t};

        struct ${name}_t : ${name}_theta_t {

            constexpr static int jac_dims = ${jac_dims};

            template<class X>
            __device__ __inline__
            auto operator() (X const &x1, X const &x2) const {
                return ${expr};
            }

            template<class X>
            __device__ __inline__
            auto _j_a_c_o_b_i_a_n_(X const &x1, X const &x2) const {
                graphdot::array<float, jac_dims> j;
                ${jac;\n};
                return j;
            }
        };

        __constant__ ${name}_t ${name};
        __constant__ ${name}_t ${name}_diff_grid[2 * ${n_theta}];
        __constant__ float32   ${name}_flat_theta[${n_theta}];
        ''')

        # One `j[k] = ...` assignment per Jacobian component.
        bindings = dict(
            name=name,
            jac_dims=len(jacobian_exprs),
            theta_t=decltype(kernel),
            expr=value_expr,
            jac=[
                f'j[{k}] = {component}'
                for k, component in enumerate(jacobian_exprs)
            ],
            n_theta=len(list(flatten(kernel.theta))),
        )
        return skeleton.render(**bindings)
示例#6
0
    def __call__(self, graphs, diags, node_kernel, edge_kernel, p, q, eps,
                 ftol, gtol, jobs, starts, gramian, active, gradient, nX, nY,
                 nJ, traits, timer):
        ''' Generate, load and launch the `graph_maximin_distance` GPU kernel.

        The call proceeds in the following stages, each bracketed by
        `timer.tic` / `timer.toc`:

        1. register every graph and collect the per-graph states in
           `graphs_d`, checking consecutive graphs for homogeneity;
        2. allocate a one-element uint32 global job counter;
        3. render the kernel source from `self.template` into `self.source`;
        4. fetch the `graph_maximin_distance` function from `self.module`;
        5. compute the launch configuration and copy the micro kernel and
           starting probability states into the module's globals;
        6. launch the kernel and synchronize on `self.ctx`.

        Parameters
        ----------
        graphs:
            Graphs to process. Each one is passed to `self._register_graph`
            and must expose `.nodes`. Must be non-empty, because node/edge
            types and the `weighted` flag are read from the last registered
            graph.
        diags, jobs, starts, gramian, active:
            Forwarded unchanged to the GPU kernel; `len(jobs)` is passed
            along as a uint32 as well.
        node_kernel, edge_kernel:
            Micro kernels. Their source is generated with
            `self.gencode_kernel` and their packed states are copied into
            the module globals `node_kernel` / `edge_kernel`. If the graphs
            are weighted, `edge_kernel` is first wrapped in a
            `TensorProduct` with a `Product` kernel on `weight`.
        p:
            Starting probability. Its source is generated with
            `self.gencode_probability`; `p.state` (as `p.dtype`) is copied
            into the module global `p_start`.
        q:
            Passed to the kernel twice as float32; the second occurrence is
            a placeholder for q0.
        eps:
            Used as `diff_eps` when packing kernel states and also passed to
            the kernel as float32.
        ftol, gtol:
            Passed to the kernel as float32.
        gradient:
            Forwarded to the kernel; a null pointer (`np.uintp(0)`) is
            passed instead when it is None.
        nX, nY, nJ:
            Passed to the kernel as uint32.
        traits:
            Supplies the template rendering context; `traits.eval_gradient`
            decides whether the theta difference grid is uploaded.
        timer:
            Object providing `tic(label)` / `toc(label)`.

        Returns
        -------
        None. NOTE(review): results are presumably written by the kernel
        into the forwarded buffers (e.g. `gramian`, `gradient`) -- confirm
        against the kernel source.
        '''
        # ---- transfer graphs to GPU ----
        timer.tic('transferring graphs to GPU')

        # `og_last` remembers the previously registered graph so that each
        # new graph can be checked for homogeneity against it.
        og_last = None
        graphs_d = umempty(len(graphs), dtype=OctileGraph.dtype)
        for i, g in enumerate(graphs):
            og, ogstate = self._register_graph(g)
            if i > 0:
                self._assert_homogeneous(og_last, og)
            og_last = og
            graphs_d[i] = ogstate

        # Type information is taken from the last graph only; this raises
        # AttributeError on None if `graphs` is empty.
        weighted = og_last.weighted
        node_t = og_last.node_t
        edge_t = og_last.edge_t

        timer.toc('transferring graphs to GPU')

        ''' allocate global job counter '''
        timer.tic('allocate global job counter')
        # Single zero-initialized uint32, handed to the kernel below.
        i_job_global = umzeros(1, np.uint32)
        timer.toc('allocate global job counter')

        ''' code generation '''
        timer.tic('code generation')
        if weighted:
            # Combine a Product kernel on the edge weight with the
            # user-supplied kernel on the edge label.
            edge_kernel = TensorProduct(weight=Product(),
                                        label=edge_kernel)

        # Identity comparison: only the literal `True` enables the grid,
        # other truthy values do not.
        use_theta_grid = traits.eval_gradient is True
        node_kernel_src = self.gencode_kernel(node_kernel, 'node_kernel')
        edge_kernel_src = self.gencode_kernel(edge_kernel, 'edge_kernel')
        p_start_src = self.gencode_probability(p, 'p_start')

        with self.template.context(traits=traits) as template:
            self.source = template.render(
                node_kernel=node_kernel_src,
                edge_kernel=edge_kernel_src,
                p_start=p_start_src,
                node_t=decltype(node_t),
                edge_t=decltype(edge_t)
            )
        timer.toc('code generation')

        ''' JIT '''
        timer.tic('JIT')
        # NOTE(review): `self.module` presumably compiles `self.source` on
        # access, which is what this timer section measures -- confirm.
        kernel = self.module.get_function('graph_maximin_distance')
        timer.toc('JIT')

        ''' calculate launch configuration '''
        timer.tic('calculating launch configuration')
        launch_block_count = (self.device.MULTIPROCESSOR_COUNT
                              * self.block_per_sm)
        # Only the second element of the pair returned by `get_global` is
        # used here. NOTE(review): presumably this is the byte size of the
        # symbol, i.e. the size encodes the value -- confirm.
        shmem_bytes_per_warp = self.module.get_global(
            'shmem_bytes_per_warp'
        )[1]
        # Per-warp amount scaled by the number of warps in a block.
        shmem_bytes_per_block = (shmem_bytes_per_warp * self.block_size
                                 // self.device.WARP_SIZE)

        # Scratch space is sized for the graph with the most nodes.
        max_graph_size = np.max([len(g.nodes) for g in graphs])
        scratch_pcg = self.allocate_pcg_scratch(
            launch_block_count, max_graph_size
        )

        ''' copy micro kernel parameters to GPU '''
        for name, uker in [('node_kernel', node_kernel),
                           ('edge_kernel', edge_kernel)]:
            # The first packed state goes to the kernel object itself; the
            # remaining ones go to the difference grid when it is enabled.
            states = np.array(
                self.pack_state(uker, diff_grid=use_theta_grid, diff_eps=eps),
                dtype=uker.dtype
            )

            p_uker, _ = self.module.get_global(name)
            cuda.memcpy_htod(p_uker, states[:1])

            if use_theta_grid:
                p_diff_grid, _ = self.module.get_global(f'{name}_diff_grid')
                p_flat_theta, _ = self.module.get_global(f'{name}_flat_theta')
                cuda.memcpy_htod(p_diff_grid, states[1:])
                cuda.memcpy_htod(
                    p_flat_theta,
                    np.fromiter(flatten(uker.theta), dtype=np.float32)
                )

        p_p_start, _ = self.module.get_global('p_start')
        cuda.memcpy_htod(
            p_p_start, np.array([p.state], dtype=p.dtype)
        )

        # Note that this timer section also covers the parameter uploads
        # above, not just the launch configuration arithmetic.
        timer.toc('calculating launch configuration')

        ''' GPU kernel execution '''
        timer.tic('GPU kernel execution')
        kernel(
            graphs_d,
            diags,
            scratch_pcg,
            jobs,
            starts,
            gramian,
            active,
            gradient if gradient is not None else np.uintp(0),
            i_job_global,
            np.uint32(len(jobs)),
            np.uint32(nX),
            np.uint32(nY),
            np.uint32(nJ),
            np.float32(q),
            np.float32(q),  # placeholder for q0
            np.float32(eps),
            np.float32(ftol),
            np.float32(gtol),
            grid=(launch_block_count, 1, 1),
            block=(self.block_size, 1, 1),
            shared=shmem_bytes_per_block,
        )
        # Wait for the kernel to finish so the timer covers its execution.
        self.ctx.synchronize()
        timer.toc('GPU kernel execution')
示例#7
0
def test_decltype_empty():
    ''' an empty field list must yield a declaration mentioning 'empty' '''
    declaration = decltype([])
    assert 'empty' in declaration
示例#8
0
def test_decltype_array(element_type, size):
    ''' array dtypes, given either as an (element_type, size) tuple or as a
    '<size><name>' string, must be declared as the element declaration
    followed by a space and one '[d]' extent per dimension '''

    def expected():
        # Element declaration, a space, then the bracketed extents.
        extents = ''.join("[%d]" % d for d in size)
        return decltype(element_type) + ' ' + extents

    # Tuple form.
    assert decltype((element_type, size)) == expected()

    # String form, e.g. str(size) followed by the NumPy type name.
    type_string = str(size) + np.dtype(element_type).name
    assert decltype(type_string) == expected()
示例#9
0
def test_decltype_string(case):
    ''' the declaration generated for a dtype must equal the expected type
    string once surrounding whitespace is stripped '''
    source_dtype, expected = case
    rendered = decltype(source_dtype).strip()
    assert rendered == expected