def test_decltype_templates():
    """Check decltype output for template-style field names.

    Each case builds a structured dtype with a single field whose name has
    the form ``$<member>::<template>::<arg>[::<arg>...]`` and compares the
    string returned by ``decltype`` against the expected C++ declaration.

    The field type is the builtin ``object``: the ``np.object`` alias used
    previously was deprecated in NumPy 1.20 and removed in NumPy 1.24, where
    accessing it raises ``AttributeError`` before ``decltype`` is even called.
    """
    cases = [
        ('$foo::array::f8', 'struct{array<float64>foo;}'),
        ('$foo::array::i4', 'struct{array<int32>foo;}'),
        ('$foo::array::i4::?', 'struct{array<int32,bool>foo;}'),
    ]
    for field_name, expected in cases:
        t = np.dtype([(field_name, object)])
        assert decltype(t) == expected
def gencode_probability(pfunc, name):
    """Render the source snippet for a starting-probability functor.

    Parameters
    ----------
    pfunc:
        Object providing ``gen_expr()``, which returns a pair
        ``(expression, jacobian_expressions)``; it is also passed to
        ``decltype`` to obtain the base type of the generated struct.
    name:
        Identifier substituted for ``${name}`` throughout the template.

    Returns
    -------
    Whatever ``Template(...).render(...)`` returns for the filled template.
    """
    value_expr, jac_exprs = pfunc.gen_expr()
    # The template text is kept exactly as a single run of characters; it is
    # only split into adjacent raw literals here for readability.
    template = Template(
        r' using ${name}_theta_t = ${theta_t};'
        r' struct ${name}_t : ${name}_theta_t {'
        r' constexpr static int jac_dims = ${jac_dims};'
        r' template<class N> __device__ __inline__ auto operator() (N const &n) const {'
        r' return ${expr};'
        r' }'
        r' template<class N> __device__ __inline__ auto _j_a_c_o_b_i_a_n_(N const &n) const {'
        r' graphdot::array<float, jac_dims> j;'
        r' ${jac;\n};'
        r' return j;'
        r' }'
        r' };'
        r' __constant__ ${name}_t ${name};'
        r' '
    )
    n_jac = len(jac_exprs)
    theta_type = decltype(pfunc)
    # One assignment statement per Jacobian component, e.g. "j[0] = <expr>".
    jac_statements = [
        f'j[{index}] = {component}'
        for index, component in enumerate(jac_exprs)
    ]
    return template.render(
        name=name,
        jac_dims=n_jac,
        theta_t=theta_type,
        expr=value_expr,
        jac=jac_statements,
    )
def test_decltype_order():
    ''' ensure output member order is the same as that in dtype

    Runs 1024 randomized rounds with a fixed NumPy seed. Each round builds a
    structured dtype with 1 to 15 uniquely named members of random scalar
    types, then checks for every pair of adjacent fields that the earlier
    field's declaration is found no later than the following one in the
    string returned by ``decltype``.
    '''
    np.random.seed(0)
    scalar_types = [
        np.bool_,
        np.int8, np.int16, np.int32, np.int64,
        np.uint8, np.uint16, np.uint32, np.uint64,
        np.float32, np.float64,
    ]
    for _ in range(1024):
        n_members = np.random.randint(1, 16)
        chosen_types = np.random.choice(scalar_types, n_members)

        # Keep drawing identifiers until enough distinct ones are collected.
        names = []
        while len(names) < n_members:
            candidate = exrex.getone('[_a-zA-Z][_0-9a-zA-Z]*', 16)
            if candidate in names:
                continue
            names.append(candidate)

        struct_dtype = np.dtype(list(zip(names, chosen_types)))
        rendered = decltype(struct_dtype)
        field_names = struct_dtype.names
        fields = struct_dtype.fields
        for earlier, later in zip(field_names[:-1], field_names[1:]):
            earlier_decl = '%s;' % decltype(fields[earlier][0], earlier)
            later_decl = '%s;' % decltype(fields[later][0], later)
            earlier_pos = search(earlier_decl, rendered).start()
            later_pos = search(later_decl, rendered).start()
            assert earlier_pos <= later_pos
def test_decltype_compose():
    """Check that decltype of a composite dtype contains its members' types.

    The declaration of each scalar member type must appear inside the
    declaration of the flat struct, and the declaration of the flat struct
    (as a member named ``x``) must appear inside that of the nested struct.
    """
    inner = np.dtype([('x', np.float32), ('y', np.int16)])
    outer = np.dtype([('x', inner), ('y', np.bool_)])

    for scalar in (np.float32, np.int16):
        assert decltype(scalar) in decltype(inner)

    nested_member = decltype(inner, 'x')
    assert nested_member in decltype(outer)
def gencode_kernel(kernel, name):
    """Render the source snippet for a two-argument micro-kernel functor.

    Parameters
    ----------
    kernel:
        Object providing ``gen_expr('x1', 'x2')``, which returns a pair
        ``(expression, jacobian_expressions)``, and a ``theta`` attribute
        that is flattened to count the hyperparameters; it is also passed to
        ``decltype`` to obtain the base type of the generated struct.
    name:
        Identifier substituted for ``${name}`` throughout the template.

    Returns
    -------
    Whatever ``Template(...).render(...)`` returns for the filled template.
    """
    value_expr, jac_exprs = kernel.gen_expr('x1', 'x2')
    # The template text is kept exactly as a single run of characters; it is
    # only split into adjacent raw literals here for readability.
    template = Template(
        r' using ${name}_theta_t = ${theta_t};'
        r' struct ${name}_t : ${name}_theta_t {'
        r' constexpr static int jac_dims = ${jac_dims};'
        r' template<class X> __device__ __inline__ auto operator() (X const &x1, X const &x2) const {'
        r' return ${expr};'
        r' }'
        r' template<class X> __device__ __inline__ auto _j_a_c_o_b_i_a_n_(X const &x1, X const &x2) const {'
        r' graphdot::array<float, jac_dims> j;'
        r' ${jac;\n};'
        r' return j;'
        r' }'
        r' };'
        r' __constant__ ${name}_t ${name};'
        r' __constant__ ${name}_t ${name}_diff_grid[2 * ${n_theta}];'
        r' __constant__ float32 ${name}_flat_theta[${n_theta}];'
        r' '
    )
    n_jac = len(jac_exprs)
    theta_type = decltype(kernel)
    # One assignment statement per Jacobian component, e.g. "j[0] = <expr>".
    jac_statements = [
        f'j[{index}] = {component}'
        for index, component in enumerate(jac_exprs)
    ]
    # Number of scalar entries obtained by flattening the hyperparameters.
    theta_count = sum(1 for _ in flatten(kernel.theta))
    return template.render(
        name=name,
        jac_dims=n_jac,
        theta_t=theta_type,
        expr=value_expr,
        jac=jac_statements,
        n_theta=theta_count,
    )
def __call__(self, graphs, diags, node_kernel, edge_kernel, p, q, eps,
             ftol, gtol, jobs, starts, gramian, active, gradient, nX, nY,
             nJ, traits, timer):
    ''' Launch the ``graph_maximin_distance`` GPU function on a set of graphs.

    The method runs these stages in order, each bracketed by
    ``timer.tic``/``timer.toc``:

    1. register every graph and store its state in a device-visible array;
    2. allocate a zeroed one-element ``uint32`` job counter;
    3. generate source for the node kernel, edge kernel and starting
       probability and render it into ``self.source``;
    4. fetch ``graph_maximin_distance`` from ``self.module``;
    5. compute the launch configuration, allocate scratch space and copy
       the kernel / probability parameters to the module's globals;
    6. launch the GPU function and synchronize ``self.ctx``.

    Parameters
    ----------
    graphs:
        Sized, re-iterable collection of graphs. Each item is passed to
        ``self._register_graph`` and must expose ``nodes`` with a length.
        Must be non-empty: with no graphs ``og_last`` stays ``None`` and
        the attribute access on it raises ``AttributeError``.
    diags, jobs, starts, gramian, active:
        Forwarded unchanged to the GPU function; ``len(jobs)`` is also
        passed as a ``uint32``.
    node_kernel, edge_kernel:
        Kernel objects used for code generation and state packing; each
        must expose ``dtype`` and ``theta``. ``edge_kernel`` is wrapped in
        a ``TensorProduct`` when the last registered graph is weighted.
    p:
        Starting-probability object; its ``state`` and ``dtype`` are used
        to fill the ``p_start`` global.
    q:
        Passed to the GPU function twice as ``float32``; the second copy
        is a placeholder for ``q0``.
    eps:
        Passed to ``self.pack_state`` as ``diff_eps`` and to the GPU
        function as ``float32``.
    ftol, gtol:
        Passed to the GPU function as ``float32``.
    gradient:
        Forwarded to the GPU function; replaced by ``np.uintp(0)`` when
        ``None``.
    nX, nY, nJ:
        Passed to the GPU function as ``uint32``.
    traits:
        Used as the template rendering context; ``traits.eval_gradient``
        being exactly ``True`` enables the theta grid.
    timer:
        Object providing ``tic(label)`` and ``toc(label)``.

    Returns
    -------
    None. NOTE(review): results are presumably written by the GPU function
    into the buffers passed to it (e.g. ``gramian``/``gradient``) -- confirm
    against the kernel source.
    '''
    timer.tic('transferring graphs to GPU')
    og_last = None
    graphs_d = umempty(len(graphs), dtype=OctileGraph.dtype)
    for i, g in enumerate(graphs):
        og, ogstate = self._register_graph(g)
        # Every graph after the first is checked against its predecessor.
        if i > 0:
            self._assert_homogeneous(og_last, og)
        og_last = og
        graphs_d[i] = ogstate

    # Graph-wide properties are read from the last registered graph only;
    # the pairwise homogeneity checks above are what justify this.
    weighted = og_last.weighted
    node_t = og_last.node_t
    edge_t = og_last.edge_t
    timer.toc('transferring graphs to GPU')

    ''' allocate global job counter '''
    timer.tic('allocate global job counter')
    i_job_global = umzeros(1, np.uint32)
    timer.toc('allocate global job counter')

    ''' code generation '''
    timer.tic('code generation')
    if weighted:
        # For weighted graphs the edge kernel is combined with a Product
        # kernel on the 'weight' component.
        edge_kernel = TensorProduct(weight=Product(), label=edge_kernel)

    # Strict identity test: truthy values other than True do not enable it.
    use_theta_grid = traits.eval_gradient is True

    node_kernel_src = self.gencode_kernel(node_kernel, 'node_kernel')
    edge_kernel_src = self.gencode_kernel(edge_kernel, 'edge_kernel')
    p_start_src = self.gencode_probability(p, 'p_start')

    with self.template.context(traits=traits) as template:
        self.source = template.render(
            node_kernel=node_kernel_src,
            edge_kernel=edge_kernel_src,
            p_start=p_start_src,
            node_t=decltype(node_t),
            edge_t=decltype(edge_t)
        )
    timer.toc('code generation')

    ''' JIT '''
    timer.tic('JIT')
    # NOTE(review): self.module is not assigned in this method; presumably
    # it is built from self.source on access -- confirm in the class.
    kernel = self.module.get_function('graph_maximin_distance')
    timer.toc('JIT')

    ''' calculate launch configuration '''
    # This timer section also covers the scratch allocation and the
    # host-to-device parameter copies below, up to the matching toc.
    timer.tic('calculating launch configuration')
    launch_block_count = (self.device.MULTIPROCESSOR_COUNT
                          * self.block_per_sm)
    # get_global returns a pair; only its second element is used here.
    shmem_bytes_per_warp = self.module.get_global(
        'shmem_bytes_per_warp'
    )[1]
    shmem_bytes_per_block = (shmem_bytes_per_warp * self.block_size
                             // self.device.WARP_SIZE)

    max_graph_size = np.max([len(g.nodes) for g in graphs])
    scratch_pcg = self.allocate_pcg_scratch(
        launch_block_count, max_graph_size
    )

    ''' copy micro kernel parameters to GPU '''
    for name, uker in [('node_kernel', node_kernel),
                       ('edge_kernel', edge_kernel)]:
        states = np.array(
            self.pack_state(uker, diff_grid=use_theta_grid, diff_eps=eps),
            dtype=uker.dtype
        )

        # The first packed state fills the global named after the kernel.
        p_uker, _ = self.module.get_global(name)
        cuda.memcpy_htod(p_uker, states[:1])

        if use_theta_grid:
            # The remaining packed states fill '<name>_diff_grid', and the
            # flattened theta values fill '<name>_flat_theta' as float32.
            p_diff_grid, _ = self.module.get_global(f'{name}_diff_grid')
            p_flat_theta, _ = self.module.get_global(f'{name}_flat_theta')
            cuda.memcpy_htod(p_diff_grid, states[1:])
            cuda.memcpy_htod(
                p_flat_theta,
                np.fromiter(flatten(uker.theta), dtype=np.float32)
            )

    p_p_start, _ = self.module.get_global('p_start')
    cuda.memcpy_htod(
        p_p_start,
        np.array([p.state], dtype=p.dtype)
    )

    timer.toc('calculating launch configuration')

    ''' GPU kernel execution '''
    timer.tic('GPU kernel execution')
    kernel(
        graphs_d,
        diags,
        scratch_pcg,
        jobs,
        starts,
        gramian,
        active,
        gradient if gradient is not None else np.uintp(0),
        i_job_global,
        np.uint32(len(jobs)),
        np.uint32(nX),
        np.uint32(nY),
        np.uint32(nJ),
        np.float32(q),
        np.float32(q),  # placeholder for q0
        np.float32(eps),
        np.float32(ftol),
        np.float32(gtol),
        grid=(launch_block_count, 1, 1),
        block=(self.block_size, 1, 1),
        shared=shmem_bytes_per_block,
    )
    # Wait on the context before stopping the timer so the measured
    # interval includes the synchronization.
    self.ctx.synchronize()
    timer.toc('GPU kernel execution')
def test_decltype_empty():
    """Check that decltype of an empty field list mentions ``empty``."""
    rendered = decltype([])
    assert 'empty' in rendered
def test_decltype_array(element_type, size):
    """Check decltype output for array types given in two equivalent forms.

    Both the ``(element_type, size)`` tuple form and the string form
    ``str(size) + <dtype name>`` must render as the element type's
    declaration, a space, and one ``[d]`` extent per entry of ``size``.

    Parameters
    ----------
    element_type:
        Scalar type accepted by both ``decltype`` and ``np.dtype``.
    size:
        Iterable of integer extents.
    """
    tuple_form = decltype((element_type, size))
    extents = ''.join("[%d]" % d for d in size)
    assert tuple_form == decltype(element_type) + ' ' + extents

    type_string = str(size) + np.dtype(element_type).name
    string_form = decltype(type_string)
    assert string_form == decltype(element_type) + ' ' + extents
def test_decltype_string(case):
    """Check that decltype of a dtype matches the expected type string.

    Parameters
    ----------
    case:
        Pair ``(dtype, typestring)``; the ``decltype`` output is compared to
        ``typestring`` after stripping surrounding whitespace.
    """
    dtype, expected = case
    rendered = decltype(dtype)
    assert rendered.strip() == expected