def ptx_gridsize1d(context, builder, sig, args):
    """Emit the grid size along the ``x`` dimension.

    The result is the product of the ``ntid.x`` and ``nctaid.x``
    special-register values, computed by the shared ``_nthreads_for_dim``
    helper so the per-dimension arithmetic lives in one place.

    ``context`` and ``sig`` are not used. ``args`` is only checked for its
    length; its single element is never read.

    Returns the value produced by ``builder.mul``.
    """
    # Invariant of this implementation: it is invoked with exactly one argument.
    assert len(args) == 1
    return _nthreads_for_dim(builder, "x")
def ptx_gridsize2d(context, builder, sig, args):
    """Emit the grid size along the ``x`` and ``y`` dimensions.

    For each dimension the ``ntid.<dim>`` and ``nctaid.<dim>``
    special-register values are multiplied by the shared
    ``_nthreads_for_dim`` helper. The two products are then packed, in
    ``(x, y)`` order, with ``cgutils.pack_array``.

    ``context`` and ``sig`` are not used. ``args`` is only checked for its
    length; its single element is never read.

    Returns the value produced by ``cgutils.pack_array``.
    """
    # Invariant of this implementation: it is invoked with exactly one argument.
    assert len(args) == 1
    # x is handled completely (reads + multiply) before y; the resulting
    # values are the same as reading all four registers up front.
    extents = [_nthreads_for_dim(builder, dim) for dim in ("x", "y")]
    return cgutils.pack_array(builder, extents)
def _nthreads_for_dim(builder, dim):
    """Return the product of ``ntid.<dim>`` and ``nctaid.<dim>``.

    Both special-register values are obtained through
    ``nvvmutils.call_sreg`` and multiplied with ``builder.mul``.

    builder: object used to emit the register reads and the multiply.
    dim: suffix interpolated into the register names (callers are
        presumably expected to pass ``"x"``, ``"y"`` or ``"z"`` -- confirm).
    """
    ntid_name = f"ntid.{dim}"
    nctaid_name = f"nctaid.{dim}"
    # Keep the read order: ntid first, then nctaid.
    ntid_val = nvvmutils.call_sreg(builder, ntid_name)
    nctaid_val = nvvmutils.call_sreg(builder, nctaid_name)
    product = builder.mul(ntid_val, nctaid_val)
    return product
def cuda_warpsize(context, builder, sig, args):
    """Return the ``warpsize`` special register via ``nvvmutils.call_sreg``.

    Only ``builder`` is used; ``context``, ``sig`` and ``args`` are ignored.
    """
    warp_size = nvvmutils.call_sreg(builder, 'warpsize')
    return warp_size
def cuda_laneid(context, builder, sig, args):
    """Return the ``laneid`` special register via ``nvvmutils.call_sreg``.

    Only ``builder`` is used; ``context``, ``sig`` and ``args`` are ignored.
    """
    lane_id = nvvmutils.call_sreg(builder, 'laneid')
    return lane_id
def initialize_dim3(builder, prefix):
    """Pack the ``.x``, ``.y`` and ``.z`` registers of ``prefix`` into a struct.

    Each component is read with ``nvvmutils.call_sreg`` using the name
    ``"<prefix>.<axis>"``, in x, y, z order, and the three values are handed
    to ``cgutils.pack_struct`` as a tuple.

    builder: object used to emit the register reads and the struct.
    prefix: register name prefix interpolated with ``%``-formatting.
    """
    components = tuple(
        nvvmutils.call_sreg(builder, pattern % prefix)
        for pattern in ("%s.x", "%s.y", "%s.z")
    )
    return cgutils.pack_struct(builder, components)
def ptx_sreg_impl(context, builder, sig, args):
    """Return the special register named by ``sreg`` via ``nvvmutils.call_sreg``.

    Must be called with no arguments (``args`` has to be empty).
    ``context`` and ``sig`` are ignored.
    """
    # NOTE(review): `sreg` is neither a parameter nor a local here; it has to
    # come from an enclosing or module scope that is not visible in this
    # section -- confirm it is bound before this function runs.
    assert not args
    value = nvvmutils.call_sreg(builder, sreg)
    return value