示例#1
0
	def __init__(self, dtype):
		"""Pick the MD5-based fill kernel matching a complex *dtype*.

		For ``numpy.complex64`` and ``numpy.complex128`` the elementwise
		kernel is built and stored in ``self._func``.  For any other
		*dtype* nothing is assigned.

		The identifiers ``a``, ``b``, ``c``, ``d``, ``total_threads`` and
		``n`` used in the kernel text are not declared here; presumably
		they come from ``md5_code`` and the elementwise kernel template
		(confirm against pycuda).
		"""
		from pycuda.curandom import md5_code
		from pycuda.elementwise import get_elwise_kernel

		if dtype == numpy.complex64:
			# Two float2 elements per thread: (a, b) at i, then (c, d) at
			# i + total_threads when that index is still below n.
			signature = "float2 *dest, unsigned int seed"
			source = (
				md5_code + """
				#define POW_2_M32 (1/4294967296.0f)

				dest[i] = make_float2(a*POW_2_M32, b*POW_2_M32);
				if ((i += total_threads) < n)
					dest[i] = make_float2(c*POW_2_M32, d*POW_2_M32);
				""")
		elif dtype == numpy.complex128:
			# One double-complex element per thread; each component is
			# assembled from two values scaled by 2**-32 and 2**-64.
			signature = "pycuda::complex<double> *dest, unsigned int seed"
			source = (
				md5_code + """
				#define POW_2_M32 (1/4294967296.0)
				#define POW_2_M64 (1/18446744073709551616.)

				dest[i] = pycuda::complex<double>(
					a*POW_2_M32 + b*POW_2_M64,
					c*POW_2_M32 + d*POW_2_M64);
				""")
		else:
			# No kernel for this dtype: self._func is deliberately left unset.
			return

		self._func = get_elwise_kernel(signature, source, "md5_rng_float")
示例#2
0
def rand(shape, dtype=numpy.float32, stream=None):
    """Return a new ``GPUArray`` of *shape* filled by the MD5-based kernel.

    Args:
        shape: Shape forwarded to ``GPUArray``.
        dtype: Element type of the result.  Supported values are
            ``numpy.float32``, ``numpy.float64``, ``numpy.int32`` and
            ``numpy.uint32``.
        stream: Stream forwarded to ``prepared_async_call``.

    Returns:
        The freshly allocated ``GPUArray`` that the kernel was launched on.

    Raises:
        NotImplementedError: If *dtype* is not one of the supported types.
            This is raised before any device memory is allocated or any
            kernel is built.
    """
    # Resolve the kernel description first so that an unsupported dtype
    # fails fast, without allocating a GPUArray that is thrown away.
    if dtype == numpy.float32:
        # Up to four floats per thread, one from each of a, b, c, d
        # (names presumably provided by md5_code -- defined elsewhere).
        kernel_name = "md5_rng_float"
        arguments = "float *dest, unsigned int seed"
        body = """
            #define POW_2_M32 (1/4294967296.0f)
            dest[i] = a*POW_2_M32;
            if ((i += total_threads) < n)
                dest[i] = b*POW_2_M32;
            if ((i += total_threads) < n)
                dest[i] = c*POW_2_M32;
            if ((i += total_threads) < n)
                dest[i] = d*POW_2_M32;
            """
    elif dtype == numpy.float64:
        # Up to two doubles per thread, each combining two values scaled
        # by 2**-32 and 2**-64.
        kernel_name = "md5_rng_float"
        arguments = "double *dest, unsigned int seed"
        body = """
            #define POW_2_M32 (1/4294967296.0)
            #define POW_2_M64 (1/18446744073709551616.)

            dest[i] = a*POW_2_M32 + b*POW_2_M64;

            if ((i += total_threads) < n)
            {
              dest[i] = c*POW_2_M32 + d*POW_2_M64;
            }
            """
    elif dtype in [numpy.int32, numpy.uint32]:
        # Both integer dtypes share one kernel that writes unsigned ints.
        kernel_name = "md5_rng_int"
        arguments = "unsigned int *dest, unsigned int seed"
        body = """
            dest[i] = a;
            if ((i += total_threads) < n)
                dest[i] = b;
            if ((i += total_threads) < n)
                dest[i] = c;
            if ((i += total_threads) < n)
                dest[i] = d;
            """
    else:
        raise NotImplementedError(
            "rand does not support dtype %r" % (dtype,))

    from pycuda.gpuarray import GPUArray
    from pycuda.elementwise import get_elwise_kernel

    result = GPUArray(shape, dtype)
    func = get_elwise_kernel(arguments, md5_code + body, kernel_name)

    func.set_block_shape(*result._block)
    # The seed is drawn on the host for every call.
    func.prepared_async_call(result._grid, stream,
            result.gpudata, numpy.random.randint(2**31-1), result.size)

    return result
示例#3
0
 def guarded_div_kernel(self, dtype_x, dtype_y, dtype_z):
     """Build the elementwise kernel for a zero-guarded division.

     The kernel writes ``z[i] = x[i] / y[i]``, or ``0`` wherever
     ``y[i] == 0``.  The C types of the ``x``, ``y`` and ``z`` pointers
     are obtained from ``dtype_to_ctype`` for the respective dtype.
     """
     from pycuda.elementwise import get_elwise_kernel
     from pycuda.tools import dtype_to_ctype

     c_types = {
         "tp_x": dtype_to_ctype(dtype_x),
         "tp_y": dtype_to_ctype(dtype_y),
         "tp_z": dtype_to_ctype(dtype_z),
     }
     arguments = "%(tp_x)s *x, %(tp_y)s *y, %(tp_z)s *z" % c_types
     operation = "z[i] = y[i] == 0 ? 0 : (x[i] / y[i])"
     return get_elwise_kernel(arguments, operation, "divide")
示例#4
0
文件: cg.py 项目: leifdenby/pycuda
 def guarded_div_kernel(self, dtype_x, dtype_y, dtype_z):
     """Return the "divide" elementwise kernel with a zero-divisor guard.

     Each output element is ``x[i] / y[i]`` unless ``y[i]`` equals 0, in
     which case 0 is stored.  Pointer types in the kernel signature are
     derived from the three dtypes through ``dtype_to_ctype``.
     """
     from pycuda.elementwise import get_elwise_kernel
     from pycuda.tools import dtype_to_ctype

     ctype_x = dtype_to_ctype(dtype_x)
     ctype_y = dtype_to_ctype(dtype_y)
     ctype_z = dtype_to_ctype(dtype_z)

     signature = "%(tp_x)s *x, %(tp_y)s *y, %(tp_z)s *z" % dict(
         tp_x=ctype_x, tp_y=ctype_y, tp_z=ctype_z)

     kernel = get_elwise_kernel(
         signature,
         "z[i] = y[i] == 0 ? 0 : (x[i] / y[i])",
         "divide")
     return kernel
示例#5
0
 def make_kernel_internal(self, args, instructions):
     """Return the elementwise kernel for *args* and *instructions*.

     Both values are forwarded unchanged to ``get_elwise_kernel``; the
     kernel name is always ``"vector_expression"``.
     """
     from pycuda.elementwise import get_elwise_kernel

     kernel = get_elwise_kernel(args, instructions, name="vector_expression")
     return kernel
示例#6
0
    else:
        raise ValueError("Incompatible dtype")
    return df

def linear(x):
    """Linear activation stub: ignore *x*, do nothing, return ``None``."""
    return None

def df_linear(x):
    """Return *x* itself, unchanged (no copy is made)."""
    passthrough = x
    return passthrough

# Elementwise kernel that turns `dropout` into a 0/1 mask and applies it
# to `mat`.  For every index i:
#   * if dropout[i] <= dropout_probability, both dropout[i] and mat[i]
#     are set to 0;
#   * otherwise dropout[i] is set to 1 and mat[i] is left untouched.
# NOTE(review): presumably `dropout` arrives filled with uniform random
# samples -- confirm against the caller.
sample_dropout_mask_kernel = get_elwise_kernel(
    "float *mat, float *dropout, float dropout_probability",
    """
    if (dropout[i] <= dropout_probability) {
         dropout[i] = 0.;
         mat[i] = 0.;
    } else {
         dropout[i] = 1.;
    }
    """,
    "sample_dropout_mask")


def sample_dropout_mask(x, dropout_probability=.5, columns=None, stream=None):
    """ Samples a dropout mask and applies it in place"""

    assert x.flags.c_contiguous

    if columns is not None:
        assert len(columns) == 2
        x_tmp = x
示例#7
0
 def make_kernel_internal(self, args, instructions):
     """Build the "vector_expression" elementwise kernel.

     *args* is passed as the kernel argument declaration and
     *instructions* as the operation, both unchanged.
     """
     from pycuda.elementwise import get_elwise_kernel

     kernel_name = "vector_expression"
     return get_elwise_kernel(args, instructions, name=kernel_name)