# Note: these constructors are taken from pycuda.curandom and are assumed to
# sit inside their respective classes, with that module's top-level imports in
# scope (numpy as np, pycuda.driver as drv, pycuda.gpuarray as array, and the
# *_random_source strings).
def __init__(
        self,
        seed_getter,
        offset,
        state_type,
        vector_type,
        generator_bits,
        additional_source,
        scramble_type=None,
):
    super().__init__(state_type, vector_type, generator_bits, additional_source)

    generator_count = self.generators_per_block * self.block_count

    # Draw one 31-bit seed per generator unless the caller supplies them.
    if seed_getter is None:
        seed = array.to_gpu(
            np.asarray(
                np.random.randint(0, (1 << 31) - 1, generator_count),
                dtype=np.int32))
    else:
        seed = seed_getter(generator_count)

    if not (isinstance(seed, pycuda.gpuarray.GPUArray)
            and seed.dtype == np.int32
            and seed.size == generator_count):
        raise TypeError(
            "seed must be a GPUArray of int32 of the right length")

    p = self.module.get_function("prepare")
    p.prepare("PiPi")

    from pycuda.characterize import has_stack
    has_stack = has_stack()

    if has_stack:
        prev_stack_size = drv.Context.get_limit(drv.limit.STACK_SIZE)

    try:
        if has_stack:
            # curand_init needs a larger per-thread stack on ABI-enabled devices.
            drv.Context.set_limit(drv.limit.STACK_SIZE, 1 << 14)  # 16k

        try:
            p.prepared_call(
                (self.block_count, 1),
                (self.generators_per_block, 1, 1),
                self.state,
                generator_count,
                seed.gpudata,
                offset,
            )
        except drv.LaunchError:
            raise ValueError(
                "Initialisation failed. Decrease number of threads.")
    finally:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, prev_stack_size)

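# A minimal sketch of a seed_getter compatible with the check above: given a
# count, it must return an int32 GPUArray with exactly that many entries. The
# helper name and the fixed base seed are illustrative, not part of
# pycuda.curandom.
def _example_counting_seed_getter(count, base_seed=12345):
    import numpy as np
    import pycuda.gpuarray as gpuarray

    # Deterministic, distinct 31-bit seeds, one per generator.
    seeds = (base_seed + np.arange(count)) % ((1 << 31) - 1)
    return gpuarray.to_gpu(seeds.astype(np.int32))
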
def __init__(
        self,
        dir_vector,
        dir_vector_dtype,
        dir_vector_size,
        dir_vector_set,
        offset,
        state_type,
        vector_type,
        generator_bits,
        sobol_random_source,
):
    super().__init__(state_type, vector_type, generator_bits, sobol_random_source)

    if dir_vector is None:
        dir_vector = generate_direction_vectors(
            self.block_count * self.generators_per_block, dir_vector_set)

    if not (isinstance(dir_vector, pycuda.gpuarray.GPUArray)
            and dir_vector.dtype == dir_vector_dtype
            and dir_vector.shape == (
                self.block_count * self.generators_per_block,
                dir_vector_size)):
        raise TypeError(
            "dir_vector must be a GPUArray of the right dtype and shape")

    p = self.module.get_function("prepare")
    p.prepare("PiPi")

    from pycuda.characterize import has_stack
    has_stack = has_stack()

    if has_stack:
        prev_stack_size = drv.Context.get_limit(drv.limit.STACK_SIZE)

    try:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, 1 << 14)  # 16k

        try:
            p.prepared_call(
                (self.block_count, 1),
                (self.generators_per_block, 1, 1),
                self.state,
                self.block_count * self.generators_per_block,
                dir_vector.gpudata,
                offset,
            )
        except drv.LaunchError:
            raise ValueError(
                "Initialisation failed. Decrease number of threads.")
    finally:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, prev_stack_size)

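# Hedged usage sketch for the constructor above: building the direction-vector
# array explicitly instead of relying on the dir_vector=None default. It
# assumes pycuda.curandom exposes generate_direction_vectors and the
# direction_vector_set constants (e.g. VECTOR_32 for 32-bit Sobol state).
def _example_make_dir_vectors_32(generator_count):
    from pycuda.curandom import direction_vector_set, generate_direction_vectors

    # One set of 32 direction numbers per generator, shape (generator_count, 32).
    return generate_direction_vectors(
        generator_count, direction_vector_set.VECTOR_32)
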
def __init__(self, seed_getter=None, offset=0):
    """
    :arg seed_getter: a function that, given an integer count, will yield an
        `int32` :class:`GPUArray` of seeds.
    """
    super(XORWOWRandomNumberGenerator, self).__init__(
        'curandStateXORWOW', 'unsigned int',
        xorwow_random_source + xorwow_skip_ahead_sequence_source
        + random_skip_ahead64_source)

    generator_count = self.generators_per_block * self.block_count
    if seed_getter is None:
        # np.random.random_integers is deprecated; randint's upper bound is
        # exclusive, so this draws the same range of 31-bit seeds.
        seed = array.to_gpu(
            np.asarray(
                np.random.randint(0, (1 << 31) - 1, generator_count),
                dtype=np.int32))
    else:
        seed = seed_getter(generator_count)

    if not (isinstance(seed, pycuda.gpuarray.GPUArray)
            and seed.dtype == np.int32
            and seed.size == generator_count):
        raise TypeError(
            "seed must be a GPUArray of int32 of the right length")

    p = self.module.get_function("prepare")
    p.prepare("PiPi")

    self.skip_ahead_sequence = self.module.get_function(
        "skip_ahead_sequence")
    self.skip_ahead_sequence.prepare("Pii")
    self.skip_ahead_sequence_array = self.module.get_function(
        "skip_ahead_sequence_array")
    self.skip_ahead_sequence_array.prepare("PiP")

    from pycuda.characterize import has_stack
    has_stack = has_stack()

    if has_stack:
        prev_stack_size = drv.Context.get_limit(drv.limit.STACK_SIZE)

    try:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, 1 << 14)  # 16k

        try:
            p.prepared_call(
                (self.block_count, 1),
                (self.generators_per_block, 1, 1),
                self.state, generator_count, seed.gpudata, offset)
        except drv.LaunchError:
            raise ValueError(
                "Initialisation failed. Decrease number of threads.")
    finally:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, prev_stack_size)

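# Hedged usage sketch: constructing the generator above and filling a GPU array
# with uniform samples. seed_getter_unique and fill_uniform are assumed to be
# available as in pycuda.curandom; an active context (e.g. via pycuda.autoinit)
# is required.
def _example_xorwow_uniform(n=1 << 20):
    import numpy as np
    import pycuda.gpuarray as gpuarray
    from pycuda.curandom import XORWOWRandomNumberGenerator, seed_getter_unique

    rng = XORWOWRandomNumberGenerator(seed_getter=seed_getter_unique, offset=0)
    out = gpuarray.empty(n, dtype=np.float32)
    rng.fill_uniform(out)
    return out
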
def __init__(
        self,
        dir_vector,
        dir_vector_dtype,
        dir_vector_size,
        dir_vector_set,
        scramble_vector,
        scramble_vector_function,
        offset,
        state_type,
        vector_type,
        generator_bits,
        scramble_type,
        sobol_random_source,
):
    super(_ScrambledSobolRandomNumberGeneratorBase, self).__init__(
        state_type, vector_type, generator_bits, sobol_random_source,
        scramble_type)

    if dir_vector is None:
        dir_vector = generate_direction_vectors(
            self.block_count * self.generators_per_block, dir_vector_set)

    if scramble_vector is None:
        scramble_vector = scramble_vector_function(
            self.block_count * self.generators_per_block)

    if not (isinstance(dir_vector, pycuda.gpuarray.GPUArray)
            and dir_vector.dtype == dir_vector_dtype
            and dir_vector.shape == (
                self.block_count * self.generators_per_block,
                dir_vector_size)):
        raise TypeError(
            "dir_vector must be a GPUArray of the right dtype and shape")

    if not (isinstance(scramble_vector, pycuda.gpuarray.GPUArray)
            and scramble_vector.dtype == dir_vector_dtype
            and scramble_vector.shape == (
                self.block_count * self.generators_per_block,)):
        raise TypeError(
            "scramble_vector must be a GPUArray of the right dtype and length")

    p = self.module.get_function("prepare")
    p.prepare("PiPPi")

    from pycuda.characterize import has_stack
    has_stack = has_stack()

    if has_stack:
        prev_stack_size = drv.Context.get_limit(drv.limit.STACK_SIZE)

    try:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, 1 << 14)  # 16k

        try:
            p.prepared_call(
                (self.block_count, 1),
                (self.generators_per_block, 1, 1),
                self.state,
                self.block_count * self.generators_per_block,
                dir_vector.gpudata,
                scramble_vector.gpudata,
                offset)
        except drv.LaunchError:
            raise ValueError(
                "Initialisation failed. Decrease number of threads.")
    finally:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, prev_stack_size)

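# Hedged sketch of how the scramble_vector_function argument above is typically
# satisfied: pycuda.curandom is assumed to provide generate_scramble_constants32
# (with a 64-bit counterpart) returning one scramble constant per generator.
def _example_make_scramble_constants_32(generator_count):
    from pycuda.curandom import generate_scramble_constants32

    # Shape (generator_count,), matching the scramble_vector check above.
    return generate_scramble_constants32(generator_count)
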
def __init__(self, dir_vector=None, offset=0):
    super(Sobol32RandomNumberGenerator, self).__init__(
        'curandStateSobol32', sobol32_random_source)

    if dir_vector is None:
        dir_vector = generate_direction_vectors(
            self.block_count * self.generators_per_block)

    if not (isinstance(dir_vector, pycuda.gpuarray.GPUArray)
            and dir_vector.dtype == np.int32
            and dir_vector.shape == (
                self.block_count * self.generators_per_block, 32)):
        raise TypeError(
            "dir_vector must be a GPUArray of int32 of the right shape")

    p = self.module.get_function("prepare")
    # Passing a block size to prepare() is the deprecated prepared-call API;
    # the block is given to prepared_call() instead, as in the methods above.
    p.prepare("PiPi")

    from pycuda.characterize import has_stack
    has_stack = has_stack()

    if has_stack:
        prev_stack_size = drv.Context.get_limit(drv.limit.STACK_SIZE)

    try:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, 1 << 14)  # 16k

        try:
            dev = drv.Context.get_device()
            if dev.compute_capability() >= (2, 0):
                p.prepared_call(
                    (self.block_count, 1),
                    (self.generators_per_block, 1, 1),
                    self.state,
                    self.block_count * self.generators_per_block,
                    dir_vector.gpudata, offset)
            else:
                # Older devices: twice as many blocks, with the per-launch
                # generator count argument halved.
                p.prepared_call(
                    (2 * self.block_count, 1),
                    (self.generators_per_block, 1, 1),
                    self.state,
                    self.block_count * self.generators_per_block // 2,
                    dir_vector.gpudata, offset)
        except drv.LaunchError:
            raise ValueError(
                "Initialisation failed. Decrease number of threads.")
    finally:
        if has_stack:
            drv.Context.set_limit(drv.limit.STACK_SIZE, prev_stack_size)

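# Hedged usage sketch for the quasi-random generator above: Sobol sequences are
# deterministic for a given offset, so re-creating the generator with the same
# offset reproduces the same stream. Names follow pycuda.curandom; an active
# context is assumed.
def _example_sobol32_uniform(n=4096, offset=0):
    import numpy as np
    import pycuda.gpuarray as gpuarray
    from pycuda.curandom import Sobol32RandomNumberGenerator

    rng = Sobol32RandomNumberGenerator(offset=offset)
    out = gpuarray.empty(n, dtype=np.float32)
    rng.fill_uniform(out)
    return out
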