def test_consistency_GPU_parallel():
    """
    Verify that GPU_mrg_uniform, drawing several samples in parallel,
    produces the same random numbers as the reference (Java)
    implementation by L'Ecuyer et al.
    """
    if not cuda_available:
        raise SkipTest('Optional package cuda not available')
    # FAST_COMPILE would skip the optimizations this test exercises.
    mode = 'FAST_RUN' if config.mode == 'FAST_COMPILE' else config.mode

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7  # 7 samples will be drawn in parallel

    samples = []
    curr_rstate = numpy.array([seed] * 6, dtype='int32')

    for _stream in range(n_streams):
        # Build the substream states for this stream: each state is the
        # previous one advanced by ff_2p72.
        substates = [curr_rstate.copy()]
        for _ in range(1, n_substreams):
            substates.append(rng_mrg.ff_2p72(substates[-1]))
        flat_states = numpy.asarray(substates).flatten()

        # HACK - transfer these int32 to the GPU memory as float32
        # (reinterpret_cast)
        as_floats = numpy.frombuffer(flat_states.data, dtype='float32')
        # Transfer to device
        shared_state = float32_shared_constructor(as_floats)

        new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(
            shared_state, ndim=None, dtype='float32', size=(n_substreams,))
        shared_state.default_update = new_rstate

        # Not really necessary, just mimicking
        # rng_mrg.MRG_RandomStreams' behavior
        sample.rstate = shared_state
        sample.update = (shared_state, new_rstate)

        # We need the sample back in the main memory
        cpu_sample = tensor.as_tensor_variable(sample)
        f = theano.function([], cpu_sample, mode=mode)

        stream_samples = [f() for _ in range(n_samples)]
        samples.append(numpy.array(stream_samples).T.flatten())

        # next stream
        curr_rstate = rng_mrg.ff_2p134(curr_rstate)

    samples = numpy.array(samples).flatten()
    assert numpy.allclose(samples, java_samples)
def test_consistency_GPU_parallel():
    """
    Check that GPU_mrg_uniform, sampling several substreams in parallel,
    matches the reference (Java) implementation by L'Ecuyer et al.
    """
    if not cuda_available:
        raise SkipTest('Optional package cuda not available')
    if config.mode == 'FAST_COMPILE':
        mode = 'FAST_RUN'
    else:
        mode = config.mode

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7  # number of samples drawn in parallel

    all_samples = []
    stream_state = numpy.array([seed] * 6, dtype='int32')

    for _ in range(n_streams):
        # Chain the substream states: each new state is the previous
        # one skipped ahead with ff_2p72.
        states = [stream_state.copy()]
        while len(states) < n_substreams:
            states.append(rng_mrg.ff_2p72(states[-1]))
        packed = numpy.asarray(states).flatten()

        # HACK - transfer these int32 to the GPU memory as float32
        # (reinterpret_cast)
        reinterpreted = numpy.frombuffer(packed.data, dtype='float32')
        # Transfer to device
        rstate = float32_shared_constructor(reinterpreted)

        new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(
            rstate, ndim=None, dtype='float32', size=(n_substreams,))
        rstate.default_update = new_rstate

        # Not really necessary, just mimicking
        # rng_mrg.MRG_RandomStreams' behavior
        sample.rstate = rstate
        sample.update = (rstate, new_rstate)

        # Bring the sample back into host memory for comparison.
        f = theano.function([], tensor.as_tensor_variable(sample), mode=mode)

        drawn = [f() for _ in range(n_samples)]
        all_samples.append(numpy.array(drawn).T.flatten())

        # next stream
        stream_state = rng_mrg.ff_2p134(stream_state)

    flat = numpy.array(all_samples).flatten()
    assert numpy.allclose(flat, java_samples)
def test_consistency_GPU_serial():
    """Verify that the random numbers generated by GPU_mrg_uniform, one
    draw at a time, are the same as the reference (Java) implementation
    by L'Ecuyer et al.
    """
    if not cuda_available:
        raise SkipTest("Optional package cuda not available")
    mode = "FAST_RUN" if config.mode == "FAST_COMPILE" else config.mode

    seed = 12345
    n_samples = 5
    n_streams = 12
    n_substreams = 7

    samples = []
    stream_seed = numpy.array([seed] * 6, dtype="int32")

    for _ in range(n_streams):
        substream_seed = stream_seed.copy()
        for _ in range(n_substreams):
            state = numpy.array(substream_seed.copy(), dtype="int32")

            # HACK - we transfer these int32 to the GPU memory as float32
            # (reinterpret_cast)
            float_view = numpy.frombuffer(state.data, dtype="float32")
            # Transfer to device
            rstate = float32_shared_constructor(float_view)

            new_rstate, sample = rng_mrg.GPU_mrg_uniform.new(
                rstate, ndim=None, dtype="float32", size=(1,))
            rstate.default_update = new_rstate

            # Not really necessary, just mimicking
            # rng_mrg.MRG_RandomStreams' behavior
            sample.rstate = rstate
            sample.update = (rstate, new_rstate)

            # We need the sample back in the main memory
            f = theano.function([], tensor.as_tensor_variable(sample),
                                mode=mode)
            samples.extend(f() for _ in range(n_samples))

            # next substream
            substream_seed = rng_mrg.ff_2p72(substream_seed)

        # next stream
        stream_seed = rng_mrg.ff_2p134(stream_seed)

    assert numpy.allclose(numpy.array(samples).flatten(), java_samples)
def uniform(self, size, low=0.0, high=1.0, ndim=None, dtype=None,
            nstreams=None):
    """
    Sample a tensor of the given size whose elements are drawn from a
    uniform distribution between low and high.  If the size argument is
    ambiguous on the number of dimensions, ndim may be a plain integer
    to supplement the missing information.

    :param low: Lower bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``low`` will be cast into
        dtype.  This bound is excluded.

    :param high: Higher bound of the interval on which values are
        sampled.  If the ``dtype`` arg is provided, ``high`` will be
        cast into dtype.  This bound is excluded.

    :param size: Can be a list of integer or Theano variable (ex: the
        shape of other Theano Variable)

    :param dtype: The output data type. If dtype is not specified, it
        will be inferred from the dtype of low and high, but will be at
        least as precise as floatX.
    """
    low = as_tensor_variable(low)
    high = as_tensor_variable(high)
    if dtype is None:
        # Never less precise than floatX.
        dtype = scal.upcast(config.floatX, low.dtype, high.dtype)

    low = cast(low, dtype=dtype)
    high = cast(high, dtype=dtype)

    if isinstance(size, tuple):
        msg = "size must be a tuple of int or a Theano variable"
        assert all([isinstance(dim, (numpy.integer, int, Variable))
                    for dim in size]), msg
        if any([isinstance(dim, (numpy.integer, int)) and dim <= 0
                for dim in size]):
            raise ValueError(
                "The specified size contains a dimension with value <= 0",
                size)
    elif not (isinstance(size, Variable) and size.ndim == 1):
        raise TypeError("size must be a tuple of int or a Theano "
                        "Variable with 1 dimension, got " + str(size) +
                        " of type " + str(type(size)))

    if nstreams is None:
        nstreams = self.n_streams(size)

    if self.use_cuda and dtype == 'float32':
        rstates = self.get_substream_rstates(nstreams).flatten()
        # HACK - we use fact that int32 and float32 have same size to
        # sneak ints into the CudaNdarray type.
        # these *SHOULD NEVER BE USED AS FLOATS*
        float_view = numpy.frombuffer(rstates.data, dtype='float32')
        assert float_view.shape == rstates.shape
        # Sanity check: the reinterpretation is lossless.
        assert (float_view.view('int32') == rstates).all()
        # transfer to device
        node_rstate = float32_shared_constructor(float_view)
        assert isinstance(node_rstate.type, CudaNdarrayType)
        # we can't use the normal mrg_uniform constructor + later
        # optimization because of the float_view hack above.  There is
        # currently no Theano node that will do a frombuffer
        # reinterpretation.
        outputs = GPU_mrg_uniform.new(node_rstate, ndim, dtype, size)
    else:
        node_rstate = shared(self.get_substream_rstates(nstreams))
        outputs = mrg_uniform.new(node_rstate, ndim, dtype, size)
    u = self.pretty_return(node_rstate, *outputs)

    # Rescale the unit-interval draw into [low, high).
    r = u * (high - low) + low
    if u.type.broadcastable != r.type.broadcastable:
        raise NotImplementedError(
            'Increase the size to match the broadcasting pattern of '
            '`low` and `high` arguments')
    assert r.dtype == dtype
    return r
def uniform(self, size, low=0.0, high=1.0, ndim=None, dtype=None,
            nstreams=None):
    """
    Sample a tensor of the given size whose elements are drawn from a
    uniform distribution between low and high.  If the size argument is
    ambiguous on the number of dimensions, ndim may be a plain integer
    to supplement the missing information.

    :param low: Lower bound of the interval on which values are sampled.
        If the ``dtype`` arg is provided, ``low`` will be cast into
        dtype.

    :param high: Higher bound of the interval on which values are
        sampled.  If the ``dtype`` arg is provided, ``high`` will be
        cast into dtype.

    :param size: Can be a list of integer or Theano variable (ex: the
        shape of other Theano Variable)

    :param dtype: The output data type. If dtype is not specified, it
        will be inferred from the dtype of low and high, but will be at
        least as precise as floatX.
    """
    low, high = as_tensor_variable(low), as_tensor_variable(high)
    if dtype is None:
        # Infer from the bounds, but never below floatX precision.
        dtype = scal.upcast(config.floatX, low.dtype, high.dtype)
    low = cast(low, dtype=dtype)
    high = cast(high, dtype=dtype)

    # Validate `size`: either a tuple of positive ints / Variables, or
    # a 1-d Theano Variable.
    if isinstance(size, tuple):
        msg = "size must be a tuple of int or a Theano variable"
        assert all([isinstance(entry, (numpy.integer, int, Variable))
                    for entry in size]), msg
        bad = any([isinstance(entry, (numpy.integer, int)) and entry <= 0
                   for entry in size])
        if bad:
            raise ValueError(
                "The specified size contains a dimension with value <= 0",
                size)
    else:
        if not (isinstance(size, Variable) and size.ndim == 1):
            raise TypeError("size must be a tuple of int or a Theano "
                            "Variable with 1 dimension, got " + str(size) +
                            " of type " + str(type(size)))

    if nstreams is None:
        nstreams = self.n_streams(size)

    on_gpu = self.use_cuda and dtype == 'float32'
    if on_gpu:
        rstates = self.get_substream_rstates(nstreams)
        rstates = rstates.flatten()
        # HACK - we use fact that int32 and float32 have same size to
        # sneak ints into the CudaNdarray type.
        # these *SHOULD NEVER BE USED AS FLOATS*
        tmp_float_buf = numpy.frombuffer(rstates.data, dtype='float32')
        assert tmp_float_buf.shape == rstates.shape
        assert (tmp_float_buf.view('int32') == rstates).all()
        # transfer to device
        node_rstate = float32_shared_constructor(tmp_float_buf)
        assert isinstance(node_rstate.type, CudaNdarrayType)
        # we can't use the normal mrg_uniform constructor + later
        # optimization because of the tmp_float_buf hack above.  There
        # is currently no Theano node that will do a frombuffer
        # reinterpretation.
        u = self.pretty_return(
            node_rstate,
            *GPU_mrg_uniform.new(node_rstate, ndim, dtype, size))
    else:
        node_rstate = shared(self.get_substream_rstates(nstreams))
        u = self.pretty_return(
            node_rstate,
            *mrg_uniform.new(node_rstate, ndim, dtype, size))

    # Affine-rescale the unit-interval draw to the requested bounds.
    r = u * (high - low) + low
    if u.type.broadcastable != r.type.broadcastable:
        raise NotImplementedError(
            'Increase the size to match the broadcasting pattern of '
            '`low` and `high` arguments')
    assert r.dtype == dtype
    return r