def initialize_pyrngs():
    """Create one seeded gslrandom ``PyRNG`` per OpenMP thread.

    The thread count is taken from the ``OMP_NUM_THREADS`` environment
    variable when it is set, otherwise from
    ``gslrandom.get_omp_num_threads()``.  Seeds are drawn from NumPy's global
    random state (``np.random.randint``), one seed per RNG.

    :return: list of ``PyRNG`` objects, one per thread
    :raises ValueError: if ``OMP_NUM_THREADS`` is set but does not start with
        an integer
    :raises AssertionError: if the resolved thread count is not positive
    """
    from gslrandom import PyRNG, get_omp_num_threads

    env_threads = os.environ.get("OMP_NUM_THREADS")
    if env_threads is not None:
        # Environment values are strings, so convert before comparing or using
        # the value as an array size.  OpenMP also accepts a comma-separated
        # list (one entry per nesting level); the first entry is the
        # top-level thread count.
        num_threads = int(env_threads.split(",")[0])
    else:
        num_threads = get_omp_num_threads()
    assert num_threads > 0

    # Choose random seeds
    seeds = np.random.randint(2**16, size=num_threads)
    return [PyRNG(seed) for seed in seeds]
def __init__(self, model, T, S, F, minibatchfrac=1.):
    """
    Set up sparse, per-process views of the event data together with the
    GSL random number generators used when resampling the parents.

    Only the time bins (rows) in which a process has a nonzero entry in S
    are kept; S and F themselves are not stored on the instance.

    :param model: Object providing ``dt``, ``K`` (number of processes)
                  and ``B`` (number of basis functions)
    :param T: Number of time bins
    :param S: Data matrix (documented as TxK)
    :param F: Filtered data array (documented as TxKxB)
    :param minibatchfrac: Stored as ``self.minibatchfrac``; not otherwise
                          used in this constructor
    """
    self.model = model
    self.dt, self.K, self.B = model.dt, model.K, model.B

    # TODO: Remove dependencies on S and F
    self.T = T
    self.minibatchfrac = minibatchfrac

    # Rows of S with a nonzero entry, one index array per process
    event_rows = [np.where(S[:, proc])[0] for proc in range(self.K)]

    self.ts = event_rows
    self.Ts = [len(rows) for rows in event_rows]
    self.Ss = [S[rows, proc].astype(np.uint32)
               for proc, rows in enumerate(event_rows)]
    self.Ns = np.array([S[rows, proc].sum()
                        for proc, rows in enumerate(event_rows)])
    self.Fs = [F[rows] for rows in event_rows]

    # Parent variables start out unset; they are stored sparsely, covering
    # only the rows where there is a spike
    self._Z = None
    self._EZ = None

    # One GSL RNG per OpenMP thread (at least one) for resampling Z
    from gslrandom import PyRNG, get_omp_num_threads
    rng_count = max(1, get_omp_num_threads())
    self.pyrngs = [PyRNG(seed)
                   for seed in np.random.randint(2**16, size=rng_count)]
def test_multi_rng_multi_N_multi_p_with_out():
    """Sample with a list of RNGs, array-valued N and p, and a caller-supplied ``out``.

    Each draw must sum to the requested count along the last axis, and the
    empirical frequencies accumulated over many draws must match the
    probabilities to within ``atol=1e-2``.
    """
    K = 5
    A = 3
    B = 2
    N_AB = (np.arange(1, A * B + 1) * 10).reshape((A, B))
    p_ABK = np.ones((A, B, K)) / K
    p_ABK[0, 1, :] = [0.5, 0.25, 0.05, 0.1, 0.1]  # make one non-uniform
    p_ABK[1, 0, :] = [0.9, 0.05, 0.03, 0.01, 0.01]  # make one really non-uniform

    rngs = [PyRNG(rn.randint(2**16)) for _ in range(get_omp_num_threads())]

    n_iter = 10000
    z_ABK = np.zeros((A, B, K))
    for _ in range(n_iter):
        n_ABK = np.zeros((A, B, K), dtype=np.uint32)
        multinomial(rngs, N_AB, p_ABK, out=n_ABK)
        # This check was previously evaluated but its result thrown away,
        # so a wrong total could never fail the test.
        assert (n_ABK.sum(axis=-1) == N_AB).all()
        z_ABK += n_ABK

    norm_z_ABK = z_ABK.astype(float) / np.sum(z_ABK, axis=-1, keepdims=True)
    assert np.allclose(norm_z_ABK, p_ABK, atol=1e-2)
def test_multi_rng_multi_N_multi_p_with_out():
    """Exercise ``multinomial`` with several RNGs, per-cell counts and probabilities, and ``out``.

    Verifies two things: every individual draw adds up to its requested
    count, and the frequencies averaged over all draws agree with the
    probability table to within ``atol=1e-2``.
    """
    K = 5
    A = 3
    B = 2
    N_AB = (np.arange(1, A * B + 1) * 10).reshape((A, B))

    p_ABK = np.ones((A, B, K)) / K
    p_ABK[0, 1, :] = [0.5, 0.25, 0.05, 0.1, 0.1]  # make one non-uniform
    p_ABK[1, 0, :] = [0.9, 0.05, 0.03, 0.01, 0.01]  # make one really non-uniform

    rngs = [PyRNG(rn.randint(2**16)) for _ in range(get_omp_num_threads())]

    n_iter = 10000
    z_ABK = np.zeros((A, B, K))
    for _ in range(n_iter):
        n_ABK = np.zeros((A, B, K), dtype=np.uint32)
        multinomial(rngs, N_AB, p_ABK, out=n_ABK)
        # The original computed this comparison without asserting it, which
        # made the per-draw check a no-op.
        assert (n_ABK.sum(axis=-1) == N_AB).all()
        z_ABK += n_ABK

    norm_z_ABK = z_ABK.astype(float) / np.sum(z_ABK, axis=-1, keepdims=True)
    assert np.allclose(norm_z_ABK, p_ABK, atol=1e-2)
def test_parallel_multi_N_multi_p_with_out():
    """Check ``multinomial_par`` with multiple counts, multiple p rows and a provided ``out``.

    Every draw must keep the ``(L, K)`` shape of ``out`` and sum to ``N``
    along each row; the accumulated empirical frequencies must be within
    1e-2 of ``p`` element-wise.
    """
    # Multiple N counts, multiple p arrays, out structure provided
    L = 10
    N = np.arange(L, dtype=np.uint32) + 10

    K = 5
    p = np.zeros((L, K))
    p[:5] = 1. / K * np.ones(K)
    p[5:] = np.asarray([0.5, 0.25, 0.05, 0.1, 0.1])

    # Create some RNGs
    rngs = [PyRNG() for _ in range(get_omp_num_threads())]

    n_iter = 1000000
    z = np.zeros((L, K))
    for _ in range(n_iter):
        out = np.zeros((L, K), dtype=np.uint32)
        multinomial_par(rngs, N, p, out)
        assert out.shape == (L, K)
        assert (out.sum(axis=1) == N).all()
        z += out

    # Normalise once and reuse for both the report and the check.
    freq = z / z.sum(axis=1)[:, np.newaxis]
    # Single-argument call form prints the same under Python 2 and Python 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(freq)
    assert (np.abs(freq - p) < 1e-2).all()
# Timing comparison of three ways of drawing the same batch of multinomials
# with gslrandom: seeded_multinomial (no PyRNG objects), multinomial with one
# PyRNG per OpenMP thread, and multinomial with a single PyRNG.
import time

import numpy as np
import numpy.random as rn

from gslrandom import PyRNG, multinomial, seeded_multinomial, get_omp_num_threads

I = 100000
K = 1000

N_I = rn.poisson(rn.gamma(2, 500, size=I)).astype(np.uint32)
# Builtin float instead of the np.float alias, which NumPy >= 1.24 removed.
P_IK = 1. / K * np.ones((I, K), dtype=float)

# Run 1: parallel version that does not take PyRNG objects
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
seeded_multinomial(N_I, P_IK, N_IK)
print('%fs: No PyRNG parallel version with %d cores' % (time.time() - s, get_omp_num_threads()))
assert (N_IK.sum(axis=1) == N_I).all()

# Run 2: one PyRNG per OpenMP thread
rngs = [PyRNG(rn.randint(2**16)) for _ in range(get_omp_num_threads())]
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
multinomial(rngs, N_I, P_IK, N_IK)
print('%fs: PyRNG parallel version with %d cores' % (time.time() - s, len(rngs)))
assert (N_IK.sum(axis=1) == N_I).all()

# Run 3: a single PyRNG
rng = PyRNG(rn.randint(2**16))
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
multinomial(rng, N_I, P_IK, N_IK)
print('%fs: PyRNG version with 1 core' % (time.time() - s))
# Same row-sum check as the two runs above; it was missing for this run.
assert (N_IK.sum(axis=1) == N_I).all()
def __init__(self, model, T, S, F):
    """
    Store the event data, build sparse per-process views of it, and set up
    the GSL random number generators used for resampling the parents.

    The sparse views only cover the time bins (rows) where a process has a
    nonzero entry in S.  If gslrandom cannot be imported, a warning is
    issued and ``self.USE_GSL`` is set to False; otherwise ``self.pyrngs``
    holds one ``PyRNG`` per thread and ``self.USE_GSL`` is True.

    :param model: Object providing ``dt``, ``K`` (number of processes)
                  and ``B`` (number of basis functions)
    :param T: Number of time bins
    :param S: Data matrix (documented as TxK)
    :param F: Filtered data array (documented as TxKxB)
    """
    self.model = model
    self.dt = model.dt
    self.K = model.K
    self.B = model.B

    # TODO: Remove dependencies on S and F
    self.T = T
    self.S = S
    self.F = F

    # Save sparse versions of S and F
    self.ts = []
    self.Ts = []
    self.Ns = []
    self.Ss = []
    self.Fs = []
    for k in range(self.K):
        # Find the rows where S[:,k] is nonzero
        tk = np.where(S[:, k])[0]
        counts = S[tk, k]
        self.ts.append(tk)
        self.Ts.append(len(tk))
        self.Ss.append(counts.astype(np.uint32))
        self.Ns.append(counts.sum())
        self.Fs.append(F[tk])
    self.Ns = np.array(self.Ns)

    # The base class handles the parent variables
    # We use a sparse representation that only considers times (rows)
    # where there is a spike
    self._Z = None
    self._EZ = None

    # Initialize GSL RNGs for resampling Z.  Only the import is guarded, so
    # the "failed to import" warning is issued for import failures alone
    # rather than for any error raised while configuring the RNGs.
    try:
        from gslrandom import PyRNG, get_omp_num_threads
    except ImportError:
        warn("Failed to import gslrandom for parallel multinomial sampling. "
             "Defaulting to pure python instead. "
             "This will have a significant impact on performance. "
             "To install gslrandom, see https://github.com/slinderman/gslrandom")
        self.USE_GSL = False
    else:
        try:
            # Environment values are strings, and OpenMP allows a
            # comma-separated list (one entry per nesting level); the first
            # entry is the top-level thread count.
            num_threads = int(os.environ.get("OMP_NUM_THREADS", "").split(",")[0])
        except ValueError:
            # Unset, empty or malformed: ask the OpenMP runtime instead.
            num_threads = get_omp_num_threads()
        num_threads = max(1, num_threads)

        # Choose random seeds
        seeds = np.random.randint(2 ** 16, size=num_threads)
        self.pyrngs = [PyRNG(seed) for seed in seeds]
        self.USE_GSL = True
def __init__(self, model, T, S, F):
    """
    Keep references to the event data, derive sparse per-process
    representations from it, and prepare the GSL RNGs for resampling Z.

    Sparse representations are restricted to the rows of S in which the
    given process has a nonzero entry.  When gslrandom is not importable a
    warning is emitted and ``self.USE_GSL`` becomes False; when it is,
    ``self.pyrngs`` receives one ``PyRNG`` per thread and ``self.USE_GSL``
    becomes True.

    :param model: Object providing ``dt``, ``K`` (number of processes)
                  and ``B`` (number of basis functions)
    :param T: Number of time bins
    :param S: Data matrix (documented as TxK)
    :param F: Filtered data array (documented as TxKxB)
    """
    self.model = model
    self.dt = model.dt
    self.K = model.K
    self.B = model.B

    # TODO: Remove dependencies on S and F
    self.T = T
    self.S = S
    self.F = F

    # Save sparse versions of S and F
    self.ts = []
    self.Ts = []
    self.Ns = []
    self.Ss = []
    self.Fs = []
    for k in range(self.K):
        # Find the rows where S[:,k] is nonzero
        tk = np.where(S[:, k])[0]
        nonzero_counts = S[tk, k]
        self.ts.append(tk)
        self.Ts.append(len(tk))
        self.Ss.append(nonzero_counts.astype(np.uint32))
        self.Ns.append(nonzero_counts.sum())
        self.Fs.append(F[tk])
    self.Ns = np.array(self.Ns)

    # The base class handles the parent variables
    # We use a sparse representation that only considers times (rows)
    # where there is a spike
    self._Z = None
    self._EZ = None

    # Initialize GSL RNGs for resampling Z.  The try block covers the import
    # only: previously a bare except also swallowed the TypeError caused by
    # comparing the string-valued OMP_NUM_THREADS with 0, silently turning
    # GSL off whenever that variable was set.
    try:
        from gslrandom import PyRNG, get_omp_num_threads
    except ImportError:
        warn(
            "Failed to import gslrandom for parallel multinomial sampling. "
            "Defaulting to pure python instead. "
            "This will have a significant impact on performance. "
            "To install gslrandom, see https://github.com/slinderman/gslrandom"
        )
        self.USE_GSL = False
    else:
        try:
            # The variable holds a string and may be a comma-separated list
            # (one value per nesting level); use the leading value.
            num_threads = int(os.environ.get("OMP_NUM_THREADS", "").split(",")[0])
        except ValueError:
            # Not set, empty, or not an integer: defer to the OpenMP runtime.
            num_threads = get_omp_num_threads()
        num_threads = max(1, num_threads)

        # Choose random seeds
        seeds = np.random.randint(2**16, size=num_threads)
        self.pyrngs = [PyRNG(seed) for seed in seeds]
        self.USE_GSL = True
# Benchmark: time gslrandom's seeded_multinomial, multinomial with a list of
# PyRNGs (one per OpenMP thread) and multinomial with a single PyRNG on the
# same counts and probabilities, checking the row sums after each run.
import time

import numpy as np
import numpy.random as rn

from gslrandom import PyRNG, multinomial, seeded_multinomial, get_omp_num_threads

I = 100000
K = 1000

N_I = rn.poisson(rn.gamma(2, 500, size=I)).astype(np.uint32)
# np.float was only an alias of the builtin float and is gone in NumPy >= 1.24.
P_IK = 1. / K * np.ones((I, K), dtype=float)

# Parallel sampler without PyRNG objects
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
seeded_multinomial(N_I, P_IK, N_IK)
# print(...) with a single pre-formatted string behaves identically on
# Python 2 and Python 3; the print statement only parses on Python 2.
print('%fs: No PyRNG parallel version with %d cores' % (time.time() - s, get_omp_num_threads()))
assert (N_IK.sum(axis=1) == N_I).all()

# Parallel sampler with one PyRNG per OpenMP thread
rngs = [PyRNG(rn.randint(2**16)) for _ in range(get_omp_num_threads())]
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
multinomial(rngs, N_I, P_IK, N_IK)
print('%fs: PyRNG parallel version with %d cores' % (time.time() - s, len(rngs)))
assert (N_IK.sum(axis=1) == N_I).all()

# Sampler with a single PyRNG
rng = PyRNG(rn.randint(2**16))
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
multinomial(rng, N_I, P_IK, N_IK)
print('%fs: PyRNG version with 1 core' % (time.time() - s))
assert (N_IK.sum(axis=1) == N_I).all()
# Benchmark script comparing three gslrandom sampling paths on identical
# inputs: seeded_multinomial, multinomial with one PyRNG per OpenMP thread,
# and multinomial with a single PyRNG.  Each run is timed and its row sums
# are checked against the requested counts.
import time

import numpy as np
import numpy.random as rn

from gslrandom import PyRNG, multinomial, seeded_multinomial, get_omp_num_threads

I = 100000
K = 1000

N_I = rn.poisson(rn.gamma(2, 500, size=I)).astype(np.uint32)
# dtype=float replaces dtype=np.float: the alias was deprecated in NumPy 1.20
# and removed in 1.24, where it raises AttributeError.
P_IK = 1. / K * np.ones((I, K), dtype=float)

# Parallel sampler that does not take PyRNG objects
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
seeded_multinomial(N_I, P_IK, N_IK)
print('%fs: No PyRNG parallel version with %d cores' % (time.time() - s, get_omp_num_threads()))
assert (N_IK.sum(axis=1) == N_I).all()

# Parallel sampler with one PyRNG per OpenMP thread
rngs = [PyRNG(rn.randint(2**16)) for _ in range(get_omp_num_threads())]
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
multinomial(rngs, N_I, P_IK, N_IK)
print('%fs: PyRNG parallel version with %d cores' % (time.time() - s, len(rngs)))
assert (N_IK.sum(axis=1) == N_I).all()

# Sampler with a single PyRNG
rng = PyRNG(rn.randint(2**16))
N_IK = np.ones((I, K), dtype=np.uint32)
s = time.time()
multinomial(rng, N_I, P_IK, N_IK)
print('%fs: PyRNG version with 1 core' % (time.time() - s))
assert (N_IK.sum(axis=1) == N_I).all()