def sample(self, n, seed=3):
    """
    Draw a seeded sample and return it wrapped in a Data object.

    The raw draw u = self.inh2d(lamb_bar=n) is mapped elementwise through
    log(1/u - 1).

    - n: forwarded to self.inh2d as lamb_bar.
    - seed: seed given to util.NumpySeedContext for the duration of the draw.
    """
    with util.NumpySeedContext(seed=seed):
        raw = self.inh2d(lamb_bar=n)
        inverse = old_div(1, raw)
        X = np.log(inverse - 1)
        # A 1-d result (this can happen if d=1) is turned into a single column.
        if X.ndim == 1:
            X = X[:, np.newaxis]
        return Data(X)
def sample_d_variates(w, n, D, seed=81):
    """
    Return an n x D sample matrix obtained by rejection sampling.

    Proposals are uniform on [-pi, pi]^D. A proposal x is accepted with
    probability (1 + prod_i sin(w * x_i)) / 2.

    - w: multiplier applied to the proposals inside the sine.
    - n: number of rows to return.
    - D: number of columns (variates) per row.
    - seed: seed given to util.NumpySeedContext.
    """
    # Number of proposals generated per round (chunk * D uniforms at a time).
    chunk = 500
    with util.NumpySeedContext(seed=seed):
        out = np.zeros((n, D))
        filled = 0
        while filled < n:
            # Uniform proposals on [-pi, pi], one row per candidate.
            flat = stats.uniform.rvs(
                loc=-math.pi, scale=2 * math.pi, size=D * chunk)
            proposals = np.reshape(flat, (chunk, D))
            unnormalized = 1.0 + np.prod(np.sin(w * proposals), 1)
            keep = stats.uniform.rvs(size=chunk) < unnormalized / 2.0
            accepted = proposals[keep, :]
            # Never take more than what is still missing.
            n_new = min(n - filled, accepted.shape[0])
            out[filled:filled + n_new, :] = accepted[:n_new, :]
            filled += n_new
        return out
def sample(self, n, seed=29):
    """
    Draw n points from the Gaussian mixture and return them as a Data object.

    The number of points per component is drawn from a multinomial with
    probabilities self.pmix; component i is then sampled from a multivariate
    normal with mean self.means[i] and covariance self.variances[i]. The
    rows are shuffled before being returned.

    - n: total number of points.
    - seed: seed given to util.NumpySeedContext.
    """
    pmix = self.pmix
    means = self.means
    variances = self.variances
    _, d = means.shape
    sam_list = []
    with util.NumpySeedContext(seed=seed):
        # multinomial(..., size=1) returns a 2d array; keep its only row.
        counts = np.random.multinomial(n, pmix, size=1)[0]
        for i, nc in enumerate(counts):
            # https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.stats.multivariate_normal.html
            mnorm = stats.multivariate_normal(means[i], variances[i])
            # rvs() squeezes singleton axes (e.g. d=1 gives a 1-d array,
            # nc=1 gives shape (d,)). Force every draw back to (nc, d) so
            # that vstack always stacks rows of width d.
            sam_i = np.reshape(mnorm.rvs(size=nc), (nc, d))
            sam_list.append(sam_i)
        sample = np.vstack(sam_list)
        assert sample.shape[0] == n
        # Mix the components: rows are otherwise grouped by component.
        np.random.shuffle(sample)
    return Data(sample)
def sample(self, n, seed=2):
    """
    Draw n points as standard normal noise scaled by sqrt(self.variance) and
    shifted by self.mean, and return them as a Data object.

    - n: number of rows to draw.
    - seed: seed given to util.NumpySeedContext.
    """
    with util.NumpySeedContext(seed=seed):
        mean, variance = self.mean, self.variance
        dim = len(self.mean)
        noise = np.random.randn(n, dim)
        X = noise * np.sqrt(variance) + mean
        return Data(X)
def sample(self, n, seed=872):
    """
    Rejection sampling.

    The proposal q is N(0, sigma2*I). The unnormalized target is
    q(x) * (1 + prod_i cos(x_i * freqs_i)), and a proposal is accepted with
    probability p_un / (c * q_un) with c = 2.

    - n: number of rows to return.
    - seed: seed given to util.NumpySeedContext.
    Return a Data object wrapping an n x d array, d = len(self.freqs).
    """
    freqs = self.freqs
    sigma2 = self.sigma2
    d = len(freqs)
    # Number of proposals generated per round.
    chunk = 500
    # Constant c in the acceptance ratio p_un / (c * q_un).
    envelope = 2.0
    with util.NumpySeedContext(seed=seed):
        out = np.zeros((n, d))
        filled = 0
        while filled < n:
            proposals = np.random.randn(chunk, d) * np.sqrt(sigma2)
            q_un = np.exp(-np.sum(proposals**2, 1) / (2.0 * sigma2))
            # unnormalized density p
            p_un = q_un * (1 + np.prod(np.cos(proposals * freqs), 1))
            keep = stats.uniform.rvs(size=chunk) < p_un / (envelope * q_un)
            accepted = proposals[keep, :]
            # Never take more than what is still missing.
            n_new = min(n - filled, accepted.shape[0])
            out[filled:filled + n_new, :] = accepted[:n_new, :]
            filled += n_new
    return Data(out)
def sample(self, n, seed=3):
    """
    Return Data wrapping the elementwise log of self.nonhom_linear(size=n).

    - n: forwarded to self.nonhom_linear as size.
    - seed: seed given to util.NumpySeedContext for the duration of the draw.
    """
    with util.NumpySeedContext(seed=seed):
        raw = self.nonhom_linear(size=n)
        X = np.log(raw)
        # A 1-d result (this can happen if d=1) is turned into a single column.
        if X.ndim == 1:
            X = X[:, np.newaxis]
        return Data(X)
def simulate_null_dist(eigs, J, n_simulate=2000, seed=7):
    """
    Simulate the null distribution using the spectrum of the covariance
    matrix of the U-statistic. The simulated statistic is n*FSSD^2 where FSSD
    is an unbiased estimator.

    - eigs: a numpy array of estimated eigenvalues of the covariance matrix.
      eigs is of length d*J, where d is the input dimension.
    - J: the number of test locations.
    - n_simulate: number of statistics to simulate.
    - seed: seed given to util.NumpySeedContext.

    Return a numpy array of n_simulate simulated statistics.
    """
    d = old_div(len(eigs), J)
    assert d > 0
    # Draw at most d x J x block_size values at a time (never fewer than 20
    # columns per block).
    block_size = max(20, int(1000.0 / (d * J)))
    fssds = np.zeros(n_simulate)
    with util.NumpySeedContext(seed=seed):
        for start in range(0, n_simulate, block_size):
            stop = min(start + block_size, n_simulate)
            # Squared standard normals: chi^2 variables with 1 degree of freedom.
            chi2 = np.random.randn(d * J, stop - start)**2
            # Weighted sum of centered chi^2 draws, one value per column.
            fssds[start:stop] = eigs.dot(chi2 - 1.0)
    return fssds
def sample(self, n, seed=3):
    """
    Return Data wrapping the first output of self.gmm_sample(N=n).

    The second output of gmm_sample is discarded.

    - n: forwarded to self.gmm_sample as N.
    - seed: seed given to util.NumpySeedContext for the duration of the draw.
    """
    with util.NumpySeedContext(seed=seed):
        X, _ = self.gmm_sample(N=n)
        is_flat = len(X.shape) == 1
        if is_flat:
            # This can happen if d=1: make it a single column.
            X = X[:, np.newaxis]
        return Data(X)
def sample(self, n, seed=3):
    """
    Draw n points from a multivariate normal with mean self.mean and
    covariance self.cov, and return them as a Data object with one row per
    point.

    - n: number of points to draw.
    - seed: seed given to util.NumpySeedContext for the duration of the draw.
    """
    with util.NumpySeedContext(seed=seed):
        mvn = stats.multivariate_normal(self.mean, self.cov)
        X = np.asarray(mvn.rvs(size=n))
        # rvs() squeezes singleton axes, so a result with fewer than two
        # axes is ambiguous: it is either a single point (n=1, possibly a
        # 0-d scalar when d=1 as well) or n one-dimensional points (d=1).
        if X.ndim < 2:
            if n == 1:
                # One point: lay it out as a single row.
                X = X.reshape(1, -1)
            else:
                # d=1: one column with n rows.
                X = X[:, np.newaxis]
        return Data(X)
def sample(self, n, seed):
    """
    Draw n paired points (X, Y) and return them as a PairedData object.

    X is n x dimx standard normal. Y = |Z| * prod_i sign(X_i), where Z is an
    independent standard normal column.

    - n: number of pairs.
    - seed: seed given to util.NumpySeedContext.
    """
    dx = self.dimx
    with util.NumpySeedContext(seed=seed):
        # Z is drawn before X; the order fixes the values for a given seed.
        Z = np.random.randn(n, 1)
        X = np.random.randn(n, dx)
        # Product of the signs across each row, kept as a column vector.
        sign_prod = np.prod(np.sign(X), 1)[:, np.newaxis]
        Y = sign_prod * np.abs(Z)
        return PairedData(X, Y, label='gauss_sign_dx%d' % dx)
def sample(self, n, seed=3):
    """
    Draw n gamma variates with shape self.alpha and scale 1/self.beta, and
    return them as a Data object.

    - n: number of draws.
    - seed: seed given to util.NumpySeedContext for the duration of the draw.
    """
    with util.NumpySeedContext(seed=seed):
        scale = 1.0 / self.beta
        X = stats.gamma.rvs(self.alpha, size=n, scale=scale)
        # A 1-d result (this can happen if d=1) is turned into a single column.
        if X.ndim == 1:
            X = X[:, np.newaxis]
        return Data(X)
def sample(self, n, seed):
    """
    Draw n paired points (X, Y) and return them as a PairedData object.

    X is n x dimx standard normal. With m = dimx // 2 and Z an independent
    n x (m + 1) standard normal matrix,

        Y = sqrt(2/dimx) * sum_{j<m} sign(X_{2j} * X_{2j+1}) * |Z_j| + Z_m.

    - n: number of pairs.
    - seed: seed given to util.NumpySeedContext.
    """
    d = self.dimx
    # Integer division: the result is used as an array dimension, a range
    # bound and a column index, all of which reject the float that `d / 2`
    # yields under true division.
    n_pairs = d // 2
    with util.NumpySeedContext(seed=seed):
        # Z is drawn before X; the order fixes the values for a given seed.
        Z = np.random.randn(n, n_pairs + 1)
        X = np.random.randn(n, d)
        Y = np.zeros((n, 1))
        for j in range(n_pairs):
            # List indices keep each selected column 2-d (n x 1).
            pair_sign = np.sign(X[:, [2 * j]] * X[:, [2 * j + 1]])
            Y = Y + pair_sign * np.abs(Z[:, [j]])
        # The last column of Z is the additive noise term.
        Y = np.sqrt(2.0 / d) * Y + Z[:, [n_pairs]]
        return PairedData(X, Y, label='pairwise_sign_dx%d' % self.dimx)
def sample(self, n, seed=44):
    """
    Sample n pairs from self.ps and append standard normal noise columns:
    self.ndx columns to X and self.ndy columns to Y.

    The noise is drawn under seed + 100 while self.ps is sampled with seed.
    The returned PairedData keeps a None label, otherwise the label of the
    underlying sample gets the suffix '_ndx%d_ndy%d'.

    - n: number of pairs.
    - seed: base seed.
    """
    with util.NumpySeedContext(seed=seed + 100):
        # Noise is drawn first, before the underlying source is sampled.
        noise_x = np.random.randn(n, self.ndx)
        noise_y = np.random.randn(n, self.ndy)
        pdata = self.ps.sample(n, seed=seed)
        X, Y = pdata.xy()
        Zx = np.hstack((X, noise_x))
        Zy = np.hstack((Y, noise_y))
        if pdata.label is None:
            new_label = None
        else:
            new_label = pdata.label + '_ndx%d' % self.ndx + '_ndy%d' % self.ndy
        return PairedData(Zx, Zy, label=new_label)
def fcompute_pvalues_for_processes(self, U_matrix, chane_prob,
                                   num_bootstrapped_stats=300):
    """
    Bootstrap p-value for the statistic N * mean(U_matrix).

    Each bootstrap statistic is N * mean(U_matrix * outer(W, W)), where
    W = simulatepm(N, chane_prob) is redrawn for every replicate under the
    fixed seed 10.

    - U_matrix: array whose first dimension N sets the sample size.
    - chane_prob: forwarded unchanged to simulatepm.
    - num_bootstrapped_stats: number of bootstrap replicates.

    Return the fraction of bootstrap statistics strictly greater than the
    observed statistic.
    """
    N = U_matrix.shape[0]
    boot_stats = np.zeros(num_bootstrapped_stats)
    with util.NumpySeedContext(seed=10):
        for b in range(num_bootstrapped_stats):
            W = simulatepm(N, chane_prob)
            weighted = U_matrix * np.outer(W, W)
            boot_stats[b] = N * np.mean(weighted)
    observed = N * np.mean(U_matrix)
    n_exceed = np.sum(boot_stats > observed)
    return float(n_exceed) / num_bootstrapped_stats
def gen_features(self, X):
    """
    Map X (n x d) to an n x (2 * (n_features // 2)) matrix of random
    cosine/sine features.

    The frequencies W are standard normal draws divided by sqrt(self.sigma2),
    generated under self.seed so the output is deterministic given the seed.
    Fourier transform formula from
    http://mathworld.wolfram.com/FourierTransformGaussian.html
    """
    n_features = self.n_features
    with util.NumpySeedContext(seed=self.seed):
        _, d = X.shape
        half = n_features // 2
        W = np.random.randn(half, d) / np.sqrt(self.sigma2)
        # n x half matrix of projections.
        proj = X.dot(W.T)
        stacked = np.hstack((np.cos(proj), np.sin(proj)))
        Z = stacked * np.sqrt(2.0 / n_features)
    return Z
def perform_test(self, dat, return_simulated_stats=False,
                 return_ustat_gram=False):
    """
    Run the bootstrap test on dat and return a dictionary of results.

    - dat: an instance of Data.
    - return_simulated_stats: if True, add the bootstrap statistics under
      the key 'sim_stats'.
    - return_ustat_gram: if True, add the matrix H returned by
      self.compute_stat under the key 'H'.

    The result always has the keys 'alpha', 'pvalue', 'test_stat',
    'h0_rejected', 'n_simulate' and 'time_secs'.
    """
    with util.ContextTimer() as t:
        alpha = self.alpha
        n_simulate = self.n_simulate
        n = dat.data().shape[0]

        _, H = self.compute_stat(dat, return_ustat_gram=True)
        test_stat = n * np.mean(H)

        # bootstrapping
        sim_stats = np.zeros(n_simulate)
        with util.NumpySeedContext(seed=self.seed):
            for b in range(n_simulate):
                W = self.bootstrapper(n)
                # n * [ (1/n^2) * \sum_i \sum_j h(x_i, x_j) w_i w_j ],
                # i.e. a bootstrap version of n*V_n.
                sim_stats[b] = W.dot(H.dot(W / float(n)))

        # approximate p-value: fraction of bootstrap statistics strictly
        # above the observed one
        pvalue = np.mean(sim_stats > test_stat)

    # The elapsed time is read once the timing block has been left.
    results = {
        'alpha': self.alpha,
        'pvalue': pvalue,
        'test_stat': test_stat,
        'h0_rejected': pvalue < alpha,
        'n_simulate': n_simulate,
        'time_secs': t.secs,
    }
    if return_simulated_stats:
        results['sim_stats'] = sim_stats
    if return_ustat_gram:
        results['H'] = H
    return results
def sample(self, n, seed=29):
    """
    Draw n points from the isotropic Gaussian mixture and return them as a
    Data object.

    Component sizes come from a multinomial with probabilities self.pmix.
    Component i is sampled as standard normal noise times
    sqrt(self.variances[i]) plus self.means[i]. Rows are shuffled before
    being returned.

    - n: total number of points.
    - seed: seed given to util.NumpySeedContext.
    """
    pmix, means, variances = self.pmix, self.means, self.variances
    _, d = self.means.shape
    with util.NumpySeedContext(seed=seed):
        # multinomial(..., size=1) gives a 2d array; take its only row.
        counts = np.random.multinomial(n, pmix, size=1)[0]
        # One block of rows per component, drawn in component order.
        sam_list = [
            np.random.randn(nc, d) * np.sqrt(variances[i]) + means[i]
            for i, nc in enumerate(counts)
        ]
        sample = np.vstack(sam_list)
        assert sample.shape[0] == n
        # Rows are grouped by component at this point; mix them.
        np.random.shuffle(sample)
        return Data(sample)
def sample(self, n, seed=3, return_latent=False):
    """
    Sample by blocked Gibbs sampling.

    The chain state (X, H) is initialized with X standard normal
    (n x len(self.b)) and H uniform on {-1, +1} (n x len(self.c)). It is
    advanced self.burnin times with self._blocked_gibbs_next, followed by
    one more step whose state is returned.

    - n: number of rows in the chain state.
    - seed: seed given to util.NumpySeedContext.
    - return_latent: if True, return (Data(X), H) instead of Data(X).
    """
    dh = len(self.c)
    dx = len(self.b)
    with util.NumpySeedContext(seed=seed):
        # Initialize the state of the Markov chain.
        X = np.random.randn(n, dx)
        # randint's upper bound is exclusive: (0, 2) draws from {0, 1},
        # which maps to {-1, +1}. The former (1, 2) always produced +1.
        H = np.random.randint(0, 2, (n, dh)) * 2 - 1.0

        # burn-in
        for _ in range(self.burnin):
            X, H = self._blocked_gibbs_next(X, H)
        # sampling
        X, H = self._blocked_gibbs_next(X, H)
    if return_latent:
        return Data(X), H
    return Data(X)
def sample(self, n, seed=4):
    """
    Draw an n x self.d matrix of Laplace variates with location self.loc and
    scale self.scale, and return it as a Data object.

    - n: number of rows.
    - seed: seed given to util.NumpySeedContext.
    """
    shape = (n, self.d)
    with util.NumpySeedContext(seed=seed):
        X = np.random.laplace(loc=self.loc, scale=self.scale, size=shape)
        return Data(X)
def sample(self, n, seed=5):
    """
    Draw n Student-t variates with self.df degrees of freedom and return
    them as a single-column Data object.

    - n: number of draws.
    - seed: seed given to util.NumpySeedContext.
    """
    with util.NumpySeedContext(seed=seed):
        draws = stats.t.rvs(df=self.df, size=n)
        # Turn the flat vector of draws into one column.
        column = draws[:, np.newaxis]
        return Data(column)