def generate_data(self, n=1e4, k=2, ncomps=3, seed=1):
    npr.seed(seed)
    data1_concat = []
    data2_concat = []
    labels1_concat = []
    labels2_concat = []

    for j in range(ncomps):
        mean = gen_mean[j]
        sd = gen_sd[j]
        corr = gen_corr[j]

        cov = np.empty((k, k))
        cov.fill(corr)
        cov[np.diag_indices(k)] = 1
        cov *= np.outer(sd, sd)

        num1 = int(n * group_weights1[j])
        num2 = int(n * group_weights2[j])

        rvs1 = multivariate_normal(mean, cov, size=num1)
        rvs2 = multivariate_normal(mean, cov, size=num2)

        data1_concat.append(rvs1)
        data2_concat.append(rvs2)
        labels1_concat.append(np.repeat(j, num1))
        labels2_concat.append(np.repeat(j, num2))

    return ([np.concatenate(labels1_concat),
             np.concatenate(labels2_concat)],
            [np.concatenate(data1_concat, axis=0),
             np.concatenate(data2_concat, axis=0)])
def genDataset(N, separable=True):
    n = N // 2
    if separable:
        mu1 = np.array([0, 2])
        mu2 = np.array([2, 0])
        sigma = np.array([[0.8, 0.6], [0.6, 0.8]])
        X1 = rn.multivariate_normal(mu1, sigma, N)
        Y1 = np.ones(len(X1))
        X2 = rn.multivariate_normal(mu2, sigma, N)
        Y2 = np.ones(len(X2)) * -1
        X = np.vstack((X1, X2))
        Y = np.hstack((Y1, Y2))
    else:
        X = np.random.randn(300, 2)
        Y = np.logical_xor(X[:, 0] > 0, X[:, 1] > 0)
        mu1 = [-1, 2]
        mu2 = [1, -1]
        mu3 = [4, -4]
        mu4 = [-4, 4]
        sigma = [[1.0, 0.8], [0.8, 1.0]]
        X1 = rn.multivariate_normal(mu1, sigma, n)
        X1 = np.vstack((X1, np.random.multivariate_normal(mu3, sigma, n)))
        Y1 = np.ones(len(X1))
        X2 = rn.multivariate_normal(mu2, sigma, n)
        X2 = np.vstack((X2, np.random.multivariate_normal(mu4, sigma, n)))
        Y2 = np.ones(len(X2)) * -1
        X = np.vstack((X1, X2))
        Y = np.hstack((Y1, Y2))
    return X, Y
def step(self, y, predict_P=False, index=1):
    X = self.x_prior
    states = X[:2, :]
    params = X[2:, :]
    s_std = std(X[index].A1)
    tmp_ws = as_array([norm.pdf(y, x[0, index], s_std) for x in states.T])
    n_weights = self.weights * tmp_ws
    sum_weights = n_weights.sum()
    if sum_weights != 0:
        n_weights /= sum_weights
        neff = 1.0 / (n_weights ** 2).sum()
    if sum_weights == 0 or neff < self.num_part / 2.0:
        idx = choice(range(X.shape[1]), X.shape[1], p=self.weights)
        self.weights = tile(as_array(1.0 / self.num_part), self.num_part)
        self.x_post = X[:, idx]
    else:
        self.x_post = X
        self.weights = n_weights
    p_mean = average(params, axis=1, weights=self.weights).A1
    p_cov = cov(params, aweights=self.weights)
    self.x_post[2:, :] = multivariate_normal(p_mean, p_cov, X.shape[1]).T
    for i, x in enumerate(self.x_post[2:, :].T):
        if (x < 0).any():  # redraw any particle with a negative parameter
            while True:
                new = multivariate_normal(p_mean, p_cov, 1).T
                if (new > 0).all() and new[0, 1] > new[0, 2]:
                    self.x_post[2:, i] = new
                    break
def rdm_pnt(nbr_cluster):
    from numpy.random import uniform
    from numpy.random import multivariate_normal
    from numpy import savetxt, concatenate
    import sys

    x = uniform(50, 950)
    y = uniform(50, 950)
    z = uniform(-500, 500) + 20.0
    cov = [[uniform(-5, 5), z], [z, uniform(-5, 5)]]
    s = multivariate_normal([x, y], cov, nbr_cluster[1])
    xo = s[:, 0]
    yo = s[:, 1]
    print(s)
    sys.stdout.flush()
    for i in range(nbr_cluster[0] - 1):
        x = uniform(50, 950)
        y = uniform(50, 950)
        z = uniform(-500, 500) + 20.0
        cov = [[uniform(-5, 5), z], [z, uniform(-5, 5)]]
        s = multivariate_normal([x, y], cov, nbr_cluster[1])
        xo = concatenate((xo, s[:, 0]))  # numpy arrays have no extend(); concatenate instead
        yo = concatenate((yo, s[:, 1]))
        print(xo)
        sys.stdout.flush()
    savetxt("data.txt", s)
    return 1
def sample(self, T, s_init=None, x_init=None, y_init=None):
    """
    Inputs:
      T: time to run simulation
    Outputs:
      xs: Hidden continuous states
      Ss: Hidden switch states
    """
    x_dim, y_dim = self.x_dim, self.y_dim
    # Allocate memory
    xs = zeros((T, x_dim))
    Ss = zeros(T, dtype=int)  # switch states are integer indices
    # Compute invariant distribution of the switch process
    _, vl = linalg.eig(self.Z, left=True, right=False)
    pi = vl[:, 0]
    # Sample start conditions
    sample = multinomial(1, pi, size=1)
    if s_init is None:
        Ss[0] = nonzero(sample)[0][0]
    else:
        Ss[0] = s_init
    if x_init is None:
        xs[0] = multivariate_normal(self.mus[Ss[0]], self.Sigmas[Ss[0]])
    else:
        xs[0] = x_init
    # Perform time updates
    for t in range(0, T - 1):
        s = Ss[t]
        A = self.As[s]
        b = self.bs[s]
        Q = self.Qs[s]
        xs[t + 1] = multivariate_normal(dot(A, xs[t]) + b, Q)
        sample = multinomial(1, self.Z[s], size=1)[0]
        Ss[t + 1] = nonzero(sample)[0][0]
    return (xs, Ss)
def test():
    from .walk import walk
    from numpy.random import multivariate_normal, seed
    from numpy import vstack, ones, eye, arange
    seed(2)  # Remove uncertainty on tests

    # Set a number of good and bad chains
    Ngood, Nbad = 25, 2

    # Make chains mean-reverting chains with widely separated values for
    # bad and good; put bad chains first.
    chains = walk(1000, mu=[1]*Nbad + [5]*Ngood, sigma=0.45, alpha=0.1)

    # Check IQR and Grubbs
    assert (identify_outliers('IQR', chains, None) == arange(Nbad)).all()
    assert (identify_outliers('Grubbs', chains, None) == arange(Nbad)).all()

    # Put points for 'bad' chains at [-1,...,-1] and 'good' chains at [1,...,1]
    x = vstack((multivariate_normal(-ones(4), .1*eye(4), size=Nbad),
                multivariate_normal(ones(4), .1*eye(4), size=Ngood)))
    assert identify_outliers('Mahal', chains, x)[0] in range(Nbad)

    # Put points for _all_ chains at [1,...,1] and check that mahal returns []
    xsame = multivariate_normal(ones(4), .2*eye(4), size=Ngood+Nbad)
    assert len(identify_outliers('Mahal', chains, xsame)) == 0

    # Check again with large variance
    x = vstack((multivariate_normal(-3*ones(4), eye(4), size=Nbad),
                multivariate_normal(ones(4), 10*eye(4), size=Ngood)))
    assert len(identify_outliers('Mahal', chains, x)) == 0

    # =====================================================================
    # Test replacement

    # Construct a state object
    from numpy.linalg import norm
    from .state import MCMCDraw
    Ngen, Npop = chains.shape
    Npop, Nvar = x.shape
    state = MCMCDraw(Ngen=Ngen, Nthin=Ngen, Nupdate=0,
                     Nvar=Nvar, Npop=Npop, Ncr=0, thinning=0)
    # Fill it with chains
    for i in range(Ngen):
        state._generation(new_draws=Npop, x=x, logp=chains[i], accept=Npop)

    # Make a copy of the current state so we can check it was updated
    nx, nlogp = x+0, chains[-1]+0

    # Remove outliers
    state.remove_outliers(nx, nlogp, test='IQR', portion=0.5)

    # Check that the outliers were removed
    outliers = state.outliers()
    assert outliers.shape[0] == Nbad
    for i in range(Nbad):
        assert nlogp[outliers[i, 1]] == chains[-1][outliers[i, 2]]
        assert norm(nx[outliers[i, 1], :] - x[outliers[i, 2], :]) == 0
def prepare_dataset(variance):
    cov1 = np.array([[variance, 0], [0, variance]])
    cov2 = np.array([[variance, 0], [0, variance]])
    df1 = DataFrame(multivariate_normal(Mu1, cov1, N1), columns=['x', 'y'])
    df1['type'] = 1
    df2 = DataFrame(multivariate_normal(Mu2, cov2, N2), columns=['x', 'y'])
    df2['type'] = -1
    df = pd.concat([df1, df2], ignore_index=True)
    df = df.reindex(np.random.permutation(df.index)).reset_index(drop=True)
    return df
def sample():
    from numpy.random import rand, multivariate_normal
    data_a = multivariate_normal(rand(1) * 20 - 10, np.eye(1) * (rand()), 250)
    data_b = multivariate_normal(rand(1) * 20 - 10, np.eye(1) * (rand()), 250)
    data_c = multivariate_normal(rand(1) * 20 - 10, np.eye(1) * (rand()), 250)
    data_d = multivariate_normal(rand(1) * 20 - 10, np.eye(1) * (rand()), 250)
    X = np.r_[data_a, data_b, data_c, data_d][:, 0]
    c_cf = change_finder(term=70, window=7, order=(2, 2, 0))
    result = c_cf.main(X)
    return result
def generate_data():
    sz = 100
    mu1 = randint(1, 1000, size=sz)
    mu2 = randint(1000, 2000, size=sz)
    mu3 = randint(3000, 4000, size=sz)
    cov = np.eye(sz)
    x1 = multivariate_normal(mu1, cov, 100)
    x2 = multivariate_normal(mu2, cov, 20)
    x3 = multivariate_normal(mu3, cov, 50)
    return np.vstack((x1, x2, x3))
def move(self, s, a):
    s = np.copy(s)
    if a == "up":
        s = s + self.move_up + npr.multivariate_normal(np.array([0., 0]), np.diag([0.25, 0.125]))
    elif a == "down":
        s = s + self.move_down + npr.multivariate_normal(np.array([0., 0]), np.diag([0.25, 0.125]))
    elif a == "left":
        s = s + self.move_left + npr.multivariate_normal(np.array([0., 0]), np.diag([0.125, 0.25]))
    elif a == "right":
        s = s + self.move_right + npr.multivariate_normal(np.array([0., 0]), np.diag([0.125, 0.25]))
    else:
        pass
    return s
def prepare_dataset(variance):
    n1 = 80
    n2 = 200
    mu1 = [9, 9]
    mu2 = [-3, -3]
    cov1 = np.array([[variance, 0], [0, variance]])
    cov2 = np.array([[variance, 0], [0, variance]])
    df1 = DataFrame(multivariate_normal(mu1, cov1, n1), columns=['x', 'y'])
    df1['type'] = 1
    df2 = DataFrame(multivariate_normal(mu2, cov2, n2), columns=['x', 'y'])
    df2['type'] = 0
    df = pd.concat([df1, df2], ignore_index=True)
    df = df.reindex(np.random.permutation(df.index)).reset_index()
    return df[['x', 'y', 'type']]
def GenerateGaussians(N, Ntr):
    # Generate the data.
    mu0, mu1 = np.array([1.0, 1.0]), np.array([-1.0, -1.0])
    sg = np.array([[1.0, -0.75], [-0.75, 1.0]])
    X0 = rnd.multivariate_normal(mu0, sg, size=(N)).astype(floatX)
    X1 = rnd.multivariate_normal(mu1, sg, size=(N)).astype(floatX)
    Y0 = np.hstack((np.ones((N, 1)), np.zeros((N, 1)))).astype(floatX)
    Y1 = np.hstack((np.zeros((N, 1)), np.ones((N, 1)))).astype(floatX)
    # Permute the data and split to return.
    idx = rnd.permutation(2*N)
    X, Y = np.vstack((X0, X1))[idx], np.vstack((Y0, Y1))[idx]
    return shared(X[:Ntr], 'Xtr'), shared(Y[:Ntr], 'Ytr'), \
        shared(X[Ntr:], 'Xte'), shared(Y[Ntr:], 'Yte')
def sample_gp_posterior_mean(gp, coords_ptr, coords_size, bounds_ptr,
                             sample_x_ptr, num_samples, check_constraint=None,
                             exp_temp=10, MH_rate=0.09):
    """
    Sample by using the Metropolis method to get policies in the re-sampling
    phase. We treat the exponentiated (GP-mean*exp_temp) as an unnormalized
    density. The proposal distribution is the normal distribution
    N(mu, MH_rate*I).
    """
    bounds = [(bounds_ptr[2*i], bounds_ptr[2*i+1]) for i in range(coords_size)]

    def check_violate_constraint(nx_pt, bounds, coords_size, check_constraint=None):
        for i in range(coords_size):
            if not (nx_pt[i] <= bounds[i][1] and nx_pt[i] >= bounds[i][0]):
                return True
        if check_constraint is not None:
            return check_constraint(nx_pt, bounds, coords_size)
        return False

    coords = zeros(coords_size)
    for i in range(coords_size):
        coords[i] = coords_ptr[i]

    import numpy.random as nr
    cur_pt = coords
    factor = MH_rate
    cov = eye(coords_size)
    for i in range(coords_size):
        cov[i, i] = (bounds[i][1] - bounds[i][0])**2 * factor
    for i in range(num_samples):
        for j in range(coords_size):
            sample_x_ptr[i*coords_size+j] = cur_pt[j]
        nx_pt = nr.multivariate_normal(cur_pt, cov)
        while check_violate_constraint(nx_pt, bounds, coords_size, check_constraint):
            nx_pt = nr.multivariate_normal(cur_pt, cov)
        mh_ratio = exp((-gp.posterior(cur_pt)[0] + gp.posterior(nx_pt)[0]) * exp_temp)
        r = nr.uniform()
        if r < mh_ratio:
            cur_pt = nx_pt
    return 0
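# The function above depends on a GP object and C-style pointer buffers, so a
# hedged, self-contained sketch of the same random-walk Metropolis step on a
# toy 1-D log-density is given here instead (all names below are illustrative
# and not part of the original source).
import numpy as np
import numpy.random as nr

def toy_metropolis(log_density, x0, step_sd=0.3, num_samples=1000):
    samples = np.empty(num_samples)
    cur = x0
    for i in range(num_samples):
        samples[i] = cur
        proposal = nr.normal(cur, step_sd)                    # symmetric Gaussian proposal
        ratio = np.exp(log_density(proposal) - log_density(cur))
        if nr.uniform() < ratio:                              # accept with prob min(1, ratio)
            cur = proposal
    return samples

# e.g. sampling an (unnormalized) standard normal:
# draws = toy_metropolis(lambda v: -0.5 * v * v, x0=0.0)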
def setUp(self):
    self.mu = array([0, 0])
    self.sig = eye(2)
    self.pnts = multivariate_normal(self.mu, self.sig, 1000)
    self.k = 16
    self.niter = 10
    self.model = DPMixtureModel(self.k, self.niter, 0, 1)
def plot_monte_carlo_ukf():

    def f(x, y):
        return x + y, .1*x**2 + y*y

    mean = (0, 0)
    p = np.array([[32, 15], [15., 40.]])

    # Compute linearized mean
    mean_fx = f(*mean)

    # generate random points
    xs, ys = multivariate_normal(mean=mean, cov=p, size=3000).T
    fxs, fys = f(xs, ys)

    plt.subplot(121)
    plt.gca().grid(b=False)
    plt.scatter(xs, ys, marker='.', alpha=.2, color='k')
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)

    plt.subplot(122)
    plt.gca().grid(b=False)
    plt.scatter(fxs, fys, marker='.', alpha=0.2, color='k')
    plt.ylim([-10, 200])
    plt.xlim([-100, 100])
    plt.show()
def mixnormrnd(pi, mu, sigma, k):
    """Generate random variables from a mixture of Gaussians"""
    xs = []
    for unused in range(k):
        j = sum(random() > cumsum(pi))
        xs.append(multivariate_normal(mu[j], sigma[j]))
    return array(xs)
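# Hedged usage sketch for mixnormrnd (hypothetical parameter values, not from
# the original source): draw 500 points from a two-component 2-D mixture.
# Assumes the same module-level imports (random, cumsum, multivariate_normal, array).
import numpy as np

example_pi = np.array([0.3, 0.7])
example_mu = [np.zeros(2), np.array([5.0, 5.0])]
example_sigma = [np.eye(2), 0.5 * np.eye(2)]
example_xs = mixnormrnd(example_pi, example_mu, example_sigma, 500)
# example_xs has shape (500, 2)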
def simulate(self, ts_length=100):
    """
    Simulate a time series of length ts_length, first drawing

        x_0 ~ N(mu_0, Sigma_0)

    Parameters
    ----------
    ts_length : scalar(int), optional(default=100)
        The length of the simulation

    Returns
    -------
    x : array_like(float)
        An n x ts_length array, where the t-th column is x_t
    y : array_like(float)
        A k x ts_length array, where the t-th column is y_t
    """
    x0 = multivariate_normal(self.mu_0.flatten(), self.Sigma_0)
    w = np.random.randn(self.m, ts_length-1)
    v = self.C.dot(w)  # Multiply each w_t by C to get v_t = C w_t
    # == simulate time series == #
    x = simulate_linear_model(self.A, x0, v, ts_length)

    if self.H is not None:
        v = np.random.randn(self.l, ts_length)
        y = self.G.dot(x) + self.H.dot(v)
    else:
        y = self.G.dot(x)

    return x, y
def simulate_mixed_logit(num_pers, predict_data, config):
    num = num_pers
    to_load = zeros(len(predict_data))
    while num > 0:
        exputils = zeros(len(predict_data))
        for path_idx in range(len(predict_data)):
            vals = predict_data[path_idx]
            u = 0
            for i in range(len(config['fixed_coefficients'])):
                u = u + config['alpha'][i] * vals[config['fixed_coefficients'][i]]
            if config['use_random_coefficients']:
                beta = nr.multivariate_normal(config['latent_mu'], config['latent_sigma'])
                for i in range(len(beta)):
                    beta[i] = config['random_transformations'][i](beta[i])
                    u = u + beta[i] * vals[config['random_coefficients'][i]]
            exputils[path_idx] = exp(u)
        if num > config['mixing_granularity'] and config['use_random_coefficients']:
            to_load = to_load + config['mixing_granularity'] * exputils / sum(exputils)
        else:
            to_load = to_load + num * exputils / sum(exputils)
        if config['use_random_coefficients']:
            num = num - config['mixing_granularity']
        else:
            num = 0
    return to_load
def sampleConditionalDistribution(self, indices, values):
    # Calculate cumulative indices in the mean vector
    counter = 0
    cum_indices = []
    for i in range(len(self.dims)):
        if i in indices:
            for j in range(counter, counter + self.dims[i]):
                cum_indices.append(j)
        counter += self.dims[i]
    # Mask with newly calculated indices
    state_mask = ones(self.state_dim, dtype=bool)
    state_mask[cum_indices] = False
    condition_mask = logical_not(state_mask)
    s11 = self.covar[state_mask][:, state_mask]
    s12 = self.covar[state_mask][:, condition_mask]
    s21 = self.covar[condition_mask][:, state_mask]
    s22 = self.covar[condition_mask][:, condition_mask]
    m1 = zeros((sum(state_mask), 1))
    m2 = zeros((sum(condition_mask), 1))
    # Project conditioned values
    value_projected = matrix([[]]).reshape((0, 1))
    for i in range(len(values)):
        value_projected = matrix(concatenate((value_projected, self.mean[indices[i]].log(values[i]))))
    # Calculate new mean
    m_prime = m1 + s12 * s22.getI() * (value_projected - m2)
    s_prime = s11 - s12 * s22.getI() * s21
    sample = matrix(random.multivariate_normal(mean=m_prime.getT().tolist()[0], cov=s_prime)).getT()
    # Pack in order to return
    rest = list(set(range(len(self.dims))) - set(indices))
    return packPoints(self.mean, [sample], rest, self.types, self.dims)[0]
def simulate(self, ts_length=100):
    """
    Simulate a time series of length ts_length, first drawing

        x_0 ~ N(mu_0, Sigma_0)

    Parameters
    ----------
    ts_length : scalar(int), optional(default=100)
        The length of the simulation

    Returns
    -------
    x : array_like(float)
        An n x ts_length array, where the t-th column is x_t
    y : array_like(float)
        A k x ts_length array, where the t-th column is y_t
    """
    x = np.empty((self.n, ts_length))
    x[:, 0] = multivariate_normal(self.mu_0.flatten(), self.Sigma_0)
    w = np.random.randn(self.m, ts_length - 1)
    for t in range(ts_length - 1):
        x[:, t + 1] = self.A.dot(x[:, t]) + self.C.dot(w[:, t])
    y = self.G.dot(x)

    return x, y
def _produceNewSample(self):
    """ returns a new sample, its fitness and its densities """
    chosenOne = drawIndex(self.alphas, True)
    mu = self.mus[chosenOne]

    if self.useAnticipatedMeanShift:
        if len(self.allsamples) % 2 == 1 and len(self.allsamples) > 1:
            if not (self.elitism and chosenOne == self.bestChosenCenter):
                mu += self.meanShifts[chosenOne]

    if self.diagonalOnly:
        sample = normal(mu, self.sigmas[chosenOne])
    else:
        sample = multivariate_normal(mu, self.sigmas[chosenOne])
    if self.sampleElitism and len(self.allsamples) > self.windowSize and len(self.allsamples) % self.windowSize == 0:
        sample = self.bestEvaluable.copy()
    fit = self._oneEvaluation(sample)

    if ((not self.minimize and fit >= self.bestEvaluation)
            or (self.minimize and fit <= self.bestEvaluation)
            or len(self.allsamples) == 0):
        # used to determine which center produced the current best
        self.bestChosenCenter = chosenOne
        self.bestSigma = self.sigmas[chosenOne].copy()
    if self.minimize:
        fit = -fit
    self.allfitnesses.append(fit)
    self.allsamples.append(sample)
    return sample, fit
def sample_from_posterior_given_hypers_and_data(self, pred, n_samples=1, joint=True):
    if joint:
        predicted_mean, cov = self.predict(pred, full_cov=True)  # This part depends on the data
        return npr.multivariate_normal(predicted_mean, cov, size=n_samples).T.squeeze()
    else:
        predicted_mean, var = self.predict(pred, full_cov=False)  # This part depends on the data
        return np.squeeze(predicted_mean[:, None] +
                          npr.randn(pred.shape[0], n_samples) * np.sqrt(var)[:, None])
def gibbs_sample(groups, votes, n_samples, n_burnin):
    """ Performs Gibbs Sampling over groups.

        Observations:
        - Each Variable object has a list of samples. Once a new sample is
          generated, the value used for the variable is the new sample.
        - After sampling, the values of latent variables are changed to the
          empiric mean of samples.

        Args:
          groups: a dict of Group objects.
          votes: list of votes, each one represented as a dictionary, which is
            the training data.
          n_samples: the number of samples to obtain.
          n_burnin: number of initial samples to ignore.

        Returns:
          None. The samples are inserted into Variable objects.
    """
    burn_count = 0
    for _ in range(n_samples + n_burnin):
        for g_name in ['alpha', 'beta', 'xi', 'u', 'v', 'gamma', 'lambda']:
            group = groups[g_name]
            if isinstance(group, EntityArrayGroup):
                for variable in group.iter_variables():
                    mean, var = variable.get_cond_mean_and_var(groups, votes)
                    sample = multivariate_normal(mean.reshape(-1), var)
                    variable.add_sample(sample.reshape(sample.size, 1))
            else:
                for variable in group.iter_variables():
                    mean, var = variable.get_cond_mean_and_var(groups, votes)
                    sample = normal(mean, sqrt(var))
                    variable.add_sample(sample)
        if burn_count < n_burnin:
            burn_count += 1
def copula(num_samples, rho_mat, mu_mat, methods):
    """Copula procedure to generate an OTU table with corrs close to rho_mat.

    Inputs:
     num_samples - int, number of samples.
     rho_mat - 2d arr, symmetric positive definite matrix which specifies the
      correlation or covariation between the otu's in the table.
     mu_mat - 1d arr w/ len(num_otus), mean of otu for multivariate random call.
     methods - list of lists w/ len(num_otus), each list has a variable number
      of elements. the first element in each list is the
      scipy.stats.distributions function like lognorm or beta. this is the
      function that we draw values from for the actual otu. the remaining
      entries are the parameters for that function in order that the function
      requires them.
    """
    num_otus = len(mu_mat)
    # draw from multivariate normal distribution with specified parameters.
    # transpose so that it remains otuXsample matrix.
    Z = multivariate_normal(mean=mu_mat, cov=rho_mat, size=num_samples).T
    # using the cdf of the normal distribution find where each sample
    # value for each otu falls in the normal cdf.
    U = norm.cdf(Z)
    # make the otu table using the methods and cdf values. ppf_args[0] is the
    # distribution function (eg. lognorm) whose ppf function we will use
    # to transform the cdf vals into the new distribution. ppf_args[1:] is the
    # params of the function like a, b, size, loc etc.
    otu_table = array([ppf_args[0].ppf(otu_cdf_vals, *ppf_args[1:], size=num_otus)
                       for ppf_args, otu_cdf_vals in zip(methods, U)])
    return where(otu_table > 0, otu_table, 0)
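# Minimal sketch of the Gaussian-copula idea implemented by copula() above,
# under assumed inputs (the names below are illustrative): draw correlated
# normals, map them to uniforms with the normal CDF, then push them through
# the inverse CDF of the desired marginal.
import numpy as np
from numpy.random import multivariate_normal
from scipy.stats import norm, lognorm

def copula_sketch(num_samples=50, rho=0.8):
    cov = np.array([[1.0, rho], [rho, 1.0]])
    Z = multivariate_normal(mean=np.zeros(2), cov=cov, size=num_samples).T  # 2 x num_samples
    U = norm.cdf(Z)                    # uniform marginals, correlation structure preserved
    otu_a = lognorm.ppf(U[0], s=1.0)   # first OTU gets a lognormal marginal
    otu_b = lognorm.ppf(U[1], s=0.5)   # second OTU gets a tighter lognormal marginal
    return np.vstack((otu_a, otu_b))   # otu x sample table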
def _sample_entity(self, X, mask, E, R, i, var_e, RE, RTE):
    _lambda = np.identity(self.n_dim) / var_e

    nz_r = mask[:, i, :].nonzero()
    nz_c = mask[:, :, i].nonzero()
    nnz_r = nz_r[0].size
    nnz_c = nz_c[0].size
    nnz_all = nnz_r + nnz_c
    self.features[:nnz_r] = RE[nz_r]
    self.features[nnz_r:nnz_all] = RTE[nz_c]
    self.Y[:nnz_r] = X[:, i, :][nz_r]
    self.Y[nnz_r:nnz_all] = X[:, :, i][nz_c]

    features = self.features[:nnz_all]
    Y = self.Y[:nnz_all]

    try:
        logit = LogisticRegression(penalty='l2', C=1.0 / var_e, fit_intercept=False)
        logit.fit(features, Y)
        mu = logit.coef_[0]
        prd = logit.predict_proba(features)
        _lambda += np.dot(features.T * (prd[:, 0] * prd[:, 1]), features)
    except:
        mu = np.zeros(self.n_dim)

    inv_lambda = np.linalg.inv(_lambda)
    E[i] = multivariate_normal(mu, inv_lambda)
def particle_movement(leftwheel, rightwheel, R, robot):
    if 'particles' not in robot:
        abort(404)

    particles = array(robot['particles'])
    dd = (leftwheel + rightwheel) / 2
    dh = (rightwheel - leftwheel) / ROBOT_RADIUS

    z = zeros((len(particles[0]),))
    noises = multivariate_normal(z, R, particles.shape[0])

    new_particles = zeros(particles.shape)
    for i, (particle, noise) in enumerate(zip(particles, noises)):
        x = particle[0]
        y = particle[1]
        h = particle[2]
        new_particles[i] = array([
            x + dd*cos(h),
            y + dd*sin(h),
            h + dh
        ]) + noise

    return {
        'key': robot['key'],
        'type': 'particle',
        'particles': new_particles.tolist(),
        'mu': mean(new_particles, 0).reshape((-1, 1)).tolist(),
        'sigma': cov(new_particles, rowvar=0).tolist()
    }
def _sample_relation(self, X, mask, E, R, k, EXE, var_r):
    if self.approx_diag:
        _lambda = np.ones(self.n_dim ** 2) / var_r
    else:
        _lambda = np.identity(self.n_dim ** 2) / var_r

    kron = EXE[mask[k].flatten() == 1]
    Y = X[k][mask[k] == 1].flatten()

    if len(np.unique(Y)) == 2:
        logit = LogisticRegression(penalty='l2', C=1.0 / var_r, fit_intercept=False)
        logit.fit(kron, Y)
        mu = logit.coef_[0]
        prd = logit.predict_proba(kron)
        if self.approx_diag:
            _lambda += np.sum(kron.T ** 2 * prd[:, 0] * prd[:, 1], 1)
        else:
            _lambda += np.dot(kron.T * (prd[:, 0] * prd[:, 1]), kron)
    else:
        mu = np.zeros(self.n_dim ** 2)

    if self.approx_diag:
        inv_lambda = 1. / _lambda
        R[k] = np.random.normal(mu, inv_lambda).reshape(R[k].shape)
    else:
        inv_lambda = np.linalg.inv(_lambda)
        R[k] = multivariate_normal(mu, inv_lambda).reshape(R[k].shape)
def resample(self, size=None):
    """
    Randomly sample a dataset from the estimated pdf.

    Parameters
    ----------
    size : int, optional
        The number of samples to draw.  If not provided, then the size is
        the same as the underlying dataset.

    Returns
    -------
    resample : (self.d, `size`) ndarray
        The sampled dataset.
    """
    if size is None:
        size = self.n

    norm = transpose(multivariate_normal(zeros((self.d,), float),
                                         self.covariance, size=size))
    indices = randint(0, self.n, size=size)
    means = self.dataset[:, indices]

    return means + norm
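# Usage sketch, assuming this resample() mirrors scipy.stats.gaussian_kde.resample:
# fit a KDE to 2-D data and draw new points from the estimated density.
import numpy as np
from scipy.stats import gaussian_kde

kde_data = np.random.multivariate_normal([0, 0], [[1, 0.5], [0.5, 1]], 500).T  # shape (2, 500)
kde = gaussian_kde(kde_data)
new_points = kde.resample(size=200)  # shape (2, 200)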
def _newQueryStateFromCtm(data, model):
    import model.ctm_bohning as ctm

    ctm_model = ctm.newModelAtRandom(data, model.K, VocabPrior, model.dtype)
    ctm_query = ctm.newQueryState(data, model)
    ctm_plan = ctm.newTrainPlan(200, epsilon=1, logFrequency=100, debug=False)

    ctm_model, ctm_query, (_, _, _) = ctm.train(data, ctm_model, ctm_query, ctm_plan)

    model.vocab[:, :] = ctm_model.vocab
    model.topicCov[:, :] = ctm_model.sigT
    model.topicMean[:] = ctm_model.topicMean

    K, vocab, dtype = model.K, model.vocab, model.dtype

    D, T = data.words.shape
    assert T == vocab.shape[1], "The number of terms in the document-term matrix (" \
        + str(T) + ") differs from that in the model-states vocabulary parameter " \
        + str(vocab.shape[1])

    docLens = np.squeeze(np.asarray(data.words.sum(axis=1)))

    outMeans = ctm_query.means
    outVarcs = np.ones((D, K), dtype=dtype)

    inMeans = np.ndarray(shape=(D, K), dtype=dtype)
    for d in range(D):
        inMeans[d, :] = rd.multivariate_normal(outMeans[d, :], model.topicCov)
    inVarcs = np.ones((D, K), dtype=dtype)

    inDocCov = np.ones((D,), dtype=dtype)

    return QueryState(outMeans, outVarcs, inMeans, inVarcs, inDocCov, docLens)
def _do_plot_test():
    from numpy.random import multivariate_normal

    p = np.array([[32, 15], [15., 40.]])

    x, y = multivariate_normal(mean=(0, 0), cov=p, size=5000).T
    sd = 2
    a, w, h = covariance_ellipse(p, sd)
    print(np.degrees(a), w, h)

    count = 0
    color = []
    for i in range(len(x)):
        if _is_inside_ellipse(x[i], y[i], 0, 0, a, w, h):
            color.append('b')
            count += 1
        else:
            color.append('r')

    plt.scatter(x, y, alpha=0.2, c=color)
    plt.axis('equal')

    plot_covariance_ellipse(mean=(0., 0.), cov=p, std=sd, facecolor='none')
    print(count / len(x))
def init_filter(self, R, P):
    self.R = R
    self.P = P
    if self.known_init_pos:
        self.particles = multivariate_normal(self.state, P, N_PARTICLES)
        self.particles[:, 2] = np.random.uniform(-self.bound[4], self.bound[4], N_PARTICLES)
        self.particles[:, 3] = np.random.uniform(-self.bound[5], self.bound[5], N_PARTICLES)
    else:
        self.particles = uniform_init(x_low=self.bound[0], x_high=self.bound[1],
                                      y_low=self.bound[2], y_high=self.bound[3],
                                      v_x=self.bound[4], v_y=self.bound[5],
                                      n=N_PARTICLES)
    self.weights = np.repeat(1 / N_PARTICLES, N_PARTICLES)
    self.state_estimate = np.average(self.particles, axis=0, weights=self.weights)
    self.z_hat = self.state_estimate[:2]
def _generate_data(cls, n, d, untreated_outcome, treatment_effect, propensity):
    """Generates population data for given untreated_outcome, treatment_effect
    and propensity functions.

    Parameters
    ----------
    n (int): population size
    d (int): number of covariates
    untreated_outcome (func): untreated outcome conditional on covariates
    treatment_effect (func): treatment effect conditional on covariates
    propensity (func): probability of treatment conditional on covariates
    """
    # Generate covariates
    X = multivariate_normal(np.zeros(d), np.diag(np.ones(d)), n)
    # Generate treatment
    T = np.apply_along_axis(lambda x: binomial(1, propensity(x), 1)[0], 1, X)
    # Calculate outcome
    Y0 = np.apply_along_axis(lambda x: untreated_outcome(x), 1, X)
    treat_effect = np.apply_along_axis(lambda x: treatment_effect(x), 1, X)
    Y = Y0 + treat_effect * T
    return (X, T, Y)
def predict(self):
    """ Predict next position. """

    N = self.N
    for i, s in enumerate(self.sigmas):
        self.sigmas[i] = self.fx(s, self.dt)

    e = multivariate_normal(self._mean, self.Q, N)
    self.sigmas += e
    self.x = np.mean(self.sigmas, axis=0)

    P = 0
    for y in (self.sigmas - self.x):
        P += outer(y, y)
    self.P = P / (N - 1)

    # save prior
    self.x_prior = np.copy(self.x)
    self.P_prior = np.copy(self.P)
def spawn(self, errcov=0.0, names=None):
    """ generate one LightCurve for which the lightcurve values are the sum of
    the original ones and gaussian variates from gaussian errors.
    """
    # _zylclist = list(self.zylclist)  # copy the original list
    _zylclist = deepcopy(self.zylclist)  # copy the original list
    for i in range(self.nlc):
        e = np.atleast_1d(_zylclist[i][2])
        nwant = e.size
        ediag = np.diag(e * e)
        if errcov == 0.0:
            ecovmat = ediag
        else:
            temp1 = np.repeat(e, nwant).reshape(nwant, nwant)
            temp2 = (temp1 * temp1.T - ediag) * errcov
            ecovmat = ediag + temp2
        et = multivariate_normal(np.zeros_like(e), ecovmat)
        _zylclist[i][1] = _zylclist[i][1] + et
    if names is None:
        names = ["-".join([r, "mock"]) for r in self.names]
    return (LightCurve(_zylclist, names=names))
def simulation(rng_seed, n, p, rho):
    """
    :param rng_seed: random seed
    :param n: number of samples
    :param p: number of features
    :return: dictionary storing all infos
    """
    print("Simulate negative binomial counts regression\n n=%d, p=%d, rho=%3.2f" % (n, p, rho))
    seed(rng_seed)
    X_mu = normal(0, .1, p)
    # X_Sigma = diag(ones(p))
    # simulate correlated matrix
    temp = np.abs(np.repeat([range(1, p+1)], p, axis=0).transpose() -
                  np.repeat([range(1, p+1)], p, axis=0))
    X_Sigma = np.power(rho, temp)
    # draw independent samples from MVN(X_mu, X_Sigma)
    X = multivariate_normal(X_mu, X_Sigma, n)
    # sample from bernoulli and uniform for coefficient beta and model space gamma
    opt_gamma = binomial(1, 0.15, p)  # model gamma
    opt_beta = uniform(-2, 2, p)
    # drop all elements in beta whose absolute value is less than 0.5
    opt_gamma &= abs(opt_beta) > 0.5
    opt_beta *= opt_gamma  # coefficients
    opt_beta0 = 2  # bias
    opt_r = 1  # over-dispersion parameter
    opt_z = np.dot(X, opt_beta) + opt_beta0
    opt_lam = gamma(opt_r, exp(opt_z), n)
    y = poisson(opt_lam, n)
    opt_omega = expect_omega(opt_z, y, opt_r)
    # put everything into a dictionary and return it
    negative_binomial_dict = {"X": X, "y": y, "opt_beta": opt_beta,
                              "opt_beta0": opt_beta0, "opt_r": opt_r,
                              "opt_gamma": opt_gamma, "opt_omega": opt_omega,
                              "opt_m": np.sum(opt_gamma),
                              "opt_model": np.nonzero(opt_gamma),
                              "seed": rng_seed}
    return negative_binomial_dict
def simulate(self, ts_length=100, random_state=None):
    r"""
    Simulate a time series of length ts_length, first drawing

    .. math::

        x_0 \sim N(\mu_0, \Sigma_0)

    Parameters
    ----------
    ts_length : scalar(int), optional(default=100)
        The length of the simulation
    random_state : int or np.random.RandomState, optional
        Random seed (integer) or np.random.RandomState instance to set
        the initial state of the random number generator for
        reproducibility. If None, a randomly initialized RandomState is
        used.

    Returns
    -------
    x : array_like(float)
        An n x ts_length array, where the t-th column is :math:`x_t`
    y : array_like(float)
        A k x ts_length array, where the t-th column is :math:`y_t`
    """
    random_state = check_random_state(random_state)

    x0 = multivariate_normal(self.mu_0.flatten(), self.Sigma_0)
    w = random_state.randn(self.m, ts_length-1)
    v = self.C.dot(w)  # Multiply each w_t by C to get v_t = C w_t
    # == simulate time series == #
    x = simulate_linear_model(self.A, x0, v, ts_length)

    if self.H is not None:
        v = random_state.randn(self.l, ts_length)
        y = self.G.dot(x) + self.H.dot(v)
    else:
        y = self.G.dot(x)

    return x, y
def create_mock_classification_data(mus, sigs, ns, rseed=None, verbose=False):
    '''
    Create mock classification dataset using Gaussian distributions
    Data contains 'n' points distributed in 'c' classes with 'd' features
    TODO: Add feature correlation via correlation matrices
    @params:
        mus - Length 'c' list of means, each entry is an array of length 'd'
        sigs - Length 'c' list of standard deviations, each entry is an array of length 'd'
        ns - Length 'c' list of integer number of members of each class
        rseed - Random number seed
    '''
    from numpy import array, diag, append
    from numpy.random import seed, multivariate_normal
    if verbose:
        print('Creating mock classification dataset')
        print('Number of features:', len(mus[0]))
        print('Number of classes:', len(ns))
        print('Total number of entries:', sum(ns))
        print()
    x = array([])
    if rseed is not None:
        seed(rseed)
    for i, (mu, sig, n) in enumerate(zip(mus, sigs, ns)):  # mus, sigs, ns must be the same length
        if verbose:
            print('Class %d members: %d' % (i, n))
            print('Mean:', mu)
            print('Standard deviation:', sig)
            print()
        y = multivariate_normal(mean=mu, cov=diag(sig), size=n)
        if i == 0:
            x = y.copy()
        else:
            x = append(x, y, axis=0)
    labels = []
    for i, n in enumerate(ns):
        labels += n * ['c' + str(i)]  # Label could be less boring than 'i'
    data = {'class': labels}
    for i in range(len(ns)):
        data['x%d' % (i + 1)] = x[:, i]
    df = pd.DataFrame.from_dict(data)
    df = shuffle(df)  # Shuffle entries
    return df
def sample_user_hyperparameter(self):
    U_bar = np.mean(self.U, axis=0)
    S_bar = np.zeros((self.D, self.D))
    for i in range(self.N):
        S_bar += np.outer(self.U[i, :], self.U[i, :])

    beta0 = self.beta0 + self.N
    mu0 = (self.beta0 * self.mu0 + self.N * U_bar) / beta0
    v0 = self.v0 + self.N
    # Note that our choice of W0 is the identity matrix so inverse has no difference
    W0_inverse = inv(self.W0) + S_bar + (self.beta0*self.N)/(self.beta0+self.N) \
        * np.outer(self.mu0-U_bar, self.mu0-U_bar)
    W0 = inv(W0_inverse)

    # Sample Lambda_U from Wishart
    Lambda_U = wishart.rvs(v0, W0)
    # Sample Mu_U from Gaussian
    precision = inv(beta0 * Lambda_U)
    Mu_U = multivariate_normal(mean=mu0, cov=precision)
    return Mu_U, Lambda_U
def Multiple_Descendent_Proposal(particles, y, drift, q, multiple_des=4):
    track_prop = []
    weight = []
    for par in particles:
        for k in range(multiple_des):
            xt, yt = par
            Rt = np.matrix([[-xt, yt], [-yt, -xt]]) / np.sqrt((xt**2 + yt**2))
            Zt = random.multivariate_normal(
                np.zeros(2), q**2 * np.matrix([[1, 0], [0, k**2]]))
            Et = np.array(Rt.T) @ Zt
            xnew = xt + drift[0] + Et[0]
            ynew = yt + drift[1] + Et[1]
            track_prop.append([xnew, ynew])
            weight.append(norm.logpdf(f(xnew, ynew)[0], y, scale=delta))
    track_prop = np.array(track_prop)
    weight = np.array(weight)
    weight = weight - max(weight)
    weight = np.exp(weight)
    weight = weight / np.sum(weight)
    # print('dp', np.sum(weight**2))
    return track_prop, weight
def multivariate_k(mu, sigma, p, nu):
    """
    Generate a sample drawn from a multivariate K distribution.

    Parameters
    ----------
    mu : 1-d array of size m
        mean of the distribution
    sigma : 2-d array of size m*m
        shape matrix with det = 1
    p : float > 0
        scale parameter
    nu : integer > 0
        Degree of freedom of the distribution

    Returns
    -------
    x : 1-d array of size m
        sample generated
    """
    return mu + multivariate_normal(np.zeros(len(mu)), p * sigma) * np.sqrt(
        gamma(nu, 1 / nu))
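# Hedged usage sketch (hypothetical arguments; assumes gamma and
# multivariate_normal are the numpy.random functions imported at module level,
# as the body above implies):
x_sample = multivariate_k(mu=np.zeros(3), sigma=np.eye(3), p=1.0, nu=4)
# x_sample is a length-3 draw scaled by the square root of a Gamma(nu, 1/nu) variate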
def null_model(num_samples, dimension=1, rho=0):
    data_z = np.reshape(uniform(0, 5, num_samples * dimension), (num_samples, dimension))
    coin_flip_x = np.random.choice([0, 1], replace=True, size=num_samples)
    coin_flip_y = np.random.choice([0, 1], replace=True, size=num_samples)
    mean_noise = [0, 0]
    cov_noise = [[1, 0], [0, 1]]
    noise_x, noise_y = multivariate_normal(mean_noise, cov_noise, num_samples).T

    data_x = zeros(num_samples)
    data_x[coin_flip_x == 0, ] = 1.7 * data_z[coin_flip_x == 0, 0]
    data_x[coin_flip_x == 1, ] = -1.7 * data_z[coin_flip_x == 1, 0]
    data_x = data_x + noise_x

    data_y = zeros(num_samples)
    data_y[coin_flip_y == 0, ] = (data_z[coin_flip_y == 0, 0] - 2.7)**2
    data_y[coin_flip_y == 1, ] = -(data_z[coin_flip_y == 1, 0] - 2.7)**2 + 13
    data_y = data_y + noise_y

    data_x = np.reshape(data_x, (num_samples, 1))
    data_y = np.reshape(data_y, (num_samples, 1))

    return data_x, data_y, data_z
def make_data(m=100):
    # scaled data
    from numpy import array
    from numpy.random import multivariate_normal
    from myutils.datasets import make_legitimate_covariance_matrix

    Σ = [
        [7, -5, 4, 8],   # RSq will be about 0.85
        [-5, 14, -9, 0],
        [4, -9, 15, -5],
        [8, 0, -5, 19]
    ]
    Σ = array(Σ, dtype='i')
    Σ = Σ | Σ.transpose()
    Σ = make_legitimate_covariance_matrix(ndim=4)  # use this covariance matrix instead of the above one

    mx = multivariate_normal(mean=[0, 0, 0, 0], cov=Σ, size=m)
    mx = scale(mx)
    X, y = split(mx)
    return X, y
def main():
    r = 0.9999
    data = nr.multivariate_normal([0, 0], [[1, r], [r, 1]], 100)

    print("Entropy: ")
    print("Ground Truth = ", log(2 * pi * exp(1)) + 0.5 * log(1 - r**2))
    print("LNN: H(X) = ", lnn.entropy(data))
    print("KDE: H(X) = ", lnn.KDE_entropy(data))
    print("KL: H(X) = ", lnn.KL_entropy(data))
    print("LNN(1st order): H(X) = ", lnn.LNN_1_entropy(data), "\n")

    print("Mutual Information: ")
    print("Ground Truth = ", -0.5 * log(1 - r**2))
    print("LNN: I(X;Y) = ", lnn.mi(data, split=1))
    print("KDE: I(X;Y) = ", lnn._3KDE_mi(data, split=1))
    print("3KL: I(X;Y) = ", lnn._3KL_mi(data, split=1))
    print("KSG: I(X;Y) = ", lnn._KSG_mi(data, split=1))
    print("LNN(1st order): I(X;Y) = ", lnn._3LNN_1_mi(data, split=1))
    print("LNN(1st order, KSG trick): I(X;Y) = ", lnn._3LNN_1_KSG_mi(data, split=1))
    print("LNN(2nd order, KSG trick): I(X;Y) = ", lnn._3LNN_2_KSG_mi(data, split=1))
def predict(self, Q=None):
    """ Predict next position. """

    if Q is None:
        Q = self.Q
    if np.isscalar(Q):
        Q = eye(self.dim_x) * Q

    N = self.N
    for i, s in enumerate(self.sigmas):
        self.sigmas[i] = self.fx(s, self.dt)

    # e = multivariate_normal(self._mean, self.Q, N)
    e = multivariate_normal(self._mean, Q, N)
    self.sigmas += e
    self.x = np.mean(self.sigmas, axis=0)
    self.P = outer_product_sum(self.sigmas - self.x) / (N - 1)

    # save prior
    self.x_prior = np.copy(self.x)
    self.P_prior = np.copy(self.P)
def _internal_dynamics(Mprior, Vparent, Achild, Vchild, N=2):
    """
    Sample from dynamics conditional of an internal node in tree
    :param Mprior: prior of dynamics
    :param Vparent: prior column covariance
    :param Achild: sum of realization of dynamics of children
    :param Vchild: children column covariance
    :return: Sample from posterior
    """
    assert Mprior.shape == Achild.shape
    precision_parent = np.linalg.inv(
        np.kron(Vparent, np.eye(Achild[:, 0].size)))
    precision_child = np.linalg.inv(np.kron(Vchild, np.eye(Achild[:, 0].size)))
    posterior_sigma = np.linalg.inv(precision_parent + N * precision_child)
    posterior_mu = posterior_sigma @ (
        precision_parent @ Mprior.flatten(order='F')[:, na] +
        precision_child @ Achild.flatten(order='F')[:, na])
    return npr.multivariate_normal(posterior_mu.flatten(),
                                   posterior_sigma).reshape(Achild.shape, order='F')
def sample_mog(nsamp, params, component=None, shuffle=False):
    """Sample from a mixture model."""
    if component is None:
        nums = random.multinomial(nsamp, np.exp(params['logalpha']))
    else:
        nums = np.zeros(len(params['logalpha']), dtype=int)
        nums[component] = nsamp
    D = params['sigma'].shape[0]
    samples = np.empty((D, nsamp))
    cnt = 0
    for cmpt in range(len(nums)):
        mu = params['mu'][:, cmpt]
        sigma = params['sigma'][:, :, cmpt]
        s = cnt
        t = cnt + nums[cmpt]
        samples[:, s:t] = random.multivariate_normal(mu, sigma, (nums[cmpt],)).T
        cnt = t
    if shuffle:
        # np.random.shuffle works in place and returns None, so shuffle an
        # index array and use it to permute the columns
        perm = np.arange(nsamp)
        np.random.shuffle(perm)
        samples = np.asarray(samples[:, perm])
    return samples
def pw_normal(n_samples=200, n_bkps=3):
    """Return a 2D piecewise Gaussian signal and the associated changepoints.

    Args:
        n_samples (int, optional): signal length
        n_bkps (int, optional): number of change points

    Returns:
        tuple: signal of shape (n_samples, 2), list of breakpoints
    """
    # breakpoints
    bkps = draw_bkps(n_samples, n_bkps)
    # we create the signal
    signal = np.zeros((n_samples, 2), dtype=float)
    cov1 = np.array([[1, 0.9], [0.9, 1]])
    cov2 = np.array([[1, -0.9], [-0.9, 1]])
    for sub, cov in zip(np.split(signal, bkps), cycle((cov1, cov2))):
        n_sub, _ = sub.shape
        sub += rd.multivariate_normal([0, 0], cov, size=n_sub)
    return signal, bkps
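# Usage sketch for pw_normal above (assumes draw_bkps and the rd alias for
# numpy.random come from the same module, as in the original source):
signal, bkps = pw_normal(n_samples=200, n_bkps=3)
# signal has shape (200, 2); bkps is a list of change-point indices ending at 200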
def multivariate_inverse_gaussian(mu, sigma, p, beta):
    """
    Generate a sample drawn from a multivariate inverse Gaussian distribution.

    Parameters
    ----------
    mu : 1-d array of size m
        mean of the distribution
    sigma : 2-d array of size m*m
        shape matrix with det = 1
    p : float > 0
        scale parameter
    beta : float > 0
        shape parameter

    Returns
    -------
    x : 1-d array of size m
        sample generated
    """
    return mu + multivariate_normal(np.zeros(len(mu)), p * sigma) * np.sqrt(
        wald(1, beta))
def test_uncertainty_correlation(self):
    seed(1)
    sample_size = 2**15
    for expected in [0, 0.75]:
        # Make the error distribution
        y_true = uniform(0, 1, sample_size)

        # Make the errors and uncertainties
        draw = multivariate_normal([0, 0], [[1, expected], [expected, 1]], sample_size)

        # Add the errors, and separate out the standard deviations
        y_pred = y_true + [d[0] * normal(0, 1) for d in draw]
        y_std = [abs(d[1]) for d in draw]

        # Test with a very large tolerance for now
        measured_corr = uncertainty_correlation(y_true, y_pred, y_std)
        corr_error = abs(measured_corr - expected)
        self.assertLess(
            corr_error, 0.25,
            'Error for {:.2f}: {:.2f}'.format(expected, corr_error))
def generate_trajectory(A, Q, R, starting_pt, depth, leaf_path, K, T, D_in,
                        noise=True, u=None, D_bias=None):
    if u is None and D_bias is None:
        u = np.ones((1, T))
        D_bias = 1
    x = np.zeros((D_in, T + 1))
    x[:, 0] = starting_pt
    z = np.zeros(T + 1).astype(int)
    for t in range(T):
        log_p = compute_leaf_log_prob(R, x[:, t], K, depth, leaf_path)
        p_unnorm = np.exp(log_p - np.max(log_p))
        p = p_unnorm / np.sum(p_unnorm)

        if noise:
            # Stochastically choose the discrete latent and add noise to the continuous latent
            choice = npr.multinomial(1, p.ravel(), size=1)
            z[t] = np.where(choice[0, :] == 1)[0][0].astype(int)
            x[:, t + 1] = (A[:, :-D_bias, z[t]] @ x[:, t][:, na] +
                           A[:, -D_bias:, z[t]] @ u[:, t][:, na] +
                           npr.multivariate_normal(np.zeros(D_in), Q[:, :, z[t]])[:, na]).flatten()
        else:
            # Use Bayes classifier to choose discrete latent state and add no noise
            # to the continuous latent states
            z[t] = np.argmax(p)  # `choice` is only drawn in the noisy branch
            x[:, t + 1] = (A[:, :-D_bias, z[t]] @ x[:, t][:, na] +
                           A[:, -D_bias:, z[t]] @ u[:, t][:, na]).flatten()

    log_p = compute_leaf_log_prob(R, x[:, -1], K, depth, leaf_path)
    p_unnorm = np.exp(log_p - np.max(log_p))
    p = p_unnorm / np.sum(p_unnorm)
    choice = npr.multinomial(1, p.ravel(), size=1)
    z[-1] = np.where(choice[0, :] == 1)[0][0]
    return x, z
def main():
    n = 1000
    cov = [[1, 0.9], [0.9, 1]]
    beta = 0.9
    p_con, p_dis = 0.5, 0.5
    gt = (-p_con * 0.5 * log(np.linalg.det(cov)) +
          p_dis * (log(2) + beta * log(beta) + (1 - beta) * log(1 - beta)) -
          p_con * log(p_con) - p_dis * log(p_dis))

    x_con, y_con = nr.multivariate_normal([0, 0], cov, int(n * p_con)).T
    x_dis = nr.binomial(1, 0.5, int(n * p_dis))
    y_dis = (x_dis + nr.binomial(1, 1 - beta, int(n * p_dis))) % 2
    x_dis, y_dis = 2 * x_dis - np.ones(int(n * p_dis)), 2 * y_dis - np.ones(int(n * p_dis))

    x = np.concatenate((x_con, x_dis)).reshape((n, 1))
    y = np.concatenate((y_con, y_dis)).reshape((n, 1))

    print("Ground Truth = ", gt)
    print("Mixed KSG: I(X:Y) = ", mixed.Mixed_KSG(x, y))
    print("Partitioning: I(X:Y) = ", mixed.Partitioning(x, y))
    print("Noisy KSG: I(X:Y) = ", mixed.Noisy_KSG(x, y))
    print("KSG: I(X:Y) = ", mixed.KSG(x, y))
def generate_data(n=400):
    INPUT_FEATURES = 2
    CLASSES = 3
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    minX, maxX = means[0][0], means[0][0]
    minY, maxY = means[0][1], means[0][1]
    for i in range(n):
        for klass in range(CLASSES):
            features = multivariate_normal(means[klass], cov[klass])
            x, y = features
            minX, maxX = min(minX, x), max(maxX, x)
            minY, maxY = min(minY, y), max(maxY, y)
            alldata.addSample(features, [klass])
    return {'minX': minX, 'maxX': maxX,
            'minY': minY, 'maxY': maxY,
            'd': alldata}
def initialize(self, x, P):
    """
    Initializes the filter with the specified mean and covariance. Only need
    to call this if you are using the filter to filter more than one set of
    data; this is called by __init__

    Parameters
    ----------
    x : np.array(dim_z)
        state mean

    P : np.array((dim_x, dim_x))
        covariance of the state
    """

    if x.ndim != 1:
        raise ValueError('x must be a 1D array')

    self.sigmas = multivariate_normal(mean=x, cov=P, size=self.N)
    self.x = x
    self.P = P
def test():
    data = multivariate_normal([0, 0], [[1, 2], [2, 5]], 100)

    ### PCA
    pc_base = pca(data, base_num=1)[0]

    ### Plotting
    fig = pl.figure()
    fig.add_subplot(1, 1, 1)
    pl.axvline(x=0, color="#000000")
    pl.axhline(y=0, color="#000000")
    ### Plot data
    pl.scatter(data[:, 0], data[:, 1])
    ### Draw the 1st principal axis
    pc_line = array([-3., 3.]) * (pc_base[1] / pc_base[0])
    pl.arrow(0, 0, -pc_base[0] * 2, -pc_base[1] * 2, fc="r", width=0.15, head_width=0.45)
    pl.plot([-3, 3], pc_line, "r")
    ### Settings
    pl.xticks(size=15)
    pl.yticks(size=15)
    pl.xlim([-3, 3])
    pl.tight_layout()
    pl.show()
def setUpClass(cls):
    # Generate data
    # DGP constants
    cls.d = 5
    cls.n = 1000
    cls.n_test = 200
    cls.beta = np.array([0.25, -0.38, 1.41, 0.50, -1.22])
    # Test data
    cls.X_test = multivariate_normal(
        np.zeros(cls.d), np.diag(np.ones(cls.d)), cls.n_test)
    # Constant treatment effect and propensity
    cls.const_te_data = TestMetalearners._generate_data(
        cls.n, cls.d, cls._untreated_outcome,
        treatment_effect=TestMetalearners._const_te, propensity=lambda x: 0.3)
    # Heterogeneous treatment and propensity
    cls.heterogeneous_te_data = TestMetalearners._generate_data(
        cls.n, cls.d, cls._untreated_outcome,
        treatment_effect=TestMetalearners._heterogeneous_te,
        propensity=lambda x: (0.8 if (x[2] > -0.5 and x[2] < 0.5) else 0.2))
def random_mean_and_matrix_semidefinite(self, random_means, random_covariances,
                                        array=True, num_points=1):
    '''
    Parameters
    ----------
    random_means: Matrix
        Each row is a two-element vector [low = a, high = b].
        Example: means = [[0,1],[2,3],[6,7]]
        Each row indicates the range from which the uniform random generator
        can take values.
    random_covariances: Matrix
        A square matrix where each entry is a two-element vector [a, b].
        Example: a 3x3 matrix.
        covariance = [[[a11,b11],[a12,b12],[a13,b13]],
                      [[a21,b21],[a22,b22],[a23,b23]],
                      [[a31,b31],[a32,b32],[a33,b33]]]
        Each entry indicates the range from which the uniform random generator
        can take values.
        a = [[[1,2],[-5,5],[-100,6]],
             [[135,683],[2,285],[-135,13]],
             [[58,135],[16,35],[5,68478]]]
    '''
    mean = []
    for random_mean in random_means:
        mean.append(random.uniform(low=random_mean[0], high=random_mean[1]))
    if array == True:
        covariance = random.rand(random_means.shape[0], random_means.shape[0]) \
            * (random_covariances[1] - random_covariances[0]) + random_covariances[0]
        # A @ A' is a positive semidefinite matrix (matrix product, not the
        # elementwise product the original code used)
        covariance = covariance @ covariance.transpose()
    else:
        covariance = []
        for random_covariance in random_covariances:
            covariance.append([])
            for points in random_covariance:
                covariance[-1].append(random.uniform(low=points[0], high=points[1]))
        covariance = np.array(covariance)
        # A @ A' is a positive semidefinite matrix
        covariance = covariance @ covariance.transpose()
    return [mean, multivariate_normal(mean, covariance, num_points)]
def init_toy_data(num_samples, num_features, num_classes, seed=3):
    # num_samples: number of samples *per class*
    # num_features: number of features (excluding bias)
    # num_classes: number of class labels
    # seed: random seed
    np.random.seed(seed)
    X = np.zeros((num_samples * num_classes, num_features))
    y = np.zeros(num_samples * num_classes)
    for c in range(num_classes):
        # initialize multivariate normal distribution for this class:
        # choose a mean for each feature
        means = uniform(low=-10, high=10, size=num_features)
        # choose a variance for each feature
        var = uniform(low=1.0, high=5, size=num_features)
        # for simplicity, all features are uncorrelated (covariance between any two features is 0)
        cov = var * np.eye(num_features)
        # draw samples from normal distribution
        X[c * num_samples:c * num_samples + num_samples, :] = multivariate_normal(means, cov, size=num_samples)
        # set label
        y[c * num_samples:c * num_samples + num_samples] = c
    return X, y
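# Usage sketch (hypothetical call, not part of the original source):
# 3 classes, 4 features, 20 samples per class.
X_toy, y_toy = init_toy_data(num_samples=20, num_features=4, num_classes=3)
# X_toy has shape (60, 4); y_toy holds the class labels 0.0, 1.0, 2.0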
def generate_transformed_data(experiments, reflections, sigma):
    from dials_scratch.jmp.stills.potato import PotatoOnEwaldSphere
    from numpy.random import multivariate_normal

    b1 = 9 / (2 * 0.01)
    b2 = 9 / (2 * 0.01)

    s0 = matrix.col(experiments[0].beam.get_s0())

    s1_obs = flex.vec3_double()
    s2_obs = flex.vec3_double()
    I_obs = flex.double()

    data_list = []

    for i in range(len(reflections)):
        h = matrix.col(reflections[i]["miller_index"])
        s2 = reflections[i]["s1"]

        model = PotatoOnEwaldSphere(1 / s0.length(), s2, sigma)
        mup = model.conditional_mean()
        sigmap = model.conditional_sigma()
        scale = model.scale_factor()

        data = flex.double(flex.grid(9, 9))
        points = multivariate_normal((0, 0),
                                     matrix.sqr(sigmap).as_list_of_lists(),
                                     int(scale * 1000))
        for (x, y) in points:
            jj = int(4.5 + b1 * y)
            ii = int(4.5 + b2 * x)
            if ii >= 0 and jj > 0 and ii < data.all()[1] and jj < data.all()[0]:
                data[jj, ii] += 1

        print(i, len(reflections), data.as_numpy_array())
        data_list.append(data)

    return data_list
def generate_synthetic_logistic_data(n, p, L, blk_nnz, gcov, nstd):
    # Generates synthetic data for the logistic regression, using the example
    # from [Friedman10]
    # n : # of observations
    # p : # of predictors
    # L : # of blocks
    # blk_nnz : # of non-zero coefs. in each block
    # gcov : correlation within groups
    # nstd : standard deviation of the added noise

    # size of each block (assumed to be an integer)
    pl = p // L
    # generating the coefficients (betas)
    coefs = np.zeros((p, 1))
    for (i, nnz) in enumerate(blk_nnz):
        blkcoefs = np.zeros((pl, 1))
        blkcoefs[0:nnz] = np.sign(rand(nnz, 1) - 0.5)
        coefs[pl * i:pl * (i + 1)] = permutation(blkcoefs)
    # generating the predictors
    mu = np.zeros(p)
    gsigma = gcov * np.ones((pl, pl))
    np.fill_diagonal(gsigma, 1.0)
    Sigma = np.kron(np.eye(L), gsigma)
    # the predictors come from a standard Gaussian multivariate distribution
    X = multivariate_normal(mu, Sigma, n)
    # linear function of the explanatory variables in X, plus noise
    t = np.dot(X, coefs) + randn(n, 1) * nstd
    # applying the logit
    Pr = 1 / (1 + np.exp(-t))
    # The response variable y[i] is a Bernoulli random variable taking
    # value 1 with probability Pr[i]
    y = rand(n, 1) <= Pr
    # we want each _column_ in X to represent a feature vector
    # y and coefs should also be 1D arrays
    return X.T, y.flatten(), coefs.flatten()
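# Usage sketch with hypothetical parameter choices (not from the original source):
# 200 observations, 20 predictors in 4 blocks of 5, 2 non-zero coefficients per block.
X_sim, y_sim, true_coefs = generate_synthetic_logistic_data(
    n=200, p=20, L=4, blk_nnz=[2, 2, 2, 2], gcov=0.5, nstd=0.1)
# X_sim has shape (20, 200): one column per observation, as noted in the comments above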
def MCMC(N, lower_threshold, upper_threshold, rho_Gaussian_Process):
    markov_chain = np.array([])
    B_0 = npr.normal(loc=0, scale=np.sqrt(t))
    B = npr.normal(loc=0, scale=np.sqrt(t), size=I0)
    X = np.sqrt(rho_correlation)*B_0 + np.sqrt(1-rho_correlation)*B
    # (B_0_motion, X) = motion_generator()
    # initialize a good x0
    while (loss(X) <= lower_threshold):
        B_0 = npr.normal(loc=0, scale=np.sqrt(t))
        B = npr.normal(loc=0, scale=np.sqrt(t), size=I0)
        X = np.sqrt(rho_correlation)*B_0 + np.sqrt(1-rho_correlation)*B
    for i in range(N):
        Y = npr.multivariate_normal(mean=np.zeros(I0), cov=M_cov)
        X_intermediate = rho_Gaussian_Process*X + np.sqrt(1-rho_Gaussian_Process**2)*Y
        l = loss(X_intermediate)
        if (l > lower_threshold):
            X = X_intermediate
            markov_chain = np.append(markov_chain, l)
        else:
            markov_chain = np.append(markov_chain, loss(X))
    count = len(markov_chain[markov_chain > upper_threshold])
    return (markov_chain, count/N)