Example #1
 def generate_data(self, n=1e4, k=2, ncomps=3, seed=1):
     
     npr.seed(seed)
     data1_concat = []
     data2_concat = []
     labels1_concat = []
     labels2_concat = []
 
     for j in range(ncomps):
         mean = gen_mean[j]
         sd = gen_sd[j]
         corr = gen_corr[j]
 
         cov = np.empty((k, k))
         cov.fill(corr)
         cov[np.diag_indices(k)] = 1
         cov *= np.outer(sd, sd)
 
         num1 = int(n * group_weights1[j])
         num2 = int(n * group_weights2[j])
         rvs1 = multivariate_normal(mean, cov, size=num1)
         rvs2 = multivariate_normal(mean, cov, size=num2)
         data1_concat.append(rvs1)
         data2_concat.append(rvs2)
         labels1_concat.append(np.repeat(j, num1))
         labels2_concat.append(np.repeat(j, num2))
 
     return ([np.concatenate(labels1_concat), np.concatenate(labels2_concat)],
             [np.concatenate(data1_concat, axis=0),
              np.concatenate(data2_concat, axis=0)])
Example #2
def genDataset(N, separable = True):
    n = N//2
    if separable:
        mu1 = np.array([0, 2])
        mu2 = np.array([2, 0])
        sigma = np.array([[0.8, 0.6], [0.6, 0.8]])
        X1 = rn.multivariate_normal(mu1, sigma, N)
        Y1 = np.ones(len(X1))
        X2 = rn.multivariate_normal(mu2, sigma, N)
        Y2 = np.ones(len(X2)) * -1
        X = np.vstack((X1, X2))
        Y = np.hstack((Y1, Y2))
    else:
        # Not separable: four overlapping Gaussian clusters in an XOR-like layout

        mu1 = [-1, 2]
        mu2 = [1, -1]
        mu3 = [4, -4]
        mu4 = [-4, 4]
        sigma = [[1.0,0.8], [0.8, 1.0]]
        X1 = rn.multivariate_normal(mu1, sigma, n)
        X1 = np.vstack((X1, np.random.multivariate_normal(mu3, sigma, n)))
        Y1 = np.ones(len(X1))
        X2 = rn.multivariate_normal(mu2, sigma, n)
        X2 = np.vstack((X2, np.random.multivariate_normal(mu4, sigma, n)))
        Y2 = np.ones(len(X2)) * -1
        X = np.vstack((X1, X2))
        Y = np.hstack((Y1, Y2))
    return X, Y
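A quick usage sketch, assuming the aliases the snippet relies on (np for numpy, rn for numpy.random):

import numpy as np
import numpy.random as rn

X, Y = genDataset(200, separable=True)
print(X.shape, Y.shape)   # (400, 2) (400,): the separable branch draws N points per class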
Example #3
    def step(self, y, predict_P=False, index=1):
        X = self.x_prior
        states = X[:2, :]
        params = X[2:, :]
        
        s_std = std(X[index].A1) 
        
        tmp_ws = as_array([norm.pdf(y, x[0, index], s_std) for x in states.T])
        n_weights = self.weights * tmp_ws
        sum_weights = n_weights.sum()
    
        if sum_weights != 0:
            n_weights /= sum_weights
            neff = 1.0 / (n_weights ** 2).sum() 
        
        if sum_weights == 0 or neff < self.num_part/2.0:
            idx = choice(range(X.shape[1]), X.shape[1], p=self.weights)
            self.weights = tile(as_array(1.0 / self.num_part), self.num_part)
            
            self.x_post = X[:, idx]
        else:
            self.x_post = X
            self.weights = n_weights

        p_mean = average(params, axis=1, weights=self.weights).A1
        p_cov = cov(params, aweights=self.weights)
        self.x_post[2:, :] = multivariate_normal(p_mean, p_cov, X.shape[1]).T
 
        for i, x in enumerate(self.x_post[2:, :].T):
            if (x < 0).any():
                # Redraw until all parameters are positive and the ordering constraint holds
                while True:
                    new = multivariate_normal(p_mean, p_cov)
                    if (new > 0).all() and new[1] > new[2]:
                        self.x_post[2:, i] = new
                        break
Example #4
def rdm_pnt(nbr_cluster):
    from numpy.random import uniform
    from numpy.random import multivariate_normal
    from numpy import savetxt, concatenate

    import sys

    x = uniform(50, 950)
    y = uniform(50, 950)
    z = uniform(-500, 500) + 20.0
    cov = [[uniform(-5, 5), z], [z, uniform(-5, 5)]]
    s = multivariate_normal([x, y], cov, nbr_cluster[1])
    xo = s[:, 0]
    yo = s[:, 1]
    print(s)
    sys.stdout.flush()

    for i in range(nbr_cluster[0] - 1):
        x = uniform(50, 950)
        y = uniform(50, 950)
        z = uniform(-500, 500) + 20.0
        cov = [[uniform(-5, 5), z], [z, uniform(-5, 5)]]
        s = multivariate_normal([x, y], cov, nbr_cluster[1])
        xo = concatenate((xo, s[:, 0]))
        yo = concatenate((yo, s[:, 1]))
        print(xo)
        sys.stdout.flush()

    savetxt("data.txt", s)

    return 1
Example #5
 def sample(self, T, s_init=None,x_init=None,y_init=None):
   """
   Inputs:
     T: time to run simulation
   Outputs:
     xs: Hidden continuous states
     Ss: Hidden switch states
   """
   x_dim, y_dim = self.x_dim, self.y_dim
   # Allocate Memory
   xs = zeros((T, x_dim))
   Ss = zeros(T, dtype=int)  # switch states are used as indices
   # Compute invariant distribution of the switch process
   _, vl = linalg.eig(self.Z, left=True, right=False)
   pi = vl[:, 0].real
   pi /= pi.sum()  # normalize the left eigenvector into a probability vector
   # Sample start conditions
   sample = multinomial(1, pi)
   if s_init is None:
     Ss[0] = nonzero(sample)[0][0]
   else:
     Ss[0] = s_init
   if x_init is None:
     xs[0] = multivariate_normal(self.mus[Ss[0]], self.Sigmas[Ss[0]])
   else:
     xs[0] = x_init
   # Perform time updates
   for t in range(0,T-1):
     s = Ss[t]
     A = self.As[s]
     b = self.bs[s]
     Q = self.Qs[s]
     xs[t+1] = multivariate_normal(dot(A,xs[t]) + b, Q)
     sample = multinomial(1,self.Z[s],size=1)[0]
     Ss[t+1] = nonzero(sample)[0][0]
   return (xs, Ss)
Example #6
File: outliers.py Project: HMP1/bumps
def test():
    from .walk import walk
    from numpy.random import multivariate_normal, seed
    from numpy import vstack, ones, eye, arange
    seed(2) # Remove uncertainty on tests
    # Set a number of good and bad chains
    Ngood,Nbad = 25,2

    # Make mean-reverting chains with widely separated values for
    # bad and good; put the bad chains first.
    chains = walk(1000, mu=[1]*Nbad+[5]*Ngood, sigma=0.45, alpha=0.1)

    # Check IQR and Grubbs
    assert (identify_outliers('IQR',chains,None) == arange(Nbad)).all()
    assert (identify_outliers('Grubbs',chains,None) == arange(Nbad)).all()

    # Put points for 'bad' chains at [-1,...,-1] and 'good' chains at [1,...,1]
    x = vstack( (multivariate_normal(-ones(4),.1*eye(4),size=Nbad),
                 multivariate_normal(ones(4),.1*eye(4),size=Ngood)) )
    assert identify_outliers('Mahal',chains,x)[0] in range(Nbad)

    # Put points for _all_ chains at [1,...,1] and check that mahal return []
    xsame = multivariate_normal(ones(4),.2*eye(4),size=Ngood+Nbad)
    assert len(identify_outliers('Mahal',chains,xsame)) == 0

    # Check again with large variance
    x = vstack( (multivariate_normal(-3*ones(4),eye(4),size=Nbad),
                 multivariate_normal(ones(4),10*eye(4),size=Ngood)) )
    assert len(identify_outliers('Mahal',chains,x)) == 0


    # =====================================================================
    # Test replacement

    # Construct a state object
    from numpy.linalg import norm
    from .state import MCMCDraw
    Ngen, Npop = chains.shape
    Npop, Nvar = x.shape
    state = MCMCDraw(Ngen=Ngen, Nthin=Ngen, Nupdate=0,
                     Nvar=Nvar, Npop=Npop, Ncr=0, thinning=0)
    # Fill it with chains
    for i in range(Ngen):
        state._generation(new_draws=Npop, x=x, logp=chains[i], accept=Npop)

    # Make a copy of the current state so we can check it was updated
    nx, nlogp = x+0,chains[-1]+0
    # Remove outliers
    state.remove_outliers(nx, nlogp, test='IQR', portion=0.5)
    # Check that the outliers were removed
    outliers = state.outliers()
    assert outliers.shape[0] == Nbad
    for i in range(Nbad):
        assert nlogp[outliers[i,1]] == chains[-1][outliers[i,2]]
        assert norm(nx[outliers[i,1],:] - x[outliers[i,2],:]) == 0
Example #7
def prepare_dataset(variance):
    cov1 = np.array([[variance,0],[0,variance]])
    cov2 = np.array([[variance,0],[0,variance]])

    df1 = DataFrame(multivariate_normal(Mu1,cov1,N1),columns=['x','y'])
    df1['type'] = 1
    df2 = DataFrame(multivariate_normal(Mu2,cov2,N2),columns=['x','y'])
    df2['type'] = -1 
    df = pd.concat([df1,df2],ignore_index=True)
    df = df.reindex(np.random.permutation(df.index)).reset_index(drop=True)
    return df
Example #8
def sample():
    from numpy.random import rand, multivariate_normal
    data_a = multivariate_normal(rand(1) * 20 - 10, np.eye(1) * (rand()), 250)
    data_b = multivariate_normal(rand(1) * 20 - 10, np.eye(1) * (rand()), 250)
    data_c = multivariate_normal(rand(1) * 20 - 10, np.eye(1) * (rand()), 250)

    data_d = multivariate_normal(rand(1) * 20 - 10, np.eye(1) * (rand()), 250)
    X = np.r_[data_a, data_b, data_c, data_d][:, 0]
    c_cf = change_finder(term=70, window=7, order=(2, 2, 0))
    result = c_cf.main(X)
    return result
Example #9
def generate_data():
    sz = 100
    mu1 = randint(1,1000,size=sz)
    mu2 = randint(1000,2000,size=sz)
    mu3 = randint(3000,4000,size=sz)
    
    cov = np.eye(sz)
    
    x1 = multivariate_normal(mu1, cov, 100)
    x2 = multivariate_normal(mu2, cov, 20)
    x3 = multivariate_normal(mu3, cov, 50)
    
    return np.vstack((x1,x2,x3))
Example #10
 def move(self, s, a):
     s = np.copy(s)
     if a == "up":
         s = s + self.move_up + npr.multivariate_normal(np.array([0., 0]), np.diag([0.25, 0.125]))
     elif a == "down":
         s = s + self.move_down + npr.multivariate_normal(np.array([0., 0]), np.diag([0.25, 0.125]))
     elif a == "left":
         s = s + self.move_left + npr.multivariate_normal(np.array([0., 0]), np.diag([0.125, 0.25]))
     elif a == "right":
         s = s + self.move_right + npr.multivariate_normal(np.array([0., 0]), np.diag([0.125, 0.25]))
     else:
         pass
     # print s
     return s
Example #11
def prepare_dataset(variance):
    n1 = 80
    n2 = 200
    mu1 = [9,9]
    mu2 = [-3,-3]
    cov1 = np.array([[variance,0],[0,variance]])
    cov2 = np.array([[variance,0],[0,variance]])

    df1 = DataFrame(multivariate_normal(mu1,cov1,n1),columns=['x','y'])
    df1['type'] = 1
    df2 = DataFrame(multivariate_normal(mu2,cov2,n2),columns=['x','y'])
    df2['type'] = 0
    df = pd.concat([df1,df2],ignore_index=True)
    df = df.reindex(np.random.permutation(df.index)).reset_index()
    return df[['x','y','type']]
Example #12
File: data.py Project: budzianowski/VAEB
def GenerateGaussians(N, Ntr):

  # Generate the data.  
  mu0, mu1 = np.array([1.0, 1.0]), np.array([-1.0, -1.0])
  sg = np.array([[1.0, -0.75], [-0.75, 1.0]])
  X0 = rnd.multivariate_normal(mu0, sg, size=(N)).astype(floatX)
  X1 = rnd.multivariate_normal(mu1, sg, size=(N)).astype(floatX)
  Y0 = np.hstack((np.ones((N, 1)), np.zeros((N, 1)))).astype(floatX)
  Y1 = np.hstack((np.zeros((N, 1)), np.ones((N, 1)))).astype(floatX)

  # Permute the data and split to return.
  idx = rnd.permutation(2*N)
  X, Y = np.vstack((X0, X1))[idx], np.vstack((Y0, Y1))[idx]
  return shared(X[:Ntr], 'Xtr'), shared(Y[:Ntr], 'Ytr'), \
    shared(X[Ntr:], 'Xte'), shared(Y[Ntr:], 'Yte')
Example #13
def sample_gp_posterior_mean(gp, coords_ptr, coords_size, bounds_ptr,
                             sample_x_ptr, num_samples, check_constraint=None,
                             exp_temp=10, MH_rate=0.09):
    """
    Sample with the Metropolis method to get policies in the re-sampling phase.
    We treat the exponentiated (GP-mean * exp_temp) as an unnormalized density.
    The proposal distribution is the normal distribution N(mu, MH_rate * I).
    """
    import numpy.random as nr

    bounds = [(bounds_ptr[2*i], bounds_ptr[2*i+1]) for i in range(coords_size)]

    def check_violate_constraint(nx_pt, bounds, coords_size, check_constraint=None):
        for i in range(coords_size):
            if not (bounds[i][0] <= nx_pt[i] <= bounds[i][1]):
                return True
        if check_constraint is not None:
            return check_constraint(nx_pt, bounds, coords_size)
        return False

    coords = zeros(coords_size)
    for i in range(coords_size):
        coords[i] = coords_ptr[i]

    cur_pt = coords

    # Proposal covariance: scale each dimension by the squared width of its bounds
    factor = MH_rate
    cov = eye(coords_size)
    for i in range(coords_size):
        cov[i, i] = (bounds[i][1] - bounds[i][0])**2 * factor

    for i in range(num_samples):
        # Record the current point as the i-th sample
        for j in range(coords_size):
            sample_x_ptr[i*coords_size + j] = cur_pt[j]

        # Propose until the candidate satisfies the box (and optional) constraints
        nx_pt = nr.multivariate_normal(cur_pt, cov)
        while check_violate_constraint(nx_pt, bounds, coords_size, check_constraint):
            nx_pt = nr.multivariate_normal(cur_pt, cov)

        # Metropolis acceptance ratio for the exponentiated GP mean
        mh_ratio = exp((-gp.posterior(cur_pt)[0] + gp.posterior(nx_pt)[0]) * exp_temp)
        r = nr.uniform()

        if r < mh_ratio:
            cur_pt = nx_pt

    return 0
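A minimal usage sketch under stated assumptions: zeros, eye and exp are imported from numpy at module level (as the snippet implies), plain Python lists stand in for the ctypes-style *_ptr arguments, and MockGP is a hypothetical object whose posterior mean is a quadratic bump centred at (0.5, 0.5):

from numpy import zeros, eye, exp
import numpy as np

class MockGP:
    def posterior(self, x):
        # Return (mean, variance); the sampler only uses the mean.
        return (-np.sum((np.asarray(x) - 0.5) ** 2), 1.0)

coords = [0.2, 0.8]
bounds = [0.0, 1.0, 0.0, 1.0]      # flattened (low, high) pairs per dimension
samples = [0.0] * (2 * 50)         # room for 50 samples of dimension 2
sample_gp_posterior_mean(MockGP(), coords, 2, bounds, samples, 50)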
Example #14
 def setUp(self):
     self.mu = array([0, 0])
     self.sig = eye(2)
     self.pnts = multivariate_normal(self.mu, self.sig, 1000)
     self.k = 16
     self.niter = 10
     self.model = DPMixtureModel(self.k, self.niter, 0, 1)
Example #15
def plot_monte_carlo_ukf():

    def f(x,y):
        return x+y, .1*x**2 + y*y

    mean = (0, 0)
    p = np.array([[32, 15], [15., 40.]])

    # Compute linearized mean
    mean_fx = f(*mean)

    #generate random points
    xs, ys = multivariate_normal(mean=mean, cov=p, size=3000).T
    fxs, fys = f(xs, ys)

    plt.subplot(121)
    plt.gca().grid(b=False)

    plt.scatter(xs, ys, marker='.', alpha=.2, color='k')
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)

    plt.subplot(122)
    plt.gca().grid(b=False)

    plt.scatter(fxs, fys, marker='.', alpha=0.2, color='k')

    plt.ylim([-10, 200])
    plt.xlim([-100, 100])
    plt.show()
Example #16
def mixnormrnd(pi, mu, sigma, k):
    """Generate random variables from mixture of Guassians"""
    xs = []
    for unused in range(k):
        j = sum(random() > cumsum(pi))
        xs.append(multivariate_normal(mu[j], sigma[j]))
    return array(xs)
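A usage sketch, assuming the star-imports the snippet relies on (random and multivariate_normal from numpy.random, cumsum and array from numpy):

import numpy as np
from numpy import array, cumsum
from numpy.random import random, multivariate_normal

pi = array([0.3, 0.7])                         # mixing weights
mu = [array([0.0, 0.0]), array([5.0, 5.0])]    # component means
sigma = [np.eye(2), 0.5 * np.eye(2)]           # component covariances
xs = mixnormrnd(pi, mu, sigma, k=1000)
print(xs.shape)   # (1000, 2)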
Example #17
    def simulate(self, ts_length=100):
        """
        Simulate a time series of length ts_length, first drawing

            x_0 ~ N(mu_0, Sigma_0)

        Parameters
        ----------

        ts_length : scalar(int), optional(default=100)
            The length of the simulation

        Returns
        -------
        x : array_like(float)
            An n x ts_length array, where the t-th column is x_t
        y : array_like(float)
            A k x ts_length array, where the t-th column is y_t

        """
        x0 = multivariate_normal(self.mu_0.flatten(), self.Sigma_0)
        w = np.random.randn(self.m, ts_length-1)
        v = self.C.dot(w) # Multiply each w_t by C to get v_t = C w_t
        # == simulate time series == #
        x = simulate_linear_model(self.A, x0, v, ts_length)
        
        if self.H is not None:
            v = np.random.randn(self.l, ts_length)
            y = self.G.dot(x) + self.H.dot(v)
        else:
            y = self.G.dot(x)

        return x, y
Example #18
def simulate_mixed_logit(num_pers,predict_data,config):
	
	num=num_pers
	to_load=zeros(len(predict_data))
	while num>0:
		exputils=zeros(len(predict_data))
		for path_idx in range(len(predict_data)):
			vals=predict_data[path_idx]
			u=0
			for i in range(len(config['fixed_coefficients'])):
				u=u+config['alpha'][i]*vals[config['fixed_coefficients'][i]]
		
			if config['use_random_coefficients']:
				beta=nr.multivariate_normal(config['latent_mu'],config['latent_sigma'])
				for i in range(len(beta)):
					beta[i]=config['random_transformations'][i](beta[i])
					u=u+beta[i]*vals[config['random_coefficients'][i]]

			exputils[path_idx]=exp(u)
		
		if num>config['mixing_granularity'] and config['use_random_coefficients']:
			to_load=to_load+config['mixing_granularity']*exputils/sum(exputils)
		else:
			to_load=to_load+num*exputils/sum(exputils)

		if config['use_random_coefficients']:
			num=num-config['mixing_granularity']
		else:
			num=0
		
	return to_load
Example #19
    def sampleConditionalDistribution(self, indices, values):
        #Calculate cumulative indices in the mean vector
        counter = 0
        cum_indices = []
        for i in range(len(self.dims)):
            if i in indices:
                for j in range(counter,counter+self.dims[i]):
                    cum_indices.append(j)
            counter += self.dims[i]

        #Mask with newly calculated indices
        state_mask = ones(self.state_dim,dtype=bool)
        state_mask[cum_indices] = False
        condition_mask = logical_not(state_mask)
        s11 = self.covar[state_mask][:,state_mask]
        s12 = self.covar[state_mask][:,condition_mask]
        s21 = self.covar[condition_mask][:,state_mask]
        s22 = self.covar[condition_mask][:,condition_mask]
        m1 = zeros((sum(state_mask),1))
        m2 = zeros((sum(condition_mask),1))

        #Project conditioned values
        value_projected = matrix([[]]).reshape((0,1))
        for i in range(len(values)):
            value_projected = matrix(concatenate((value_projected,self.mean[indices[i]].log(values[i]))))

        #Calculate new mean
        m_prime = m1 + s12*s22.getI()*(value_projected-m2)
        s_prime = s11 - s12*s22.getI()*s21
        sample = matrix(random.multivariate_normal(mean=m_prime.getT().tolist()[0], cov=s_prime)).getT()

        #Pack in order to return
        rest = list(set(range(len(self.dims)))-set(indices))
        return packPoints(self.mean, [sample], rest, self.types, self.dims)[0]
Example #20
    def simulate(self, ts_length=100):
        """
        Simulate a time series of length ts_length, first drawing

            x_0 ~ N(mu_0, Sigma_0)

        Parameters
        ----------

        ts_length : scalar(int), optional(default=100)
            The length of the simulation

        Returns
        -------
        x : array_like(float)
            An n x ts_length array, where the t-th column is x_t
        y : array_like(float)
            A k x ts_length array, where the t-th column is y_t

        """
        x = np.empty((self.n, ts_length))
        x[:, 0] = multivariate_normal(self.mu_0.flatten(), self.Sigma_0)
        w = np.random.randn(self.m, ts_length - 1)
        for t in range(ts_length - 1):
            x[:, t + 1] = self.A.dot(x[:, t]) + self.C.dot(w[:, t])
        y = self.G.dot(x)

        return x, y
Example #21
File: fem.py Project: Angeliqe/pybrain
    def _produceNewSample(self):
        """ returns a new sample, its fitness and its densities """
        chosenOne = drawIndex(self.alphas, True)
        mu = self.mus[chosenOne]

        if self.useAnticipatedMeanShift:
            if len(self.allsamples) % 2 == 1 and len(self.allsamples) > 1:
                if not(self.elitism and chosenOne == self.bestChosenCenter):
                    mu += self.meanShifts[chosenOne]

        if self.diagonalOnly:
            sample = normal(mu, self.sigmas[chosenOne])
        else:
            sample = multivariate_normal(mu, self.sigmas[chosenOne])
        if self.sampleElitism and len(self.allsamples) > self.windowSize and len(self.allsamples) % self.windowSize == 0:
            sample = self.bestEvaluable.copy()
        fit = self._oneEvaluation(sample)

        if ((not self.minimize and fit >= self.bestEvaluation)
            or (self.minimize and fit <= self.bestEvaluation)
            or len(self.allsamples) == 0):
            # used to determine which center produced the current best
            self.bestChosenCenter = chosenOne
            self.bestSigma = self.sigmas[chosenOne].copy()
        if self.minimize:
            fit = -fit
        self.allfitnesses.append(fit)
        self.allsamples.append(sample)
        return sample, fit
Example #22
 def sample_from_posterior_given_hypers_and_data(self, pred, n_samples=1, joint=True):
     if joint:
         predicted_mean, cov = self.predict(pred, full_cov=True) # This part depends on the data
         return npr.multivariate_normal(predicted_mean, cov, size=n_samples).T.squeeze()
     else:
         predicted_mean, var = self.predict(pred, full_cov=False) # This part depends on the data
         return np.squeeze(predicted_mean[:,None] + npr.randn(pred.shape[0], n_samples) * np.sqrt(var)[:,None])
Example #23
def gibbs_sample(groups, votes, n_samples, n_burnin):
  """ Performs Gibbs Sampling over groups.

      Observations:
      - Each Variable object has a list of samples. Once a new sample is
      generated, the value used for the variable is the new sample.
      - After sampling, the values of latent variables are changed to empiric
      mean of samples.

      Args:
        groups: a dict of Group objects.
        votes: list of votes, each one represented as a dictionary, which is the
      training data.
        n_samples: the number of samples to obtain.
        n_burnin: number of initial samples to ignore.

      Returns:
        None. The samples are inserted into Variable objects.
  """
  burn_count = 0
  for _ in range(n_samples + n_burnin):
    for g_name in ['alpha', 'beta', 'xi', 'u', 'v', 'gamma', 'lambda']:
      group = groups[g_name]
      if isinstance(group, EntityArrayGroup):
        for variable in group.iter_variables():
          mean, var = variable.get_cond_mean_and_var(groups, votes)
          sample = multivariate_normal(mean.reshape(-1), var)
          variable.add_sample(sample.reshape(sample.size, 1))
      else:
        for variable in group.iter_variables():
          mean, var = variable.get_cond_mean_and_var(groups, votes)
          sample = normal(mean, sqrt(var))
          variable.add_sample(sample)
      if burn_count < n_burnin:
        burn_count += 1
Example #24
def copula(num_samples, rho_mat, mu_mat, methods):
    """Copula procedure to generate an OTU table with corrs close to rho_mat.
    Inputs:
     num_samples - int, number of samples. 
     rho_mat - 2d arr, symmetric positive definite matrix which specifies the 
     correlation or covariation between the otu's in the table. 
     mu_mat - 1d arr w/ len(num_otus), mean of otu for multivariate random call.
     methods - list of lists w/ len(num_otus), each list has a variable number 
     of elements. the first element in each list is the 
     scipy.stats.distributions function like lognorm or beta. this is the 
     function that we draw values from for the actual otu. the remaining entries
     are the parameters for that function in order that the function requires 
     them.
    """
    num_otus = len(mu_mat)
    # draw from multivariate normal distribution with specified parameters.
    # transpose so that it remains otuXsample matrix.
    Z = multivariate_normal(mean=mu_mat, cov=rho_mat, size=num_samples).T
    # using the inverse cdf of the normal distribution find where each sample 
    # value for each otu falls in the normal cdf.
    U = norm.cdf(Z)
    # make the otu table using the methods and cdf values. ppf_args[0] is the 
    # distribution function (eg. lognorm) whose ppf function we will use
    # to transform the cdf vals into the new distribution. ppf_args[1:] is the 
    # params of the function like a, b, loc etc. (ppf is elementwise, so no size arg)
    otu_table = array([ppf_args[0].ppf(otu_cdf_vals, *ppf_args[1:])
        for ppf_args, otu_cdf_vals in zip(methods, U)])
    return where(otu_table > 0, otu_table, 0)
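A hedged usage sketch with three OTUs, lognormal marginals and mild positive correlation; it assumes the module-level imports the function relies on (array and where from numpy, multivariate_normal from numpy.random, norm from scipy.stats):

import numpy as np
from numpy import array, where
from numpy.random import multivariate_normal
from scipy.stats import norm, lognorm

rho_mat = array([[1.0, 0.4, 0.2],
                 [0.4, 1.0, 0.4],
                 [0.2, 0.4, 1.0]])
mu_mat = np.zeros(3)
methods = [[lognorm, 2.0], [lognorm, 1.5], [lognorm, 1.0]]  # (distribution, shape parameter s)
otu_table = copula(50, rho_mat, mu_mat, methods)
print(otu_table.shape)   # (3, 50): one row per OTU, one column per sample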
Example #25
    def _sample_entity(self, X, mask, E, R, i, var_e, RE, RTE):
        _lambda = np.identity(self.n_dim) / var_e

        nz_r = mask[:, i, :].nonzero()
        nz_c = mask[:, :, i].nonzero()
        nnz_r = nz_r[0].size
        nnz_c = nz_c[0].size
        nnz_all = nnz_r + nnz_c
        self.features[:nnz_r] = RE[nz_r]
        self.features[nnz_r:nnz_all] = RTE[nz_c]
        self.Y[:nnz_r] = X[:, i, :][nz_r]
        self.Y[nnz_r:nnz_all] = X[:, :, i][nz_c]

        features = self.features[:nnz_all]
        Y = self.Y[:nnz_all]
        try:
            logit = LogisticRegression(penalty='l2', C=1.0 / var_e, fit_intercept=False)
            logit.fit(features, Y)
            mu = logit.coef_[0]
            prd = logit.predict_proba(features)
            _lambda += np.dot(features.T * (prd[:, 0] * prd[:, 1]), features)
        except Exception:
            mu = np.zeros(self.n_dim)

        inv_lambda = np.linalg.inv(_lambda)
        E[i] = multivariate_normal(mu, inv_lambda)
Example #26
def particle_movement(leftwheel, rightwheel, R, robot):
    if 'particles' not in robot:
        abort(404)

    particles = array(robot['particles'])

    dd = (leftwheel + rightwheel) / 2
    dh = (rightwheel - leftwheel) / ROBOT_RADIUS

    z = zeros((len(particles[0]),))

    noises = multivariate_normal(z, R, particles.shape[0])

    new_particles = zeros(particles.shape)
    for i, (particle, noise) in enumerate(zip(particles, noises)):
        x = particle[0]
        y = particle[1]
        h = particle[2]
        new_particles[i] = array([
            x + dd*cos(h),
            y + dd*sin(h),
            h + dh
            ]) + noise

    return {
        'key': robot['key'],
        'type': 'particle',
        'particles': new_particles.tolist(),
        'mu': mean(new_particles, 0).reshape((-1, 1)).tolist(),
        'sigma': cov(new_particles, rowvar=0).tolist()
        }
Example #27
    def _sample_relation(self, X, mask, E, R, k, EXE, var_r):
        if self.approx_diag:
            _lambda = np.ones(self.n_dim ** 2) / var_r
        else:
            _lambda = np.identity(self.n_dim ** 2) / var_r

        kron = EXE[mask[k].flatten() == 1]
        Y = X[k][mask[k] == 1].flatten()

        if len(np.unique(Y)) == 2:
            logit = LogisticRegression(penalty='l2', C=1.0 / var_r, fit_intercept=False)
            logit.fit(kron, Y)
            mu = logit.coef_[0]
            prd = logit.predict_proba(kron)

            if self.approx_diag:
                _lambda += np.sum(kron.T ** 2 * prd[:, 0] * prd[:, 1], 1)
            else:
                _lambda += np.dot(kron.T * (prd[:, 0] * prd[:, 1]), kron)
        else:
            mu = np.zeros(self.n_dim ** 2)

        if self.approx_diag:
            inv_lambda = 1. / _lambda
            # np.random.normal takes a standard deviation, so use the sqrt of the variance
            R[k] = np.random.normal(mu, np.sqrt(inv_lambda)).reshape(R[k].shape)
        else:
            inv_lambda = np.linalg.inv(_lambda)
            R[k] = multivariate_normal(mu, inv_lambda).reshape(R[k].shape)
Example #28
File: kde.py Project: greatlse/barnaba
    def resample(self, size=None):
        """
        Randomly sample a dataset from the estimated pdf.

        Parameters
        ----------
        size : int, optional
            The number of samples to draw.  If not provided, then the size is
            the same as the underlying dataset.

        Returns
        -------
        resample : (self.d, `size`) ndarray
            The sampled dataset.

        """
        if size is None:
            size = self.n

        norm = transpose(multivariate_normal(zeros((self.d,), float),
                         self.covariance, size=size))
        indices = randint(0, self.n, size=size)
        means = self.dataset[:, indices]

        return means + norm
Example #29
def _newQueryStateFromCtm(data, model):
    import model.ctm_bohning as ctm

    ctm_model = ctm.newModelAtRandom(data, model.K, VocabPrior, model.dtype)
    ctm_query = ctm.newQueryState(data, model)
    ctm_plan  = ctm.newTrainPlan(200, epsilon=1, logFrequency=100, debug=False)

    ctm_model, ctm_query, (_, _, _) = ctm.train(data, ctm_model, ctm_query, ctm_plan)

    model.vocab[:,:]    = ctm_model.vocab
    model.topicCov[:,:] = ctm_model.sigT
    model.topicMean[:]  = ctm_model.topicMean

    K, vocab, dtype =  model.K, model.vocab, model.dtype

    D,T = data.words.shape
    assert T == vocab.shape[1], "The number of terms in the document-term matrix (" + str(T) + ") differs from that in the model-states vocabulary parameter " + str(vocab.shape[1])
    docLens = np.squeeze(np.asarray(data.words.sum(axis=1)))

    outMeans = ctm_query.means
    outVarcs = np.ones((D,K), dtype=dtype)

    inMeans = np.ndarray(shape=(D,K), dtype=dtype)
    for d in range(D):
        inMeans[d,:] = rd.multivariate_normal(outMeans[d,:], model.topicCov)
    inVarcs = np.ones((D,K), dtype=dtype)

    inDocCov  = np.ones((D,), dtype=dtype)

    return QueryState(outMeans, outVarcs, inMeans, inVarcs, inDocCov, docLens)
Example #30
File: stats.py Project: bmswgnp/filterpy
def _do_plot_test():

    from numpy.random import multivariate_normal
    p = np.array([[32, 15],[15., 40.]])

    x,y = multivariate_normal(mean=(0,0), cov=p, size=5000).T
    sd = 2
    a,w,h = covariance_ellipse(p,sd)
    print (np.degrees(a), w, h)

    count = 0
    color=[]
    for i in range(len(x)):
        if _is_inside_ellipse(x[i], y[i], 0, 0, a, w, h):
            color.append('b')
            count += 1
        else:
            color.append('r')
    plt.scatter(x,y,alpha=0.2, c=color)


    plt.axis('equal')

    plot_covariance_ellipse(mean=(0., 0.),
                            cov = p,
                            std=sd,
                            facecolor='none')

    print (count / len(x))
Example #31
	def init_filter(self, R, P):
		self.R = R
		self.P = P
		if self.known_init_pos:
			self.particles = multivariate_normal(self.state, P, N_PARTICLES)
			self.particles[:,2] = np.random.uniform(-self.bound[4], self.bound[4], N_PARTICLES)
			self.particles[:,3] = np.random.uniform(-self.bound[5], self.bound[5], N_PARTICLES)

		else:
			self.particles = uniform_init(x_low=self.bound[0], 
											x_high=self.bound[1], 
											y_low=self.bound[2], 
											y_high=self.bound[3], 
											v_x=self.bound[4], 
											v_y=self.bound[5], 
											n=N_PARTICLES)

		self.weights = np.repeat(1/N_PARTICLES, N_PARTICLES)
		self.state_estimate = np.average(self.particles, axis=0, weights=self.weights)
		self.z_hat = self.state_estimate[:2]
Example #32
    def _generate_data(cls, n, d, untreated_outcome, treatment_effect, propensity):
        """Generates population data for given untreated_outcome, treatment_effect and propensity functions.

        Parameters
        ----------
            n (int): population size
            d (int): number of covariates
            untreated_outcome (func): untreated outcome conditional on covariates
            treatment_effect (func): treatment effect conditional on covariates
            propensity (func): probability of treatment conditional on covariates
        """
        # Generate covariates
        X = multivariate_normal(np.zeros(d), np.diag(np.ones(d)), n)
        # Generate treatment
        T = np.apply_along_axis(lambda x: binomial(1, propensity(x), 1)[0], 1, X)
        # Calculate outcome
        Y0 = np.apply_along_axis(lambda x: untreated_outcome(x), 1, X)
        treat_effect = np.apply_along_axis(lambda x: treatment_effect(x), 1, X)
        Y = Y0 + treat_effect * T
        return (X, T, Y)
Example #33
    def predict(self):
        """ Predict next position. """

        N = self.N
        for i, s in enumerate(self.sigmas):
            self.sigmas[i] = self.fx(s, self.dt)

        e = multivariate_normal(self._mean, self.Q, N)
        self.sigmas += e

        self.x = np.mean(self.sigmas, axis=0)

        P = 0
        for y in (self.sigmas - self.x):
            P += outer(y, y)
        self.P = P / (N - 1)

        # save prior
        self.x_prior = np.copy(self.x)
        self.P_prior = np.copy(self.P)
Example #34
 def spawn(self, errcov=0.0, names=None):
     """ generate one LightCurve for which the lightcurve values are the sum of the original ones and gaussian variates from gaussian errors.
     """
     # _zylclist = list(self.zylclist) # copy the original list
     _zylclist = deepcopy(self.zylclist)  # copy the original list
     for i in range(self.nlc):
         e = np.atleast_1d(_zylclist[i][2])
         nwant = e.size
         ediag = np.diag(e * e)
         if errcov == 0.0:
             ecovmat = ediag
         else:
             temp1 = np.repeat(e, nwant).reshape(nwant, nwant)
             temp2 = (temp1 * temp1.T - ediag) * errcov
             ecovmat = ediag + temp2
         et = multivariate_normal(np.zeros_like(e), ecovmat)
         _zylclist[i][1] = _zylclist[i][1] + et
     if names is None:
         names = ["-".join([r, "mock"]) for r in self.names]
     return (LightCurve(_zylclist, names=names))
Example #35
def simulation(rng_seed, n, p, rho):
    """
    :param rng_seed: random seed
    :param n:        number of samples
    :param p:        number of features
    :param rho:      autocorrelation controlling the covariance of X
    :return:         dictionary storing all infos
    """
    print("Simulate negative binomial counts regression\n n=%d, p=%d, rho=%3.2f" % (n, p, rho))
    seed(rng_seed)

    X_mu = normal(0, .1, p)
    #X_Sigma = diag(ones(p))

    # simulate correlated matrix
    temp = np.abs(np.repeat([range(1, p+1)], p, axis=0).transpose() - np.repeat([range(1, p+1)], p, axis=0))
    X_Sigma = np.power(rho, temp)

    # draw independent samples from MVN(X_mu, X_Sigma)
    X = multivariate_normal(X_mu, X_Sigma, n)

    # sample from bernoulli and uniform for coefficient beta and model space gamma
    opt_gamma = binomial(1, 0.15, p)  # model gamma
    opt_beta = uniform(-2, 2, p)
    # drop all elements in beta whose absolute value less than 0.5
    opt_gamma &= abs(opt_beta) > 0.5 
    opt_beta *= opt_gamma  # coefficients
    opt_beta0 = 2  # bias
    opt_r = 1  # over-dispersion parameter
    opt_z = np.dot(X, opt_beta) + opt_beta0
    opt_lam = gamma(opt_r, exp(opt_z), n)
    y = poisson(opt_lam, n)

    opt_omega = expect_omega(opt_z, y, opt_r)

    # put everything into a dictionary and return back
    negative_binomial_dict = {"X": X, "y": y, "opt_beta": opt_beta, "opt_beta0": opt_beta0,
                              "opt_r": opt_r, "opt_gamma": opt_gamma, "opt_omega": opt_omega, 
                              "opt_m": np.sum(opt_gamma), "opt_model": np.nonzero(opt_gamma),
                              "seed": ran_seed}

    return negative_binomial_dict
Example #36
    def simulate(self, ts_length=100, random_state=None):
        r"""
        Simulate a time series of length ts_length, first drawing

        .. math::

            x_0 \sim N(\mu_0, \Sigma_0)

        Parameters
        ----------
        ts_length : scalar(int), optional(default=100)
            The length of the simulation
        random_state : int or np.random.RandomState, optional
            Random seed (integer) or np.random.RandomState instance to set
            the initial state of the random number generator for
            reproducibility. If None, a randomly initialized RandomState is
            used.

        Returns
        -------
        x : array_like(float)
            An n x ts_length array, where the t-th column is :math:`x_t`
        y : array_like(float)
            A k x ts_length array, where the t-th column is :math:`y_t`

        """
        random_state = check_random_state(random_state)

        x0 = multivariate_normal(self.mu_0.flatten(), self.Sigma_0)
        w = random_state.randn(self.m, ts_length-1)
        v = self.C.dot(w)  # Multiply each w_t by C to get v_t = C w_t
        # == simulate time series == #
        x = simulate_linear_model(self.A, x0, v, ts_length)

        if self.H is not None:
            v = random_state.randn(self.l, ts_length)
            y = self.G.dot(x) + self.H.dot(v)
        else:
            y = self.G.dot(x)

        return x, y
Example #37
def create_mock_classification_data(mus, sigs, ns, rseed=None, verbose=False):
    '''
    Create mock classification dataset using Gaussian distributions
    Data contains 'n' points distributed in 'c' classes with 'd' features
    TODO: Add feature correlation via correlation matrices
    @params:
        mus - Length 'c' list of means, each entry is an array of length 'd'
        sigs - Length 'c' list of per-feature variances (each entry is an array of length 'd', used as the diagonal of the covariance)
        ns - Length 'c' list of integer number of members of each class
        rseed - Random number seed
    '''
    from numpy import array, diag, append
    from numpy.random import seed, multivariate_normal
    if verbose:
        print('Creating mock classification dataset')
        print('Number of features:', len(mus[0]))
        print('Number of classes:', len(ns))
        print('Total number of entries:', sum(ns))
        print()
    x = array([])
    if rseed is not None: seed(rseed)
    for i, (mu, sig, n) in enumerate(zip(mus, sigs, ns)): # mus, sigs, ns must be the same length
        if verbose:
            print('Class %d members: %d'%(i, n))
            print('Mean:', mu)
            print('Standard deviation:', sig)
            print()
        y = multivariate_normal(mean=mu, cov=diag(sig), size=n)
        if i==0:
            x = y.copy()
        else:
            x = append(x, y, axis=0)
    labels = []
    for i, n in enumerate(ns):
        labels += n*['c'+str(i)] # Label could be less boring than 'i'
    data = {'class': labels}
    for i in range(x.shape[1]):  # one column per feature
        data['x%d'%(i+1)] = x[:, i]
    df = pd.DataFrame.from_dict(data)
    df = shuffle(df) # Shuffle entries
    return df
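A usage sketch; it assumes pandas as pd and sklearn.utils.shuffle are available at module level, which the function body references:

import pandas as pd
from sklearn.utils import shuffle

mus = [[0.0, 0.0], [3.0, 3.0]]
sigs = [[1.0, 1.0], [0.5, 2.0]]   # per-feature variances fed to diag()
ns = [100, 150]
df = create_mock_classification_data(mus, sigs, ns, rseed=42)
print(df.shape)   # (250, 3): columns 'class', 'x1', 'x2'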
Example #38
    def sample_user_hyperparameter(self):
        U_bar = np.mean(self.U, axis=0)
        S_bar = np.zeros((self.D, self.D))
        for i in range(self.N):
            S_bar += np.outer(self.U[i, :], self.U[i, :])
        beta0 = self.beta0 + self.N
        mu0 = (self.beta0 * self.mu0 + self.N * U_bar) / beta0
        v0 = self.v0 + self.N

        # Note that our choice of W0 is the identity matrix so inverse has no difference
        W0_inverse = inv(self.W0) + S_bar + (self.beta0*self.N)/(self.beta0+self.N)\
                                            *np.outer(self.mu0-U_bar, self.mu0-U_bar)
        W0 = inv(W0_inverse)
        # Sample Lambda_U from Wishart
        Lambda_U = wishart.rvs(v0, W0)

        # Sample Mu_U from a Gaussian; its covariance is the inverse of beta0 * Lambda_U
        covariance = inv(beta0 * Lambda_U)
        Mu_U = multivariate_normal(mean=mu0, cov=covariance)

        return Mu_U, Lambda_U
Example #39
def Multiple_Descendent_Proposal(particles, y, drift, q, multiple_des=4):
    track_prop = []
    weight = []
    for par in particles:
        for k in range(multiple_des):
            xt, yt = par
            Rt = np.matrix([[-xt, yt], [-yt, -xt]]) / np.sqrt((xt**2 + yt**2))
            Zt = random.multivariate_normal(
                np.zeros(2), q**2 * np.matrix([[1, 0], [0, k**2]]))
            Et = np.array(Rt.T) @ Zt
            xnew = xt + drift[0] + Et[0]
            ynew = yt + drift[1] + Et[1]
            track_prop.append([xnew, ynew])
            weight.append(norm.logpdf(f(xnew, ynew)[0], y, scale=delta))
    track_prop = np.array(track_prop)
    weight = np.array(weight)
    weight = weight - max(weight)
    weight = np.exp(weight)
    weight = weight / np.sum(weight)
    #print('dp', np.sum(weight**2))
    return track_prop, weight
Example #40
def multivariate_k(mu, sigma, p, nu):
    """ Generate a sample drawn from a multivariate t distribution.
    
    Parameters
    ----------
    mu    : 1-d array of size m
            mean of the distribution
    sigma : 2-d array of size m*m
            shape matrix with det = 1
    p     : float > 0
            scale parameter
    nu    : integer > 0
            Degree of freedom of the distribution
    Returns
    -------
    x     : 1-d array of size m
            sample generated
    """

    return mu + multivariate_normal(np.zeros(len(mu)), p * sigma) * np.sqrt(
        gamma(nu, 1 / nu))
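A short usage sketch, assuming multivariate_normal and gamma are imported from numpy.random at module level, as the body implies:

import numpy as np
from numpy.random import multivariate_normal, gamma

mu = np.zeros(3)
sigma = np.eye(3)   # shape matrix with det = 1
x = multivariate_k(mu, sigma, p=1.0, nu=4)
print(x.shape)      # (3,)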
Example #41
 def null_model(num_samples, dimension=1, rho=0):
     data_z = np.reshape(uniform(0, 5, num_samples * dimension),
                         (num_samples, dimension))
     coin_flip_x = np.random.choice([0, 1], replace=True, size=num_samples)
     coin_flip_y = np.random.choice([0, 1], replace=True, size=num_samples)
     mean_noise = [0, 0]
     cov_noise = [[1, 0], [0, 1]]
     noise_x, noise_y = multivariate_normal(mean_noise, cov_noise,
                                            num_samples).T
     data_x = zeros(num_samples)
     data_x[coin_flip_x == 0, ] = 1.7 * data_z[coin_flip_x == 0, 0]
     data_x[coin_flip_x == 1, ] = -1.7 * data_z[coin_flip_x == 1, 0]
     data_x = data_x + noise_x
     data_y = zeros(num_samples)
     data_y[coin_flip_y == 0, ] = (data_z[coin_flip_y == 0, 0] - 2.7)**2
     data_y[coin_flip_y ==
            1, ] = -(data_z[coin_flip_y == 1, 0] - 2.7)**2 + 13
     data_y = data_y + noise_y
     data_x = np.reshape(data_x, (num_samples, 1))
     data_y = np.reshape(data_y, (num_samples, 1))
     return data_x, data_y, data_z
Example #42
def make_data(m=100):  # scaled data
    from numpy import array
    from numpy.random import multivariate_normal
    from myutils.datasets import make_legitimate_covariance_matrix
    Σ = [
        [7, -5, 4, 8],  # RSq will be about 0.85
        [-5, 14, -9, 0],
        [4, -9, 15, -5],
        [8, 0, -5, 19]
    ]
    Σ = array(Σ, dtype='i')
    Σ = Σ | Σ.transpose()

    Σ = make_legitimate_covariance_matrix(
        ndim=4)  # use this covariance matrix instead of the above one

    mx = multivariate_normal(mean=[0, 0, 0, 0], cov=Σ, size=m)

    mx = scale(mx)
    X, y = split(mx)
    return X, y
Example #43
File: demo.py Project: hummmblelu/eckNN
def main():
    r = 0.9999
    data = nr.multivariate_normal([0, 0], [[1, r], [r, 1]], 100)
    print "Entropy: "
    print "Ground Truth = ", log(2 * pi * exp(1)) + 0.5 * log(1 - r**2)
    print "LNN: H(X) =  ", lnn.entropy(data)
    print "KDE: H(X) = ", lnn.KDE_entropy(data)
    print "KL: H(X) = ", lnn.KL_entropy(data)
    print "LNN(1st order): H(X) = ", lnn.LNN_1_entropy(data), "\n"

    print "Mutual Information: "
    print "Ground Truth = ", -0.5 * log(1 - r**2)
    print "LNN: I(X;Y) =  ", lnn.mi(data, split=1)
    print "KDE: I(X;Y) =  ", lnn._3KDE_mi(data, split=1)
    print "3KL: I(X;Y) =  ", lnn._3KL_mi(data, split=1)
    print "KSG: I(X;Y) =  ", lnn._KSG_mi(data, split=1)
    print "LNN(1st order): I(X;Y) =  ", lnn._3LNN_1_mi(data, split=1)
    print "LNN(1st order, KSG trick): I(X;Y) =  ", lnn._3LNN_1_KSG_mi(data,
                                                                      split=1)
    print "LNN(2nd order, KSG trick): I(X;Y) =  ", lnn._3LNN_2_KSG_mi(data,
                                                                      split=1)
Example #44
    def predict(self, Q=None):
        """ Predict next position. """
        if Q is None:
            Q = self.Q
        if np.isscalar(Q):
            Q = eye(self.dim_x) * Q

        N = self.N
        for i, s in enumerate(self.sigmas):
            self.sigmas[i] = self.fx(s, self.dt)

        # e = multivariate_normal(self._mean, self.Q, N)
        e = multivariate_normal(self._mean, Q, N)
        self.sigmas += e

        self.x = np.mean(self.sigmas, axis=0)
        self.P = outer_product_sum(self.sigmas - self.x) / (N - 1)

        # save prior
        self.x_prior = np.copy(self.x)
        self.P_prior = np.copy(self.P)
Example #45
def _internal_dynamics(Mprior, Vparent, Achild, Vchild, N=2):
    """
    Sample from dynamics conditional of an internal node in tree
    :param Mprior: prior of dynamics
    :param Vparent: prior column covariance
    :param Achild: sum of realization of dynamics of children
    :param Vchild: children column covariance
    :return: Sample from posterior
    """
    assert Mprior.shape == Achild.shape
    precision_parent = np.linalg.inv(
        np.kron(Vparent, np.eye(Achild[:, 0].size)))
    precision_child = np.linalg.inv(np.kron(Vchild, np.eye(Achild[:, 0].size)))

    posterior_sigma = np.linalg.inv(precision_parent + N * precision_child)
    posterior_mu = posterior_sigma @ (
        precision_parent @ Mprior.flatten(order='F')[:, na] +
        precision_child @ Achild.flatten(order='F')[:, na])
    return npr.multivariate_normal(posterior_mu.flatten(),
                                   posterior_sigma).reshape(Achild.shape,
                                                            order='F')
Example #46
def sample_mog(nsamp, params, component=None, shuffle=False):
    """Sample from a mixture model."""
    if component is None:
        nums = random.multinomial(nsamp, np.exp(params['logalpha']))
    else:
        nums = np.zeros(len(params['logalpha']), dtype=int)
        nums[component] = nsamp
    D = params['sigma'].shape[0]
    samples = np.empty((D, nsamp))
    cnt = 0
    for cmpt in range(len(nums)):
        mu = params['mu'][:, cmpt]
        sigma = params['sigma'][:, :, cmpt]
        s = cnt
        t = cnt + nums[cmpt]
        samples[:, s:t] = random.multivariate_normal(mu, sigma,
                                                     (nums[cmpt], )).T
        cnt = t
    if shuffle:
        samples = samples[:, np.random.permutation(nsamp)]
    return samples
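A usage sketch with a hypothetical two-component parameter dict matching the shapes the function expects ((D, K) means, (D, D, K) covariances, log mixing weights):

import numpy as np
from numpy import random

params = {
    'logalpha': np.log([0.4, 0.6]),                    # log mixing weights
    'mu': np.array([[0.0, 4.0],
                    [0.0, 4.0]]),                      # one column per component
    'sigma': np.dstack([np.eye(2), 0.5 * np.eye(2)]),  # (D, D, K)
}
samples = sample_mog(1000, params)
print(samples.shape)   # (2, 1000)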
Example #47
def pw_normal(n_samples=200, n_bkps=3):
    """Return a 2D piecewise Gaussian signal and the associated changepoints.

    Args:
        n_samples (int, optional): signal length
        n_bkps (int, optional): number of change points

    Returns:
        tuple: signal of shape (n_samples, 2), list of breakpoints
    """
    # breakpoints
    bkps = draw_bkps(n_samples, n_bkps)
    # we create the signal
    signal = np.zeros((n_samples, 2), dtype=float)
    cov1 = np.array([[1, 0.9], [0.9, 1]])
    cov2 = np.array([[1, -0.9], [-0.9, 1]])
    for sub, cov in zip(np.split(signal, bkps), cycle((cov1, cov2))):
        n_sub, _ = sub.shape
        sub += rd.multivariate_normal([0, 0], cov, size=n_sub)

    return signal, bkps
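A usage sketch; draw_bkps, rd (numpy.random) and cycle (itertools) are assumed to come from the surrounding module, as in ruptures:

signal, bkps = pw_normal(n_samples=500, n_bkps=4)
print(signal.shape, bkps)   # (500, 2) and the breakpoint indices (the last one is n_samples)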
Example #48
def multivariate_inverse_gaussian(mu, sigma, p, beta):
    """ Generate a sample drawn from a multivariate t distribution.
    
    Parameters
    ----------
    mu    : 1-d array of size m
            mean of the distribution
    sigma : 2-d array of size m*m
            shape matrix with det = 1
    p     : float > 0
            scale parameter
    beta  : float > 0
            shape parameter
    Returns
    -------
    x     : 1-d array of size m
            sample generated
    """

    return mu + multivariate_normal(np.zeros(len(mu)), p * sigma) * np.sqrt(
        wald(1, beta))
Example #49
    def test_uncertainty_correlation(self):
        seed(1)
        sample_size = 2**15
        for expected in [0, 0.75]:
            # Make the error distribution
            y_true = uniform(0, 1, sample_size)

            # Make the errors and uncertainties
            draw = multivariate_normal([0, 0], [[1, expected], [expected, 1]],
                                       sample_size)

            # Add the errors, and separate out the standard deviations
            y_pred = y_true + [d[0] * normal(0, 1) for d in draw]
            y_std = [abs(d[1]) for d in draw]

            # Test with a very large tolerance for now
            measured_corr = uncertainty_correlation(y_true, y_pred, y_std)
            corr_error = abs(measured_corr - expected)
            self.assertLess(
                corr_error, 0.25,
                'Error for {:.2f}: {:.2f}'.format(expected, corr_error))
Example #50
def generate_trajectory(A,
                        Q,
                        R,
                        starting_pt,
                        depth,
                        leaf_path,
                        K,
                        T,
                        D_in,
                        noise=True,
                        u=None,
                        D_bias=None):
    if u is D_bias is None:
        u = np.ones((1, T))
        D_bias = 1
    x = np.zeros((D_in, T + 1))
    x[:, 0] = starting_pt
    z = np.zeros(T + 1).astype(int)
    for t in range(T):
        log_p = compute_leaf_log_prob(R, x[:, t], K, depth, leaf_path)
        p_unnorm = np.exp(log_p - np.max(log_p))
        p = p_unnorm / np.sum(p_unnorm)
        if noise:  # Stochastically choose the discrete latent and add noise to continuous latent
            choice = npr.multinomial(1, p.ravel(), size=1)
            z[t] = np.where(choice[0, :] == 1)[0][0].astype(int)
            x[:, t + 1] = (A[:, :-D_bias, z[t]] @ x[:, t][:, na] +  \
                          A[:, -D_bias:, z[t]] @ u[:, t][:, na] + \
                          npr.multivariate_normal(np.zeros(D_in), Q[:, :, z[t]])[:, na]).flatten()

        else:  # Use the most probable leaf (Bayes classifier) and add no noise to continuous latent states
            z[t] = np.argmax(p)
            x[:, t + 1] = (A[:, :-D_bias, z[t]] @ x[:, t][:, na] + \
                           A[:, -D_bias:, z[t]] @ u[:, t][:, na]).flatten()

    log_p = compute_leaf_log_prob(R, x[:, -1], K, depth, leaf_path)
    p_unnorm = np.exp(log_p - np.max(log_p))
    p = p_unnorm / np.sum(p_unnorm)
    choice = npr.multinomial(1, p.ravel(), size=1)
    z[-1] = np.where(choice[0, :] == 1)[0][0]
    return x, z
Example #51
def main():
    n = 1000
    cov = [[1, 0.9], [0.9, 1]]
    beta = 0.9
    p_con, p_dis = 0.5, 0.5
    gt = (-p_con * 0.5 * log(np.linalg.det(cov)) + p_dis *
          (log(2) + beta * log(beta) + (1 - beta) * log(1 - beta)) -
          p_con * log(p_con) - p_dis * log(p_dis))

    x_con, y_con = nr.multivariate_normal([0, 0], cov, int(n * p_con)).T
    x_dis = nr.binomial(1, 0.5, int(n * p_dis))
    y_dis = (x_dis + nr.binomial(1, 1 - beta, int(n * p_dis))) % 2
    x_dis, y_dis = 2 * x_dis - np.ones(int(n * p_dis)), 2 * y_dis - np.ones(
        int(n * p_dis))
    x = np.concatenate((x_con, x_dis)).reshape((n, 1))
    y = np.concatenate((y_con, y_dis)).reshape((n, 1))

    print("Ground Truth = ", gt)
    print("Mixed KSG: I(X:Y) = ", mixed.Mixed_KSG(x, y))
    print("Partitioning: I(X:Y) = ", mixed.Partitioning(x, y))
    print("Noisy KSG: I(X:Y) = ", mixed.Noisy_KSG(x, y))
    print("KSG: I(X:Y) = ", mixed.KSG(x, y))
Example #52
def generate_data(n=400):
    INPUT_FEATURES = 2
    CLASSES = 3
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(INPUT_FEATURES, 1, nb_classes=CLASSES)
    minX, maxX = means[0][0], means[0][0]
    minY, maxY = means[0][1], means[0][1]
    for i in range(n):
        for klass in range(CLASSES):
            features = multivariate_normal(means[klass], cov[klass])
            x, y = features
            minX, maxX = min(minX, x), max(maxX, x)
            minY, maxY = min(minY, y), max(maxY, y)
            alldata.addSample(features, [klass])
    return {
        'minX': minX,
        'maxX': maxX,
        'minY': minY,
        'maxY': maxY,
        'd': alldata
    }
Example #53
    def initialize(self, x, P):
        """
        Initializes the filter with the specified mean and
        covariance. Only need to call this if you are using the filter
        to filter more than one set of data; this is called by __init__

        Parameters
        ----------

        x : np.array(dim_x)
            state mean

        P : np.array((dim_x, dim_x))
            covariance of the state
        """

        if x.ndim != 1:
            raise ValueError('x must be a 1D array')

        self.sigmas = multivariate_normal(mean=x, cov=P, size=self.N)
        self.x = x
        self.P = P
Example #54
def test():
    data = multivariate_normal([0, 0], [[1, 2], [2, 5]], 100)
    ### PCA
    pc_base = pca(data, base_num = 1)[0]
 
    ### Plotting
    fig = pl.figure()
    fig.add_subplot(1,1,1)
    pl.axvline(x=0, color = "#000000")
    pl.axhline(y=0, color = "#000000")
    ### Plot data
    pl.scatter(data[:, 0], data[:, 1])
    ### Draw the 1st principal axis
    pc_line = array([-3., 3.]) * (pc_base[1] / pc_base[0])
    pl.arrow(0, 0, -pc_base[0] * 2, -pc_base[1] * 2, fc = "r", width = 0.15, head_width = 0.45)
    pl.plot([-3, 3], pc_line, "r")
    ### Settings
    pl.xticks(size = 15)
    pl.yticks(size = 15)
    pl.xlim([-3, 3])
    pl.tight_layout()
    pl.show()
Example #55
 def setUpClass(cls):
     # Generate data
     # DGP constants
     cls.d = 5
     cls.n = 1000
     cls.n_test = 200
     cls.beta = np.array([0.25, -0.38, 1.41, 0.50, -1.22])
     # Test data
     cls.X_test = multivariate_normal(
         np.zeros(cls.d),
         np.diag(np.ones(cls.d)),
         cls.n_test)
     # Constant treatment effect and propensity
     cls.const_te_data = TestMetalearners._generate_data(
         cls.n, cls.d, cls._untreated_outcome,
         treatment_effect=TestMetalearners._const_te,
         propensity=lambda x: 0.3)
     # Heterogeneous treatment and propensity
     cls.heterogeneous_te_data = TestMetalearners._generate_data(
         cls.n, cls.d, cls._untreated_outcome,
         treatment_effect=TestMetalearners._heterogeneous_te,
         propensity=lambda x: (0.8 if (x[2] > -0.5 and x[2] < 0.5) else 0.2))
Example #56
 def random_mean_and_matrix_semidefinite(self,random_means,random_covariances,array = True, num_points = 1):
     '''
         Definition:
             Draw a random mean vector and a random positive semidefinite covariance
             matrix, then sample num_points from the corresponding multivariate normal.

         Parameters
         ----------
         random_means: Matrix
                 Each row is a two-component range [low = a, high = b]
             Example:
                 means = [[0,1],[2,3],[6,7]]
                 Each row indicates the range in which the uniform random generator can take values

         random_covariances: Matrix
                 A square matrix where each entry is a two-component [low, high] range
             Example:
                 A 3x3 matrix:
                 covariance = [[[a11,b11],[a12,b12],[a13,b13]],[[a21,b21],[a22,b22],[a23,b23]],[[a31,b31],[a32,b32],[a33,b33]]]
                 Each entry indicates the range in which the uniform random generator can take values
                 a = [[[1,2],[-5,5],[-100,6]],[[135,683],[2,285],[-135,13]],[[58,135],[16,35],[5,68478]]]
     '''
     mean = []
     for random_mean in random_means:        
         mean.append(random.uniform(low=random_mean[0],high=random_mean[1]))
     
     if array == True:
         covariance = random.rand( random_means.shape[0] , random_means.shape[0])*(random_covariances[1] - random_covariances[0]) + random_covariances[0]
          covariance = covariance @ covariance.transpose()  # A @ A.T is positive semidefinite
     else:
         covariance = []
         for random_covariance in random_covariances:
             covariance.append([])
             for points in random_covariance:
                 covariance[-1].append(random.uniform(low=points[0],high=points[1]))
         covariance = np.array(covariance)
          # A @ A.T is a positive semidefinite matrix
          covariance = covariance @ covariance.transpose()
     
     
     return  [mean,multivariate_normal(mean,covariance,num_points)]
Example #57
def init_toy_data(num_samples, num_features, num_classes, seed=3):
    # num_samples: number of samples *per class*
    # num_features: number of features (excluding bias)
    # num_classes: number of class labels
    # seed: random seed
    np.random.seed(seed)
    X = np.zeros((num_samples * num_classes, num_features))
    y = np.zeros(num_samples * num_classes)
    for c in range(num_classes):
        # initialize multivariate normal distribution for this class:
        # choose a mean for each feature
        means = uniform(low=-10, high=10, size=num_features)
        # choose a variance for each feature
        var = uniform(low=1.0, high=5, size=num_features)
        # for simplicity, all features are uncorrelated (covariance between any two features is 0)
        cov = var * np.eye(num_features)
        # draw samples from normal distribution
        X[c * num_samples:c * num_samples +
          num_samples, :] = multivariate_normal(means, cov, size=num_samples)
        # set label
        y[c * num_samples:c * num_samples + num_samples] = c
    return X, y
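A usage sketch; it assumes uniform and multivariate_normal are imported from numpy.random at module level, as the body implies:

import numpy as np
from numpy.random import uniform, multivariate_normal

X, y = init_toy_data(num_samples=50, num_features=2, num_classes=3)
print(X.shape, y.shape)   # (150, 2) (150,)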
Example #58
def generate_transformed_data(experiments, reflections, sigma):
    from dials_scratch.jmp.stills.potato import PotatoOnEwaldSphere
    from numpy.random import multivariate_normal

    b1 = 9 / (2 * 0.01)
    b2 = 9 / (2 * 0.01)

    s0 = matrix.col(experiments[0].beam.get_s0())

    s1_obs = flex.vec3_double()
    s2_obs = flex.vec3_double()
    I_obs = flex.double()
    data_list = []
    for i in range(len(reflections)):

        h = matrix.col(reflections[i]["miller_index"])
        s2 = reflections[i]["s1"]

        model = PotatoOnEwaldSphere(1 / s0.length(), s2, sigma)
        mup = model.conditional_mean()
        sigmap = model.conditional_sigma()
        scale = model.scale_factor()

        data = flex.double(flex.grid(9, 9))
        points = multivariate_normal((0, 0),
                                     matrix.sqr(sigmap).as_list_of_lists(),
                                     int(scale * 1000))

        for (x, y) in points:
            jj = int(4.5 + b1 * y)
            ii = int(4.5 + b2 * x)
            if 0 <= ii < data.all()[1] and 0 <= jj < data.all()[0]:  # keep only points inside the grid
                data[jj, ii] += 1

        print(i, len(reflections), data.as_numpy_array())
        data_list.append(data)

    return data_list
Example #59
def generate_synthetic_logistic_data(n, p, L, blk_nnz, gcov, nstd):
    # Generates synthetic data for the logistic regression, using the example
    # from [Friedman10]
    # n : # of observations
    # p : # of predictors
    # L : # of blocks
    # blk_nnz : # of non-zero coefs. in each block
    # gcov : correlation within groups
    # nstd : standard deviation of the added noise

    # size of each block (assumed to be an integer)
    pl = p // L

    # generating the coefficients (betas)
    coefs = np.zeros((p, 1))
    for (i, nnz) in enumerate(blk_nnz):
        blkcoefs = np.zeros((pl, 1))
        blkcoefs[0:nnz] = np.sign(rand(nnz, 1) - 0.5)
        coefs[pl * i:pl * (i + 1)] = permutation(blkcoefs)

    # generating the predictors
    mu = np.zeros(p)
    gsigma = gcov * np.ones((pl, pl))
    np.fill_diagonal(gsigma, 1.0)
    Sigma = np.kron(np.eye(L), gsigma)
    # the predictors come from a standard Gaussian multivariate distribution
    X = multivariate_normal(mu, Sigma, n)

    # linear function of the explanatory variables in X, plus noise
    t = np.dot(X, coefs) + randn(n, 1) * nstd
    # applying the logit
    Pr = 1 / (1 + np.exp(-t))
    # The response variable y[i] is a Bernoulli random variable taking
    # value 1 with probability Pr[i]
    y = rand(n, 1) <= Pr

    # we want each _column_ in X to represent a feature vector
    # y and coefs should be also 1D arrays
    return X.T, y.flatten(), coefs.flatten()
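A usage sketch; it assumes rand, randn, permutation and multivariate_normal are imported from numpy.random at module level, as the body implies:

import numpy as np
from numpy.random import rand, randn, permutation, multivariate_normal

X, y, coefs = generate_synthetic_logistic_data(
    n=200, p=12, L=3, blk_nnz=[2, 1, 0], gcov=0.3, nstd=0.5)
print(X.shape, y.shape, coefs.shape)   # (12, 200) (200,) (12,)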
Example #60
def MCMC(N,lower_threshold,upper_threshold,rho_Gaussian_Process):
    markov_chain = np.array([])
    B_0=npr.normal(loc=0,scale=np.sqrt(t))
    B = npr.normal(loc=0,scale=np.sqrt(t),size=I0)
    X = np.sqrt(rho_correlation)*B_0 + np.sqrt(1-rho_correlation)*B  
    #(B_0_motion,X) = motion_generator()
    #initialize a good x0
    while (loss(X)<=lower_threshold):
        B_0=npr.normal(loc=0,scale=np.sqrt(t))
        B = npr.normal(loc=0,scale=np.sqrt(t),size=I0)
        X = np.sqrt(rho_correlation)*B_0 + np.sqrt(1-rho_correlation)*B  
    for i in range(N):
        Y = npr.multivariate_normal(mean=np.zeros(I0), cov=M_cov)
        X_intermediate = rho_Gaussian_Process*X + np.sqrt(1-rho_Gaussian_Process**2)*Y
        l = loss(X_intermediate)
        if (l > lower_threshold):
            X = X_intermediate
            markov_chain = np.append(markov_chain, l)
        else:
            markov_chain = np.append(markov_chain, loss(X))
    count=len(markov_chain[markov_chain>upper_threshold])
    return (markov_chain,count/N)