Example #1
def gen_prior(K_chol, sig2_omega, sig2_mu):
    th = np.zeros(parser.N)
    N = parser.idxs_and_shapes['mus'][1][0]
    parser.set(th, 'betas', K_chol.dot(npr.randn(len(lam0), K)).T)
    parser.set(th, 'omegas', np.sqrt(sig2_omega) * npr.randn(N, K))
    parser.set(th, 'mus', np.sqrt(sig2_mu) * npr.randn(N))
    return th
def adam_minimax(grad_both, init_params_max, init_params_min, callback=None, num_iters=100,
         step_size_max=0.001, step_size_min=0.001, b1=0.9, b2=0.999, eps=10**-8):
    """Adam modified to do minimiax optimization, for instance to help with
    training generative adversarial networks."""

    x_max, unflatten_max = flatten(init_params_max)
    x_min, unflatten_min = flatten(init_params_min)

    m_max = np.zeros(len(x_max))
    v_max = np.zeros(len(x_max))
    m_min = np.zeros(len(x_min))
    v_min = np.zeros(len(x_min))
    for i in range(num_iters):
        g_max_uf, g_min_uf = grad_both(unflatten_max(x_max),
                                       unflatten_min(x_min), i)
        g_max, _ = flatten(g_max_uf)
        g_min, _ = flatten(g_min_uf)

        if callback: callback(unflatten_max(x_max), unflatten_min(x_min), i,
                              unflatten_max(g_max), unflatten_min(g_min))

        m_max = (1 - b1) * g_max      + b1 * m_max  # First  moment estimate.
        v_max = (1 - b2) * (g_max**2) + b2 * v_max  # Second moment estimate.
        mhat_max = m_max / (1 - b1**(i + 1))    # Bias correction.
        vhat_max = v_max / (1 - b2**(i + 1))
        x_max = x_max + step_size_max * mhat_max / (np.sqrt(vhat_max) + eps)

        m_min = (1 - b1) * g_min      + b1 * m_min  # First  moment estimate.
        v_min = (1 - b2) * (g_min**2) + b2 * v_min  # Second moment estimate.
        mhat_min = m_min / (1 - b1**(i + 1))    # Bias correction.
        vhat_min = v_min / (1 - b2**(i + 1))
        x_min = x_min - step_size_min * mhat_min / (np.sqrt(vhat_min) + eps)
    return unflatten_max(x_max), unflatten_min(x_min)
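A minimal usage sketch, not from the original source: the toy objective and step sizes below are assumptions, chosen only to illustrate the grad_both(params_max, params_min, i) signature. It assumes adam_minimax above (together with flatten from autograd.misc.flatten) is in scope.

import autograd.numpy as np
from autograd import grad

def objective(u, v, i):
    # toy saddle problem: maximized over u at u = 1, minimized over v at v = -2
    return -np.sum((u - 1.0)**2) + np.sum((v + 2.0)**2)

grad_max = grad(objective, 0)   # gradient w.r.t. the "max" player
grad_min = grad(objective, 1)   # gradient w.r.t. the "min" player
grad_both = lambda u, v, i: (grad_max(u, v, i), grad_min(u, v, i))

u_opt, v_opt = adam_minimax(grad_both, np.zeros(1), np.zeros(1),
                            num_iters=2000, step_size_max=0.05, step_size_min=0.05)
print(u_opt, v_opt)  # should approach [1.] and [-2.]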
Example #3
def initParam(prior, X, N, D, G, M, K, dir_param, prng):
    """ initialize variational parameters with prior parameters
    """
    
    [tpM, tpG, lb, ub] = [np.ones(M), np.ones(G), 10., 10.]
    tpR = prng.rand(2*M)
    [tau_a1, tau_a2, tau_b1, tau_b2, tau_v1, tau_v2] = \
            [lb+(ub-lb)*tpR[0 : M], tpM,\
             lb+(ub-lb)*tpR[M : 2*M], tpM, \
             tpG, tpG]

    mu_w = prng.randn(G,D,K)/np.sqrt(D)
    sigma_w = np.ones(G*D*K) * 1e-3
    mu_b = prng.randn(G, K)/np.sqrt(D)
    sigma_b = np.ones(G*K) * 1e-3

    phi = np.reshape(prng.dirichlet(np.ones(G)*dir_param, M), M*G)
    
    mu_w = np.reshape(mu_w, G*D*K)
    mu_b = np.reshape(mu_b, G*K)

    param_init = np.concatenate((tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1,\
        tau_v2, mu_w, sigma_w, mu_b, sigma_b))
    
    return param_init
Example #4
 def l2_norm(x, y):
     if norm_for_l2:
         xn = x/_np.sqrt((x * x).sum())
         yn = y/_np.sqrt((y * y).sum())
     else:
         xn, yn = x, y
     return ((xn - yn) ** 2).sum()
Example #5
def scalar_log_lik(theta_1, theta_2, x):
    arg = (x - theta_1)
    lik1 = 1.0 / np.sqrt(2 * SIGMA_x ** 2 * np.pi) * np.exp(- np.dot(arg, arg) / (2 * SIGMA_x ** 2))
    arg = (x - theta_1 - theta_2)
    lik2 = 1.0 / np.sqrt(2 * SIGMA_x ** 2 * np.pi) * np.exp(- np.dot(arg, arg) / (2 * SIGMA_x ** 2))

    return np.log(0.5 * lik1 + 0.5 * lik2)
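A hedged sketch of how the score of this mixture log-likelihood can be obtained with autograd; the value of SIGMA_x and the evaluation points are assumptions, and the snippet assumes scalar_log_lik above was written against autograd.numpy.

from autograd import grad

SIGMA_x = 2.0  # assumed observation noise scale
dll_dtheta1 = grad(scalar_log_lik, 0)
dll_dtheta2 = grad(scalar_log_lik, 1)
print(dll_dtheta1(0.0, 1.0, 0.5), dll_dtheta2(0.0, 1.0, 0.5))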
Example #6
    def update(self, network):
        for i, layer in enumerate(network.parametric_layers):
            for n in layer.parameters.keys():
                grad = layer.parameters.grad[n]
                self.accu[i][n] = self.rho * self.accu[i][n] + (1.0 - self.rho) * grad ** 2
                step = grad * np.sqrt(self.d_accu[i][n] + self.eps) / np.sqrt(self.accu[i][n] + self.eps)

                layer.parameters.step(n, -step * self.lr)
                self.d_accu[i][n] = self.rho * self.d_accu[i][n] + (1.0 - self.rho) * step ** 2
 def callback(weights, iter):
     if iter % 10 == 0:
         print("max of weights", np.max(np.abs(weights)))
         train_preds = undo_norm(pred_fun(weights, train_smiles))
         cur_loss = loss_fun(weights, train_smiles, train_targets)
         training_curve.append(cur_loss)
         print("Iteration", iter, "loss", cur_loss, "train RMSE",
               np.sqrt(np.mean((train_preds - train_raw_targets)**2)))
         if validation_smiles is not None:
             validation_preds = undo_norm(pred_fun(weights, validation_smiles))
             print("Validation RMSE", iter, ":",
                   np.sqrt(np.mean((validation_preds - validation_raw_targets) ** 2)))
Example #8
def dKdu(u, v):
  """
  compute the grads of a given K w.r.t. u
  you can just switch order of args to compute it for v
  """
  anorm = np.sqrt(np.sum(u*u))
  bnorm = np.sqrt(np.sum(v*v))
  den2 = (anorm * bnorm) + 1e-20 

  a = v / den2
  b = u / np.sum(np.square(u))
  c = cosine_sim(u,v)
  return a - b*c
Example #9
def cosine_sim(a_t, b_t):
    """
    Computes the cosine similarity of vectors a_t and b_t.
    Specifically \frac{a \cdot b}{||a|| \cdot ||b||}.
    """
    # numerator is the inner product
    num = np.dot(a_t, b_t)

    # denominator is the product of the norms
    anorm = np.sqrt(np.sum(a_t*a_t))
    bnorm = np.sqrt(np.sum(b_t*b_t))
    den2 = (anorm * bnorm) + 1e-5

    return num / den2
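A consistency check, not part of the original code: with dKdu and cosine_sim above in scope (and written against autograd.numpy), autograd's gradient of cosine_sim should agree with the closed-form dKdu up to the small stabilizing constants in the denominators.

import autograd.numpy as np
from autograd import grad

u = np.array([1.0, 2.0, 3.0])
v = np.array([0.5, -1.0, 2.0])
print(np.allclose(dKdu(u, v), grad(cosine_sim, 0)(u, v), atol=1e-4))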
Example #10
def adadelta(paramvec, loss, batches, epochs=1, rho=0.95, epsilon=1e-6, callback=None):
    sum_gsq = np.zeros_like(paramvec)
    sum_usq = np.zeros_like(paramvec)
    vals = []

    for epoch in range(epochs):
        permuted_batches = [batches[i] for i in npr.permutation(len(batches))]
        for im, angle in permuted_batches:
            val, g = vgrad(loss)(paramvec, im, angle)
            sum_gsq = rho*sum_gsq + (1.-rho)*g**2
            ud = -np.sqrt(sum_usq + epsilon) / np.sqrt(sum_gsq + epsilon) * g
            sum_usq = rho*sum_usq + (1.-rho)*ud**2
            paramvec = paramvec + ud
            vals.append(val)
        if callback: callback(epoch, paramvec, vals, permuted_batches)
    return paramvec
def cost(usv):
    delta = .5
    u = usv[0]
    s = usv[1]
    vt = usv[2]
    X = np.dot(np.dot(u, np.diag(s)), vt)
    return np.sum(np.sqrt((X - A)**2 + delta**2) - delta)
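A hypothetical usage sketch; the matrix A and the SVD factors below are assumptions. cost above scores a factorization u.diag(s).vt against a module-level matrix A with a smoothed, pseudo-Huber-style absolute error, so an exact SVD gives a cost near zero.

import autograd.numpy as np

A = np.array([[3.0, 0.0], [0.0, 1.0]])   # assumed module-level target matrix
u, s, vt = np.linalg.svd(A)
print(cost((u, s, vt)))                  # ~0 for an exact factorization of A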
    def plot_single_gp(ax, params, layer, unit, plot_xs):
        ax.cla()
        rs = npr.RandomState(0)

        deep_map = create_deep_map(params)
        gp_details = deep_map[layer][unit]
        gp_params = pack_gp_params(gp_details)

        pred_mean, pred_cov = predict_layer_funcs[layer][unit](gp_params, plot_xs, with_noise = False, FITC = False)
        x0 = deep_map[layer][unit]['x0']
        y0 = deep_map[layer][unit]['y0']
        noise_scale = deep_map[layer][unit]['noise_scale']

        marg_std = np.sqrt(np.diag(pred_cov))
        if n_samples_to_plot > 19:
            ax.plot(plot_xs, pred_mean, 'b')
            ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
            np.concatenate([pred_mean - 1.96 * marg_std,
                           (pred_mean + 1.96 * marg_std)[::-1]]),
                           alpha=.15, fc='Blue', ec='None')

        # Show samples from posterior.
        sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov*(random), size=n_samples_to_plot)
        ax.plot(plot_xs, sampled_funcs.T)
        ax.plot(x0, y0, 'ro')
        #ax.errorbar(x0, y0, yerr = noise_scale, fmt='o')
        ax.set_xticks([])
        ax.set_yticks([])
Example #13
    def loglikelihood(self, g, beta, mu_ivp, alpha, pi, priors):
        
        logprobs = []
        for i, ifx in enumerate(self._ifix):
            # get the logprobability for each mixture component
            ll = 0.
            
            zM = self._forward(g, beta, mu_ivp[i], ifx)
            for q, yq in enumerate(self.Y_train_):
                ll += norm.logpdf(
                    yq, zM[..., q], scale=1/np.sqrt(alpha)).sum()

            logprobs.append(ll + np.log(pi[i]))
        logprobs = np.array(logprobs)

        lpmax = max(logprobs)

        loglik = lpmax + np.log(np.exp(logprobs - lpmax).sum())

        Cg = self.latentforces[0].kernel(self.ttc[:, None])
        Cg[np.diag_indices_from(Cg)] += 1e-5
        Lg = np.linalg.cholesky(Cg)
        logprior = -0.5 * g.dot(cho_solve((Lg, True), g)) - \
                   np.log(np.diag(Lg)).sum() - \
                   Lg.shape[0] / 2 * np.log(2 * np.pi)


        for vn, x in zip(['beta'], beta):
            try:
                prior_logpdf = priors[vn]
                logprior += prior_logpdf(x)
            except KeyError:
                pass

        return loglik + logprior
Example #14
    def _get_responsibilities(self, pi, g, beta, mu_ivp, alpha):
        """ Gets the posterior responsibilities for each comp. of the mixture.
        """
        probs = [[]]*len(self.N_data)
        for i, ifx in enumerate(self._ifix):

            zM = self._forward(g, beta, mu_ivp[i], ifx)

            for q, yq in enumerate(self.Y_train_):
                logprob = norm.logpdf(
                    yq, zM[self.data_inds[q], :, q], scale=1/np.sqrt(alpha))

                # sum over the dimension component
                logprob = logprob.sum(-1)

                if len(probs[q]) == 0:
                    probs[q] = logprob

                else:
                    probs[q] = np.column_stack((probs[q], logprob))
        probs = [lp - pi for lp in probs]
        # subtract the maximum before exponentiating, for a stable normalization
        probs = [p - np.atleast_1d(p.max(axis=-1))[:, None]
                 for p in probs]
        probs = [np.exp(p) / np.exp(p).sum(-1)[:, None] for p in probs]

        return probs
Example #15
    def predict_percentile(self, X, ancillary_X=None, p=0.5):
        """
        Returns the median lifetimes for the individuals, by default. If the survival curve of an
        individual does not cross ``p``, then the result is infinity.
        http://stats.stackexchange.com/questions/102986/percentile-loss-functions

        Parameters
        ----------
        X:  numpy array or DataFrame
            a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
            can be in any order. If a numpy array, columns must be in the
            same order as the training data.
        ancillary_X: numpy array or DataFrame, optional
            a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns
            can be in any order. If a numpy array, columns must be in the
            same order as the training data.
        p: float, optional (default=0.5)
            the percentile, must be between 0 and 1.

        Returns
        -------
        percentiles: DataFrame

        See Also
        --------
        predict_median

        """
        exp_mu_, sigma_ = self._prep_inputs_for_prediction_and_return_scores(X, ancillary_X)
        return pd.DataFrame(exp_mu_ * np.exp(np.sqrt(2) * sigma_ * erfinv(2 * p - 1)), index=_get_index(X))
Example #16
def adam(grad,
         x,
         batch_id=None,
         num_batches=None,
         callback=None,
         num_iters=100,
         step_size=0.001,
         b1=0.9,
         b2=0.999,
         eps=10**-8):
    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms."""
    m = np.zeros(len(x))
    v = np.zeros(len(x))

    if batch_id is not None:
        scale_factor = (2**(num_batches-batch_id)) / (2**(num_batches-1))
    else:
        scale_factor = 1

    for i in range(num_iters):
        g = grad(x, scale_factor)
        if callback: callback(x, i, g)
        m = (1 - b1) * g      + b1 * m  # First  moment estimate.
        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
        mhat = m / (1 - b1**(i + 1))    # Bias correction.
        vhat = v / (1 - b2**(i + 1))
        x -= step_size*mhat/(np.sqrt(vhat) + eps)
    return x
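A minimal sketch, not from the original source, of calling adam above in its default mode, where batch_id is None and the gradient simply receives scale_factor = 1.

import autograd.numpy as np
from autograd import grad

target = np.array([1.0, -2.0, 3.0])
loss = lambda x, scale_factor: scale_factor * np.sum((x - target)**2)
x_opt = adam(grad(loss), np.zeros(3), num_iters=500, step_size=0.1)
print(x_opt)  # should approach [1., -2., 3.]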
Example #17
def test_div():
    fun = lambda x, y : x / y
    make_gap_from_zero = lambda x : np.sqrt(x **2 + 0.5)
    for arg1, arg2 in arg_pairs():
        arg1 = make_gap_from_zero(arg1)
        arg2 = make_gap_from_zero(arg2)
        check_grads(fun)(arg1, arg2)
Example #18
def opt_traj(func, fdict, T, opt_method = 'SGD', init = None, \
    learning_rate = 0.1, seed = 100, momentum = False, noise_level = 0.0):
    # do optimization and return the trajectory
    params = {'x': 0.0, 'y': 0.0}
    domain = fdict['domain']
    optimum = fdict['optimum']
    loss_and_grad = value_and_grad(func)
    #quick_grad_check(func, params)   
    params = init_params(params, domain, init, seed)
    check_grads(func, params)
    opt_server = Parameter_Server(opt_method, momentum)
    opt_server.init_gradient_storage(params)
    
    x_traj = []
    y_traj = []
    f_traj = []
    
    print('optimising function using %s...' % opt_method)
    for t in range(T):
        (func_value, func_grad) = loss_and_grad(params)
        x_traj.append(params['x'])
        y_traj.append(params['y'])
        f_traj.append(func_value)
        func_grad = inject_noise(func_grad, noise_level)
        if opt_method == 'SGD':
            norm = np.sqrt(func_grad['x'] ** 2 + func_grad['y'] ** 2)
            if norm >= 2.0:
                func_grad['x'] /= norm / 2; func_grad['y'] /= norm / 2
        params = opt_server.update(params, func_grad, learning_rate)

    return np.array(x_traj), np.array(y_traj), np.array(f_traj)
Example #19
        def multivariate_t_rvs(self, m, S, random_state = None):
            '''generate random variables of multivariate t distribution
            Parameters
            ----------
            m : array_like
                mean of random variable, length determines dimension of random variable
            S : array_like
                square array of covariance  matrix
            df : int or float
                degrees of freedom
            n : int
                number of observations, return random array will be (n, len(m))
            random_state : int
                           seed
            Returns
            -------
            rvs : ndarray, (n, len(m))
                each row is an independent draw of a multivariate t distributed
                random variable
            '''
            np.random.rand(9)
            m = np.asarray(m)
            d = self.n_features
            df = self.degree_freedom
            n = 1
            if df == np.inf:
                x = 1.
            else:
                x = random_state.chisquare(df, n)/df
            np.random.rand(90)

            z = random_state.multivariate_normal(np.zeros(d),S,(n,))
            return m + z/np.sqrt(x)[:,None]
def ackley(x):
    a, b, c = 20.0, -0.2, 2.0*np.pi
    len_recip = 1.0/len(x)
    sum_sqrs = sum(x*x)
    sum_cos = sum(np.cos(c*x))
    return (-a * np.exp(b*np.sqrt(len_recip*sum_sqrs)) -
            np.exp(len_recip*sum_cos) + a + np.e)
Example #21
File: vbil.py  Project: onenoc/lfvbae
def log_prior_density(theta):
    alpha = 2
    beta = 0.5
    mu = np.log(alpha/beta)
    sigma = np.log(np.sqrt(alpha/(beta**2)))
    params = np.array([mu,sigma])
    return log_variational(params, theta)
Example #22
    def callback(params):
        print("Log likelihood {}".format(-objective(params)))
        plt.cla()
        print(params)
        # Show posterior marginals.
        plot_xs = np.reshape(np.linspace(-7, 7, 300), (300,1))
        pred_mean, pred_cov = predict(params, X, y, plot_xs)
        marg_std = np.sqrt(np.diag(pred_cov))
        ax.plot(plot_xs, pred_mean, 'b')
        ax.fill(np.concatenate([plot_xs, plot_xs[::-1]]),
                np.concatenate([pred_mean - 1.96 * marg_std,
                               (pred_mean + 1.96 * marg_std)[::-1]]),
                alpha=.15, fc='Blue', ec='None')

        # Show samples from posterior.
        rs = npr.RandomState(0)
        sampled_funcs = rs.multivariate_normal(pred_mean, pred_cov, size=10)
        ax.plot(plot_xs, sampled_funcs.T)

        ax.plot(X, y, 'kx')
        ax.set_ylim([-1.5, 1.5])
        ax.set_xticks([])
        ax.set_yticks([])
        plt.draw()
        plt.pause(1.0/60.0)
Example #23
 def __init__(self, mu, var):
     self.norm_const = - 0.5*np.log(2*np.pi)
     self.mu = np.atleast_1d(mu).flatten()
     self.var = np.atleast_1d(var).flatten() 
     self.dim = np.prod(self.var.shape)
     assert(self.mu.shape == self.var.shape)
     self.std = np.sqrt(var)
     self.logstd = np.log(self.std)
Example #24
def test_mod():
    fun = lambda x, y : x % y
    make_gap_from_zero = lambda x : np.sqrt(x **2 + 0.5)
    for arg1, arg2 in arg_pairs():
        if arg1 is not arg2:  # Gradient undefined at x == y
            arg1 = make_gap_from_zero(arg1)
            arg2 = make_gap_from_zero(arg2)
            check_grads(fun)(arg1, arg2)
Example #25
    def sample(self, n_samples=2000, observed_states=None, random_state=None):
        """Generate random samples from the self.

        Parameters
        ----------
        n_samples : int
            Number of samples to generate.

        observed_states : array
            If provided, states are not sampled.

        random_state: RandomState or an int seed
            A random number generator instance. If None is given, the
            object's random_state is used

        Returns
        -------
        samples : array_like, length (``n_samples``)
                  List of samples

        states : array_like, shape (``n_samples``)
                 List of hidden states (accounting for tied states by giving
                 them the same index)
        """
        if random_state is None:
            random_state = self.random_state
        random_state = check_random_state(random_state)

        samples = np.zeros(n_samples)
        states = np.zeros(n_samples, dtype=int)  # integer states are later used as indices

        if observed_states is None:
            startprob_pdf = np.exp(np.copy(self._log_startprob))
            startdist = stats.rv_discrete(name='custm',
                                      values=(np.arange(startprob_pdf.shape[0]),
                                                        startprob_pdf),
                                      seed=random_state)
            states[0] = startdist.rvs(size=1)[0]

            transmat_pdf = np.exp(np.copy(self._log_transmat))
            transmat_cdf = np.cumsum(transmat_pdf, 1)

            nrand = random_state.rand(n_samples)
            for idx in range(1,n_samples):
                newstate = (transmat_cdf[states[idx-1]] > nrand[idx-1]).argmax()
                states[idx] = newstate
        else:
            states = observed_states

        mu = np.copy(self._mu_)
        precision = np.copy(self._precision_)
        for idx in range(n_samples):
            mean_ = self._mu_[states[idx]]
            var_ = np.sqrt(1/precision[states[idx]])
            samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                    random_state=random_state)
        states = self._process_sequence(states)
        return samples, states
Example #26
def rmsprop(grad, x, callback=None, num_iters=100, step_size=0.1, gamma=0.9, eps = 10**-8):
    """Root mean squared prop: See Adagrad paper for details."""
    avg_sq_grad = np.ones(len(x))
    for i in range(num_iters):
        g = grad(x, i)
        if callback: callback(x, i, g)
        avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
        x -= step_size * g/(np.sqrt(avg_sq_grad) + eps)
    return x
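A short sketch with an assumed toy problem, not from the source, showing the callback(x, i, g) hook that rmsprop above exposes.

import autograd.numpy as np
from autograd import grad

loss = lambda x, i: np.sum(x**2)
log_every_20 = lambda x, i, g: print(i, loss(x, i)) if i % 20 == 0 else None
x_opt = rmsprop(grad(loss), 5.0 * np.ones(2), callback=log_every_20,
                num_iters=100, step_size=0.5)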
 def sample_from_mvn(mu, sigma,rs = npr.RandomState(0),FITC = False):
     if FITC:
         #if not np.allclose(sigma, np.diag(np.diag(sigma))):
         #    print("NOT DIAGONAL")
         #    return np.dot(np.linalg.cholesky(sigma+1e-6*np.eye(len(sigma))),rs.randn(len(sigma)))+mu if random == 1 else mu
         return np.dot(np.sqrt(sigma+1e-6),rs.randn(len(sigma)))+mu if random == 1 else mu
         #return np.dot(np.linalg.cholesky(sigma+1e-6*np.eye(len(sigma))),rs.randn(len(sigma)))+mu if random == 1 else mu
     else:
         return np.dot(np.linalg.cholesky(sigma+1e-6*np.eye(len(sigma))),rs.randn(len(sigma)))+mu if random == 1 else mu
Example #28
def natural_sample(J, h, num_samples=None, rng=rng):
    sample_shape = (num_samples,) + h.shape if num_samples else h.shape
    J = -2*J
    if J.ndim == 1:
        return h / J + rng.normal(size=sample_shape) / np.sqrt(J)
    else:
        L = np.linalg.cholesky(J)
        noise = solve_triangular(L, rng.normal(size=sample_shape).T, trans='T')
        return solve_posdef_from_cholesky(L, h.T).T + noise.T
 def get_error_and_ll(w, v_prior, X, y, K, location, scale):
     v_noise = np.exp(parser.get(w, 'log_v_noise')[ 0, 0 ]) * scale**2
     q = get_parameters_q(w, v_prior)
     samples_q = draw_samples(q, K)
     outputs = predict(samples_q, X) * scale + location
     log_factor = -0.5 * np.log(2 * math.pi * v_noise) - 0.5 * (np.tile(y, (1, K)) - np.array(outputs))**2 / v_noise
     ll = np.mean(logsumexp(log_factor - np.log(K), 1))
     error = np.sqrt(np.mean((y - np.mean(outputs, 1, keepdims = True))**2))
     return error, ll
def devec_ackley(x):
    a, b, c = 20.0, -0.2, 2.0*np.pi
    len_recip = 1.0/len(x)
    sum_sqrs, sum_cos = 0.0, 0.0
    for i in x:
        sum_cos += np.cos(c*i)
        sum_sqrs += i*i
    return (-a * np.exp(b*np.sqrt(len_recip*sum_sqrs)) -
            np.exp(len_recip*sum_cos) + a + np.e)
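A quick check, not in the original source: the devectorized loop above should agree with the vectorized ackley defined earlier, assuming both are in scope.

import numpy as np

x = np.array([0.3, -1.2, 2.5])
print(np.isclose(ackley(x), devec_ackley(x)))  # the two implementations should match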
Example #31
def stepSize(iteration, sPrev, gradient, eta=1.0):
    sCur = 0.1 * (gradient**2) + 0.9 * sPrev
    step = eta * np.power(iteration, -0.5 + 1e-16) / (1. + np.sqrt(sCur))

    return step, sCur
Example #32
 def external_R(
         self, θ: "Model parameters", dθ: "derivatives of model parameters"
 ) -> "Extrinsic curvature radius":
     v2 = self.external_v2(θ, dθ)
     a2 = self.external_a2(θ, dθ)
     return v2 / np.sqrt(a2)
Example #33
def each_manifold_analysis_D1(sD1, kappa, n_t, eps=1e-8, t_vec=None):
    '''
    This function computes the manifold capacity a_Mfull, the manifold radius R_M, and manifold dimension D_M
    with margin kappa using n_t randomly sampled vectors for a single manifold defined by a set of points sD1.

    Args:
        sD1: 2D array of shape (D+1, m) where m is number of manifold points 
        kappa: Margin size (scalar)
        n_t: Number of randomly sampled vectors to use
        eps: Minimal distance (default 1e-8)
        t_vec: Optional 2D array of shape (D+1, m) containing sampled t vectors to use in evaluation

    Returns:
        a_Mfull: Calculated capacity (scalar)
        R_M: Calculated radius (scalar)
        D_M: Calculated dimension (scalar)
    '''
    # Get the dimensionality and number of manifold points
    D1, m = sD1.shape  # D+1 dimensional data
    D = D1 - 1
    # Sample n_t vectors from a D+1 dimensional standard normal distribution unless a set is given
    if t_vec is None:
        t_vec = np.random.randn(D1, n_t)
    # Find the corresponding manifold point for each random vector
    ss, gg = maxproj(t_vec, sD1)

    # Compute V, S~ for each random vector
    s_all = np.empty((D1, n_t))
    f_all = np.zeros(n_t)
    for i in range(n_t):
        # Get the t vector to use (keeping dimensions)
        t = np.expand_dims(t_vec[:, i], axis=1)
        if gg[i] + kappa < 0:
            # For this case, a solution with V = T is allowed by the constraints, so we don't need to
            # find it numerically
            v_f = t
            s_f = ss[:, i].reshape(-1, 1)
        else:
            # Get the solution for this t vector
            v_f, _, _, alpha, vminustsqk = minimize_vt_sq(t, sD1, kappa=kappa)
            f_all[i] = vminustsqk
            # If the solution vector is within eps of t, set them equal (interior point)
            if np.linalg.norm(v_f - t) < eps:
                v_f = t
                s_f = ss[:, i].reshape(-1, 1)
            else:
                # Otherwise, compute S~ from the solution
                scale = np.sum(alpha)
                s_f = (t - v_f) / scale
        # Store the calculated values
        s_all[:, i] = s_f[:, 0]

    # Compute the capacity from eq. 16, 17 in 2018 PRX paper.
    max_ts = np.maximum(np.sum(t_vec * s_all, axis=0) + kappa, np.zeros(n_t))
    s_sum = np.sum(np.square(s_all), axis=0)
    lamb = np.asarray(
        [max_ts[i] / s_sum[i] if s_sum[i] > 0 else 0 for i in range(n_t)])
    slam = np.square(lamb) * s_sum
    a_Mfull = 1 / np.mean(slam)

    # Compute R_M from eq. 28 of the 2018 PRX paper
    ds0 = s_all - s_all.mean(axis=1, keepdims=True)
    ds = ds0[0:-1, :] / s_all[-1, :]
    ds_sq_sum = np.sum(np.square(ds), axis=0)
    R_M = np.sqrt(np.mean(ds_sq_sum))

    # Compute D_M from eq. 29 of the 2018 PRX paper
    t_norms = np.sum(np.square(t_vec[0:D, :]), axis=0, keepdims=True)
    t_hat_vec = t_vec[0:D, :] / np.sqrt(t_norms)
    s_norms = np.sum(np.square(s_all[0:D, :]), axis=0, keepdims=True)
    s_hat_vec = s_all[0:D, :] / np.sqrt(s_norms + 1e-12)
    ts_dot = np.sum(t_hat_vec * s_hat_vec, axis=0)

    D_M = D * np.square(np.mean(ts_dot))

    return a_Mfull, R_M, D_M
Example #34
 def metric_tetrad(self, g) -> "Finds tetrad and inverse tetrad of g":
     v, e0 = self.metric_eigenproblem(g)
     e = np.einsum('ia,ab->ib', e0, np.diag(np.sqrt(v)))
     einv = np.einsum('ia,ab->ib', e0, np.diag(1 / np.sqrt(v)))
     return e, einv
Example #35
def fun_FA(centers,
           maxK,
           max_iter,
           n_repeats,
           s_all=None,
           verbose=False,
           conjugate_gradient=True):
    '''
    Extracts the low rank structure from the data given by centers

    Args:
        centers: 2D array of shape (N, P) where N is the ambient dimension and P is the number of centers
        maxK: Maximum rank to consider
        max_iter: Maximum number of iterations for the solver
        n_repeats: Number of repetitions to find the most stable solution at each iteration of K
        s_all: (Optional) iterable containing (P, 1) random normal vectors

    Returns:
        norm_coeff: Ratio of center norms before and after optimization
        norm_coeff_vec: Mean ratio of center norms before and after optimization
        Proj: P-1 basis vectors
        V1_mat: Solution for each value of K
        res_coeff: Cost function after optimization for each K
        res_coeff0: Correlation before optimization
    '''
    N, P = centers.shape
    # Configure the solver
    opts = {'max_iter': max_iter, 'gtol': 1e-6, 'xtol': 1e-6, 'ftol': 1e-8}

    # Subtract the global mean
    mean = np.mean(centers.T, axis=0, keepdims=True)
    Xb = centers.T - mean
    xbnorm = np.sqrt(np.square(Xb).sum(axis=1, keepdims=True))

    # Gram-Schmidt into a P-1 dimensional basis
    q, r = qr(Xb.T, mode='economic')
    X = np.matmul(Xb, q[:, 0:P - 1])

    # Store the (P, P-1) dimensional data before extracting the low rank structure
    X0 = X.copy()
    xnorm = np.sqrt(np.square(X0).sum(axis=1, keepdims=True))

    # Calculate the correlations
    C0 = np.matmul(X0, X0.T) / np.matmul(xnorm, xnorm.T)
    res_coeff0 = (np.sum(np.abs(C0)) - P) * 1 / (P * (P - 1))

    # Storage for the results
    V1_mat = []
    C0_mat = []
    norm_coeff = []
    norm_coeff_vec = []
    res_coeff = []

    # Compute the optimal low rank structure for rank 1 to maxK
    V1 = None
    for i in range(1, maxK + 1):
        best_stability = 0

        for j in range(1, n_repeats + 1):
            # Sample a random normal vector unless one is supplied
            if s_all is not None and len(s_all) >= i:
                s = s_all[i * j - 1]
            else:
                s = np.random.randn(P, 1)

            # Create initial V.
            sX = np.matmul(s.T, X)
            if V1 is None:
                V0 = sX
            else:
                V0 = np.concatenate([sX, V1.T], axis=0)
            V0, _ = qr(V0.T, mode='economic')  # (P-1, i)

            # Compute the optimal V for this i
            V1tmp, output = CGmanopt(
                V0, partial(square_corrcoeff_full_cost, grad=False), X, **opts)

            # Compute the cost
            cost_after, _ = square_corrcoeff_full_cost(V1tmp, X, grad=False)

            # Verify that the solution is orthogonal within tolerance
            assert np.linalg.norm(np.matmul(V1tmp.T, V1tmp) - np.identity(i),
                                  ord='fro') < 1e-10

            # Extract low rank structure
            X0 = X - np.matmul(np.matmul(X, V1tmp), V1tmp.T)

            # Compute stability of solution
            denom = np.sqrt(np.sum(np.square(X), axis=1))
            stability = min(np.sqrt(np.sum(np.square(X0), axis=1)) / denom)

            # Store the solution if it has the best stability
            if stability > best_stability:
                best_stability = stability
                best_V1 = V1tmp
            if n_repeats > 1 and verbose:
                print(j, 'cost=', cost_after, 'stability=', stability)

        # Use the best solution
        V1 = best_V1

        # Extract the low rank structure
        XV1 = np.matmul(X, V1)
        X0 = X - np.matmul(XV1, V1.T)

        # Compute the current (normalized) cost
        xnorm = np.sqrt(np.square(X0).sum(axis=1, keepdims=True))
        C0 = np.matmul(X0, X0.T) / np.matmul(xnorm, xnorm.T)
        current_cost = (np.sum(np.abs(C0)) - P) * 1 / (P * (P - 1))
        if verbose:
            print('K=', i, 'mean=', current_cost)

        # Store the results
        V1_mat.append(V1)
        C0_mat.append(C0)
        norm_coeff.append((xnorm / xbnorm)[:, 0])
        norm_coeff_vec.append(np.mean(xnorm / xbnorm))
        res_coeff.append(current_cost)

        # Break the loop if the cost has increased for 3 consecutive iterations
        if (i > 4 and res_coeff[i - 1] > res_coeff[i - 2]
                and res_coeff[i - 2] > res_coeff[i - 3]
                and res_coeff[i - 3] > res_coeff[i - 4]):
            if verbose:
                print("Optimal K0 found")
            break
    return norm_coeff, norm_coeff_vec, q[:, 0:P -
                                         1], V1_mat, res_coeff, res_coeff0
Example #36
def NegELBO(param, prior, X, S, Ncon, G, M, K):
    """
    Parameters
    ----------
    param: length (2M + 2M + MG + 2G + GNK + GDK + GDK + GK + GK) 
        variational parameters, including:
        1) tau_a1: len(M), first parameter of q(alpha_m)
        2) tau_a2: len(M), second parameter of q(alpha_m)
        3) tau_b1: len(M), first parameter of q(beta_m)
        4) tau_b2: len(M), second parameter of q(beta_m)
        5) phi: shape(M, G), phi[m,:] is the parameter vector of q(c_m)
        6) tau_v1: len(G), first parameter of q(nu_g)
        7) tau_v2: len(G), second parameter of q(nu_g)
        8) mu_w: shape(G, D, K), mu_w[g,d,k] is the mean parameter of 
            q(W^g_{dk})
        9) sigma_w: shape(G, D, K), sigma_w[g,d,k] is the std parameter of 
            q(W^g_{dk})
        10) mu_b: shape(G, K), mu_b[g,k] is the mean parameter of q(b^g_k)
        11) sigma_b: shape(G, K), sigma_b[g,k] is the std parameter of q(b^g_k)

    prior: dictionary
        the naming of keys follow those in param
        {'tau_a1':val1, ...}

    X: shape(N, D)
        each row represents a sample and each column represents a feature

    S: shape(n_con, 4)
        each row represents an observed constraint (expert_id, sample1_id,
        sample2_id, constraint_type), where
        1) expert_id: varies between [0, M-1]
        2) sample1 id: varies between [0, N-1]
        3) sample2 id: varies between [0, N-1]
        4) constraint_type: 1 means must-link and 0 means cannot-link

    Ncon: shape(M, 1)
        number of constraints provided by each expert

    G: int
        number of local consensus solutions in the truncated Dirichlet process posterior

    M: int
        number of experts

    K: int
        maximal number of clusters among the different solutions; because
        discriminative clustering is used, some local solutions might have
        empty clusters

    Returns
    -------
    """

    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\
            mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)  # len(M)

    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)  # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)  # len(G)
    E_C = phi  # shape(M, G)
    E_W = mu_w  # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)  # shape(G, K)

    E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \
            (prior['tau_a2']-1) * E_log_OneMinusAlpha -  \
            gammaln(prior['tau_a1']+eps) - \
            gammaln(prior['tau_a2']+eps) + \
            gammaln(prior['tau_a1']+prior['tau_a2']+eps)

    E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \
            (prior['tau_b2']-1) * E_log_OneMinusBeta - \
            gammaln(prior['tau_b1']+eps) - \
            gammaln(prior['tau_b2']+eps) + \
            gammaln(prior['tau_b1']+prior['tau_b2']+eps)

    E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \
            gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
            gammaln(tau_a1+tau_a2 + eps)

    E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \
            gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
            gammaln(tau_b1+tau_b2 + eps)

    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]
    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]

        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)

        L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\
                (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \
                tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \
                E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
                tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])

        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g - 1])

        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
                np.dot(phi[m], E_log_Nu) + np.sum([fg(j) for j in np.arange(G)])

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \
                np.log(prior['gamma']+eps)

        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2),
                            axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)

        t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \
                0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \
                (mu_w[g]-prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \
                0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \
                (mu_b[g]-prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g]+(tau_v2[g]-1)*E_log_OneMinusNu[g] -\
                np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \
                np.log(gamma(tau_v1[g]+tau_v2[g])+eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    # Note the third term should have a minus sign before it
    ELBO = L_1 + L_2 - L_3
    #ELBO = L_1 + L_2

    return -ELBO
Example #37
 def cost(tensor, home, appliance, time):
     pred = np.einsum('Hh, hAt, tT ->HAT', home, appliance, time)
     mask = ~np.isnan(tensor)
     error = (pred - tensor)[mask].flatten()
     return np.sqrt((error**2).mean())
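A hypothetical sketch of the shapes cost above expects; the sizes are assumptions. home is (H, h), appliance is (h, A, t), time is (t, T), and NaN entries of the observation tensor are excluded from the RMSE by the mask.

import numpy as np

H, A, T, h, t = 4, 3, 24, 2, 2
home = np.random.rand(H, h)
appliance = np.random.rand(h, A, t)
time = np.random.rand(t, T)
tensor = np.einsum('Hh, hAt, tT ->HAT', home, appliance, time)
tensor[0, 0, 0] = np.nan                     # missing observations are ignored
print(cost(tensor, home, appliance, time))   # ~0 when the data is exactly low-rank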
Example #38
 def sample(self, z, x, input=None, tag=None):
     T = z.shape[0]
     z = np.zeros_like(z, dtype=int) if self.single_subspace else z
     mus = self.forward(x, input, tag)
     etas = np.exp(self.inv_etas)
     return mus[np.arange(T), z, :] + np.sqrt(etas[z]) * npr.randn(T, self.N)
Example #39
gpcsd_gen.temporal_cov_list[1].params['ell']['value'] = elltM_true
gpcsd_gen.temporal_cov_list[1].params['sigma2']['value'] = sig2tM_true

# %% Generate CSD and sample at interior electrode positions for comparing to tCSD
csd = gpcsd_gen.sample_prior(2 * ntrials)
csd_interior_electrodes = np.zeros((nx - 2, nt, 2 * ntrials))
for trial in range(2 * ntrials):
    csdinterp = scipy.interpolate.RectBivariateSpline(z, t, csd[:, :, trial])
    csd_interior_electrodes[:, :, trial] = csdinterp(xshort, t)

# %% Pass through forward model, add white noise
lfp = np.zeros((nx, nt, 2 * ntrials))
for trial in range(2 * ntrials):
    lfp[:, :, trial] = fwd_model_1d(csd[:, :, trial], z, x, R_true)
lfp = lfp + np.random.normal(
    0, np.sqrt(sig2n_true), size=(nx, nt, 2 * ntrials))
lfp = normalize(lfp)

# %% Visualize one trial
plt.figure()
plt.subplot(121)
plt.imshow(csd[:, :, 0], vmin=-1, vmax=1, cmap='bwr', aspect='auto')
plt.title('CSD')
plt.xlabel('Time')
plt.ylabel('depth')
plt.colorbar()
plt.subplot(122)
plt.imshow(lfp[:, :, 0], cmap='bwr', aspect='auto')
plt.title('LFP')
plt.xlabel('Time')
plt.colorbar()
 def lognorm(self, ws):
     return np.exp(-0.5 * (np.log(ws) - self.norm_mean)**2 / self.norm_sig**2) / np.sqrt(2 * np.pi) / self.norm_sig / ws
Example #41
File: mlp.py  Project: mhw32/adaware-nlp
def mat_cosine_dist(X, Y):
    prod = np.diagonal(np.dot(X, Y.T), offset=0, axis1=-1, axis2=-2)
    len1 = np.sqrt(np.diagonal(np.dot(X, X.T), offset=0, axis1=-1, axis2=-2))
    len2 = np.sqrt(np.diagonal(np.dot(Y, Y.T), offset=0, axis1=-1, axis2=-2))
    return np.divide(np.divide(prod, len1), len2)
Example #42
    def fit(self, batch_size, epochs=500, learning_rate=0.0001):
        """STEP 1: Set up what the optimization routine will be"""
        """Just to streamline with GVI code, re-name variables"""
        self.M = min(batch_size, self.n)
        Y = self.Y
        X = self.X
        """Create objective & take gradient"""
        objective = self.create_objective()
        objective_gradient = grad(objective)
        params = self.params
        """STEP 2: Sample from X, Y and perform ADAM steps"""
        """STEP 2.1: These are just the ADAM optimizer default settings"""
        m1 = 0
        m2 = 0
        beta1 = 0.9
        beta2 = 0.999
        epsilon = 1e-8
        t = 0
        """STEP 2.2: Loop over #epochs and take step for each subsample"""
        for epoch in range(epochs):
            """STEP 2.2.1: For each epoch, shuffle the data"""
            permutation = np.random.choice(range(Y.shape[0]),
                                           Y.shape[0],
                                           replace=False)
            """HERE: Should add a print statement here to monitor algorithm!"""
            if epoch % 100 == 0:
                print("epoch #", epoch, "/", epochs)
                #print("sigma2", np.exp(-q_params[3]))
            """STEP 2.2.2: Process M data points together and take one step"""
            for i in range(0, int(self.n / self.M)):
                """Get the next M observations (or less if we would run out
                of observations otherwise)"""
                end = min(self.n, (i + 1) * self.M)
                indices = permutation[(i * self.M):end]
                """ADAM step for this batch"""
                t += 1
                if X is not None:
                    if False:
                        print("Y", Y[indices])
                        print(
                            "X*coefs",
                            np.matmul(X[indices, :],
                                      np.array([1.0, -2.0, 0.5, 4.0, -3.5])))
                        print("X*params", np.matmul(X[indices, :],
                                                    params[:-1]))

                    grad_params = objective_gradient(params, self.parser,
                                                     Y[indices], X[indices, :])
                else:
                    grad_params = objective_gradient(params,
                                                     self.parser,
                                                     Y[indices],
                                                     X_=None)

#                print(grad_params)
#                print("before:", params)
                m1 = beta1 * m1 + (1 - beta1) * grad_params
                m2 = beta2 * m2 + (1 - beta2) * grad_params**2
                m1_hat = m1 / (1 - beta1**t)
                m2_hat = m2 / (1 - beta2**t)
                params -= learning_rate * m1_hat / (np.sqrt(m2_hat) + epsilon)


#                print("after", params)

        self.params = params
Example #43
def ELBO_terms(param, prior, X, S, Ncon, G, M, K):
    eps = 1e-12

    # get sample size and feature size
    [N, D] = np.shape(X)

    # unpack the input parameter vector
    [tau_a1, tau_a2, tau_b1, tau_b2, phi, tau_v1, tau_v2, mu_w, sigma_w,\
            mu_b, sigma_b] = unpackParam(param, N, D, G, M, K)

    # compute eta given mu_w and mu_b
    eta = np.zeros((0, K))
    for g in np.arange(G):
        t1 = np.exp(np.dot(X, mu_w[g]) + mu_b[g])
        t2 = np.transpose(np.tile(np.sum(t1, axis=1), (K, 1)))
        eta = np.vstack((eta, t1 / t2))
    eta = np.reshape(eta, (G, N, K))

    # compute the expectation terms to be used later
    E_log_Alpha = digamma(tau_a1) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_OneMinusAlpha = digamma(tau_a2) - digamma(tau_a1 + tau_a2)  # len(M)
    E_log_Beta = digamma(tau_b1) - digamma(tau_b1 + tau_b2)  # len(M)
    E_log_OneMinusBeta = digamma(tau_b2) - digamma(tau_b1 + tau_b2)  # len(M)

    E_log_Nu = digamma(tau_v1) - digamma(tau_v1 + tau_v2)  # len(G)
    E_log_OneMinusNu = digamma(tau_v2) - digamma(tau_v1 + tau_v2)  # len(G)
    E_C = phi  # shape(M, G)
    E_W = mu_w  # shape(G, D, K)
    E_WMinusMuSqd = sigma_w**2 + (mu_w - prior['mu_w'])**2  # shape(G, D, K)
    E_BMinusMuSqd = sigma_b**2 + (mu_b - prior['mu_b'])**2  # shape(G, K)
    E_ExpB = np.exp(mu_b + 0.5 * sigma_b**2)  # shape(G, K)

    E_logP_Alpha = (prior['tau_a1']-1) * E_log_Alpha + \
            (prior['tau_a2']-1) * E_log_OneMinusAlpha -  \
            gammaln(prior['tau_a1']+eps) - \
            gammaln(prior['tau_a2']+eps) + \
            gammaln(prior['tau_a1']+prior['tau_a2']+eps)

    E_logP_Beta = (prior['tau_b1']-1) * E_log_Beta + \
            (prior['tau_b2']-1) * E_log_OneMinusBeta - \
            gammaln(prior['tau_b1']+eps) - \
            gammaln(prior['tau_b2']+eps) + \
            gammaln(prior['tau_b1']+prior['tau_b2']+eps)

    E_logQ_Alpha = (tau_a1-1)*E_log_Alpha + (tau_a2-1)*E_log_OneMinusAlpha - \
            gammaln(tau_a1 + eps) - gammaln(tau_a2 + eps) + \
            gammaln(tau_a1+tau_a2 + eps)

    E_logQ_Beta = (tau_b1-1)*E_log_Beta + (tau_b2-1)*E_log_OneMinusBeta - \
            gammaln(tau_b1 + eps) - gammaln(tau_b2 + eps) + \
            gammaln(tau_b1+tau_b2 + eps)

    E_logQ_C = np.sum(phi * np.log(phi + eps), axis=1)

    eta_N_GK = np.reshape(np.transpose(eta, (1, 0, 2)), (N, G * K))

    # compute three terms and then add them up
    L_1, L_2, L_3 = [0., 0., 0.]
    # the first term and part of the second term
    for m in np.arange(M):
        idx_S = range(sum(Ncon[:m]), sum(Ncon[:m]) + Ncon[m])
        tp_con = S[idx_S, 3]

        phi_rep = np.reshape(np.transpose(np.tile(phi[m], (K, 1))), G * K)
        E_A = np.dot(eta_N_GK, np.transpose(eta_N_GK * phi_rep))
        E_A_use = E_A[S[idx_S, 1], S[idx_S, 2]]
        tp_Asum = np.sum(E_A_use)
        tp_AdotS = np.sum(E_A_use * tp_con)

        L_1 = L_1 + Ncon[m]*E_log_Beta[m] + np.sum(tp_con)*\
                (E_log_OneMinusBeta[m]-E_log_Beta[m]) + \
                tp_AdotS * (E_log_Alpha[m] + E_log_Beta[m] - \
                E_log_OneMinusAlpha[m] - E_log_OneMinusBeta[m]) + \
                tp_Asum * (E_log_OneMinusAlpha[m] - E_log_Beta[m])

        fg = lambda g: phi[m, g] * np.sum(E_log_OneMinusNu[0:g - 1])

        L_2 = L_2 + E_logP_Alpha[m] + E_logP_Beta[m] + \
                np.dot(phi[m], E_log_Nu) + np.sum([fg(j) for j in np.arange(G)])

    # the second term
    for g in np.arange(G):
        tp_Nug = (prior['gamma']-1)*E_log_OneMinusNu[g] + \
                np.log(prior['gamma']+eps)

        t1 = np.dot(X, mu_w[g])
        t2 = 0.5 * np.dot(X**2, sigma_w[g]**2)
        t3 = np.sum(eta[g], axis=1)
        t_mat_i = logsumexp(np.add(mu_b[g] + 0.5 * sigma_b[g]**2, t1 + t2),
                            axis=1)
        tp_Zg = np.sum(eta[g] * np.add(t1, mu_b[g])) - np.dot(t3, t_mat_i)

        t5 = -np.log(np.sqrt(2*np.pi)*prior['sigma_w']) - \
                0.5/(prior['sigma_w']**2) * (sigma_w[g]**2 + \
                (mu_w[g]-prior['mu_w'])**2)
        tp_Wg = np.sum(t5)
        t6 = -np.log(np.sqrt(2*np.pi)*prior['sigma_b']+eps) - \
                0.5/(prior['sigma_b']**2) * (sigma_b[g]**2 + \
                (mu_b[g]-prior['mu_b'])**2)
        tp_bg = np.sum(t6)
        L_2 = L_2 + tp_Nug + tp_Zg + tp_Wg + tp_bg

    # the third term
    L_3 = np.sum(E_logQ_Alpha + E_logQ_Beta + E_logQ_C)
    for g in np.arange(G):
        tp_Nug3 = (tau_v1[g]-1)*E_log_Nu[g]+(tau_v2[g]-1)*E_log_OneMinusNu[g] -\
                np.log(gamma(tau_v1[g])+eps) - np.log(gamma(tau_v2[g])+eps) + \
                np.log(gamma(tau_v1[g]+tau_v2[g])+eps)
        tp_Zg3 = np.sum(eta[g] * np.log(eta[g] + eps))
        tp_Wg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_w[g] + eps) - 0.5)
        tp_bg3 = np.sum(-np.log(np.sqrt(2 * np.pi) * sigma_b[g] + eps) - 0.5)
        L_3 = L_3 + tp_Nug3 + tp_Zg3 + tp_Wg3 + tp_bg3

    return (L_1, L_2, L_3)
Example #44
 def embedded_ω(self, H, P, g, dθ) -> "Extrinsic frequency":
     v2 = self.embedded_velocity2(g, dθ)
     a2 = self.embedded_acceleration2(H, P, dθ)
     return np.sqrt(np.abs(v2 * a2))
Example #45
 def func(y, t, arg1, arg2):
     return -np.sqrt(t) - y + arg1 - np.mean((y + arg2)**2)
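A hedged usage sketch; the arguments and time grid are assumptions. func above matches the f(y, t, *args) signature that scipy.integrate.odeint expects.

import numpy as np
from scipy.integrate import odeint

t = np.linspace(0.0, 5.0, 50)
sol = odeint(func, np.array([1.0]), t, args=(0.5, 0.2))
print(sol.shape)  # (50, 1)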
Example #46
def test_unary():
    grad_test(lambda x: ti.sqrt(x), lambda x: np.sqrt(x))
    grad_test(lambda x: ti.exp(x), lambda x: np.exp(x))
    grad_test(lambda x: ti.log(x), lambda x: np.log(x))
Example #47
from ubvi.autograd import logsumexp


def logp(x):
    return (-np.log(1 + x**2) - np.log(np.pi)).flatten()


np.random.seed(1)

N_runs = 20
N = 30
d = 1
diag = True
n_samples = 2000
n_logfg_samples = 10000
adam_learning_rate = lambda itr: 10. / np.sqrt(itr + 1)
adam_num_iters = 10000
n_init = 10000
init_inflation = 16
lmb = lambda itr: 1. / np.sqrt(1 + itr)

gauss = Gaussian(d, diag)
adam = lambda x0, obj, grd: ubvi_adam(x0,
                                      obj,
                                      grd,
                                      adam_learning_rate,
                                      adam_num_iters,
                                      callback=gauss.print_perf)

if not os.path.exists('results/'):
    os.mkdir('results')
Example #48
import sys
sys.path.append('../../src/')
import autograd.numpy as np
from dOTDmodel import dOTDModel
from gendata import GenData
from rhs import rhs_3D


if __name__ == '__main__':

    notd = 3   	   # Number of dOTD modes to be learned
    npts = 10      # Number of training points
    rhs = rhs_3D   # Right-hand side in governing equations

    ### Generate long trajectory
    mu = 0.1; sqmu = np.sqrt(mu)
    z0 = np.array([sqmu*np.cos(1), sqmu*np.sin(1), mu+1e-3])
    ndim = z0.shape[0]
    u0 = np.array([[-0.84, -0.40, -0.36], \
                   [ 0.54, -0.63, -0.55], \
                   [ 0.00, -0.65,  0.75]])
    dt = 0.01
    tf = 50
    gen = GenData(z0, u0, tf, dt, rhs)
    t, Z, U = gen.trajectory()

    ### Generate training, validation, and testing sets
    kwargs = dict(rec=True, n_neighbors=7)
    ind_trn = np.where((t >= 20) & (t < 20+2*np.pi))[0] 
    a = np.floor(len(ind_trn)/(npts-1))
    ind_trn = ind_trn[::int(a)]
Example #49
 def external_ωv(
     self, θ: "Model parameters", dθ: "derivatives of model parameters"
 ) -> "Extrinsic normalized frequency":
     v2 = self.external_v2(θ, dθ)
     a2 = self.external_a2(θ, dθ)
     return np.sqrt(np.abs(a2))
Example #50
def gaussian(x, loc=None, scale=None):
    ''' N(x; loc, scale) 
    '''
    y = (x - loc) / scale
    return np.exp(-0.5 * y**2) / np.sqrt(2. * np.pi) / scale
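A quick check, not from the original source, that gaussian above matches the normal density from scipy.

import numpy as np
from scipy.stats import norm

xs = np.linspace(-3.0, 3.0, 7)
print(np.allclose(gaussian(xs, loc=0.5, scale=2.0), norm.pdf(xs, loc=0.5, scale=2.0)))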
Example #51
        [initial_mean, initial_log_sigma])
    # Optimize
    print("-> Optimizing variational parameters...")
    print("-> Initial ELBO: {}".format(ELBO(initial_variational_params)))
    vparams = initial_variational_params
    for epoch in range(num_epochs):
        lr = next(sched)
        ### Adam optimizer
        g = gradient(vparams)
        m = beta1 * m + (1 - beta1) * g
        v = beta2 * v + (1 - beta2) * (g**2)
        # Correcting biased terms
        mhat = m / (1 - beta1**(epoch + 1))
        vhat = v / (1 - beta2**(epoch + 1))
        # Update step
        vparams -= lr * mhat / (np.sqrt(vhat) + epsilon)

        ### Logging and sampling from posterior
        print("Epoch {} -> ELBO: {}".format(epoch, ELBO(vparams)))
        # Sample from posterior
        num_posterior_samples = 10
        mu, log_sigma = vparams[:num_weights], vparams[num_weights:]
        posterior_samples = mu + np.exp(log_sigma) * np.random.randn(
            num_posterior_samples, num_weights)

        plot_inputs = np.linspace(-8, 8, num=400)
        outputs = forward(posterior_samples, np.expand_dims(plot_inputs, 1))
        # Plot
        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'bx')
        ax.plot(plot_inputs, outputs[:, :, 0].T)
Example #52
 def percentile(self, p):
     return np.exp(self.mu_ +
                   np.sqrt(2 * self.sigma_**2) * erfinv(1 - 2 * p))
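A hedged check of the formula in percentile above; the numbers are assumptions. For a log-normal variable with parameters (mu_, sigma_), exp(mu_ + sqrt(2*sigma_**2) * erfinv(1 - 2*p)) is the (1 - p) quantile, i.e. the time by which only a fraction p still survives.

import numpy as np
from scipy.special import erfinv
from scipy.stats import lognorm

mu_, sigma_, p = 1.3, 0.7, 0.25
closed_form = np.exp(mu_ + np.sqrt(2 * sigma_**2) * erfinv(1 - 2 * p))
print(np.isclose(closed_form, lognorm.ppf(1 - p, s=sigma_, scale=np.exp(mu_))))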
def molecular_pursuit(
        target,
        code_coefs,
        basis_size=4,
        rtol=0.01,  # results are sensitive to this parameter
        rcond=1e-3,  # lax colinearity condition for approximation
        cutoff=100.0,  # higher gains than this are unlikely to be stable
        pitched=True,
        match_rtol=0.01,  # stop search early if good enough
        verbose=0,
        **molecule_args):
    """
    In the molecular matching pursuit problem we approximate a target
    correlation profile with a codebook of correlation molecules by
    maximising inner product.

    There are various distinctions between this and the atomic case.

    * we need to preserve the identity of the molecules
    * we don't need to precondition the tau term since tau and w are now coupled
    * so we only have 2 coefs
    * no bias term
    * tedious to get a basis dictionary that has unit gain
    * despite this we normalise and do a true matching pursuit
    * ...
    """
    n_pts = target.size
    t = np.arange(n_pts)

    scale = max(np.sqrt(np.sum(np.square(target))), 1e-8)
    init_scale = scale
    deviance = scale
    # print("scale", scale)
    residual = target

    gain_rate = np.zeros((2, basis_size))
    gain_rate[1] = 1
    molecule_idx = -np.ones(basis_size, dtype=int)

    basis_eval = np.zeros((n_pts, basis_size))

    for i in range(basis_size):
        # print("-----", i)
        # progress = (i-1) / (basis_size-2)
        rate, idx = choose_molecule(
            residual,
            code_coefs,
            t=t,
            pitched=pitched,
            verbose=verbose,
            match_rtol=match_rtol,
            **molecule_args,
        )
        molecule_idx[i] = idx
        # print('prm', mag, w, tau, phi)
        gain_rate[:, i] = rate
        # molecule_eval = decaycos_eval(t, 1, w, tau, phi).ravel()
        # residual = target - molecule_eval
        basis_eval[:, i] = molecular_eval(
            t, *molecular_scale(code_coefs[idx], 1.0, rate))
        gain_scale, sum_resid, rank, s = lstsq(basis_eval[:, :i + 1],
                                               target.reshape(-1, 1),
                                               rcond=rcond)
        # print(basis_eval[:, :i + 1])
        # print("gain scale\n", gain_scale.ravel())
        # print("gain rate\n", gain_rate[:, :i + 1])

        if ((np.max(np.abs(gain_rate[0, :i + 1] * gain_scale.ravel())) >
             cutoff) and verbose >= 14):
            # exploding results indicate colinear molecules;
            warnings.warn(
                'exploding solution at step {}\n'
                'try raising `rcond`:\n'
                '{}->{}'.format(
                    i, gain_scale.ravel(),
                    gain_rate[0, :i + 1].ravel() * gain_scale.ravel()))
            # Now what?
        else:
            gain_rate[0, :i + 1] *= gain_scale.ravel()
            basis_eval[:, :i + 1] *= gain_scale.reshape((1, -1))

        curr_approx = np.sum(basis_eval[:, :i + 1], axis=1)

        new_residual = target - curr_approx
        # plt.figure()
        # plt.plot(target, label='target')
        # plt.plot(curr_approx, label='hat')
        # plt.plot(residual, label='oldres')
        # plt.plot(new_residual, label='newres')
        # plt.plot(basis_eval[:, i]*mag, label='new_molecular')
        # plt.legend()
        # plt.show()
        # import pdb; pdb.set_trace()
        residual = new_residual
        new_deviance = np.sqrt(np.sum(np.square(new_residual)))
        # print("deviance", deviance, "/", scale)
        if deviance - new_deviance < rtol * scale:
            # we didn't improve the match so we won't next step either
            if verbose >= 17:
                print("failed to improve", new_deviance, "-", deviance, "<",
                      rtol, "*", scale)
            break

        deviance = new_deviance
        scale /= new_deviance

    loss = deviance / scale
    return gain_rate[:, :i + 1], molecule_idx[:i + 1], loss, init_scale
Example #54
 def _evaluate(self, x, out, *args, **kwargs):
     part1 = -1. * self.a * anp.exp(-1. * self.b * anp.sqrt((1. / self.n_var) * anp.sum(x * x, axis=1)))
     part2 = -1. * anp.exp((1. / self.n_var) * anp.sum(anp.cos(self.c * x), axis=1))
     out["F"] = part1 + part2 + self.a + anp.exp(1)
def choose_molecule_pitch_opt(target,
                              code_coef,
                              maxiter=5,
                              t=None,
                              lr=0.01,
                              low_pitch=0.5**0.5,
                              high_pitch=2.0**0.5,
                              n_starts=65,
                              trace=False,
                              pdb=False,
                              verbose=0,
                              norm_method='analytic',
                              **molecule_args):
    """
    choose pitch for one molecule and return inner product at that pitch
    """
    if t is None:
        t = np.arange(target.size)

    rates = np.exp(
        np.linspace(np.log(low_pitch),
                    np.log(high_pitch),
                    n_starts + 2,
                    endpoint=True)[1:-1])

    max_step = (high_pitch - low_pitch) / n_starts

    def multi_objective(rates):
        """
        normalised inner product for each rate
        """
        molecules = [molecular_scale(
            code_coef,
            1,
            rate,
        ) for rate in rates]
        normecules = np.array([
            molecular_eval_norm(t,
                                *molecule,
                                norm_method=norm_method,
                                verbose=verbose) for molecule in molecules
        ])
        if not np.all(np.isfinite(normecules)) and verbose >= 1:
            exploded = np.isfinite(normecules.sum(1))
            warnings.warn(
                "{} normed molecules {} exploded with\n{} at rates\n{}".format(
                    np.sum(exploded), normecules.shape, code_coef,
                    rates[exploded]))
        obj = np.array([np.dot(normecule, target) for normecule in normecules])
        return np.nan_to_num(obj)

    grad = elementwise_grad(multi_objective)

    # f, axarr = plt.subplots(2, 1)
    if trace:
        trace_list = []
    for step_i in range(maxiter):
        # gradient ascent
        jac = grad(rates)
        if not np.all(np.isfinite(jac)) and verbose >= 1:
            warnings.warn(
                "jac exploded {}\nfor coefs \n{}\nat rate {}\nwith obj {}".
                format(jac, code_coef, rates, multi_objective(rates)))
        jac = np.nan_to_num(jac)
        step = np.clip(lr * jac, -max_step, max_step)
        if pdb:
            print(step_i, "jac", np.sqrt((jac**2).mean()), "step",
                  np.sqrt((step**2).mean()))
            from IPython.core.debugger import set_trace
            set_trace()

        # val = multi_objective(rates)
        # best = np.argmax(val)
        # stepsize = jac[best]
        # print('stepsize', stepsize)

        # axarr[0].quiver(
        #     rates,  # X
        #     val,  # Y
        #     step,  # U
        #     np.zeros_like(step),  # V
        #     np.full_like(step, step_i/(maxiter-1)),  # C
        #     cmap="magma",
        #     angles='xy',
        #     label="step {}".format(step_i))
        # axarr[1].scatter(
        #     rates,  # X
        #     val,  # Y
        #     cmap="magma",
        #     label="step {}".format(step_i))

        rates = rates + step  # gradient ascent step
        rates = np.clip(rates, low_pitch, high_pitch)
        if trace:
            trace_list.append((multi_objective(rates), rates, jac, step))
        if verbose >= 21:
            max_goodness = np.amax(multi_objective(rates))
            print(
                "max_goodness at ",
                step_i,
            )
            if not np.isfinite(max_goodness):
                from IPython.core.debugger import set_trace
                set_trace()

    if trace:
        return trace_list

    goodnesses = multi_objective(rates)
    best_idx = np.argmax(goodnesses)

    if verbose >= 11:
        print(
            "choose_molecule_pitch_opt",
            best_idx,
            rates[best_idx],
            "@",
            goodnesses[best_idx],
        )

    return rates[best_idx], goodnesses[best_idx]
Example #56
    def cost(tensor, home, appliance, day, hour):
        pred = np.einsum('Hr, Ar, Dr, ATr ->HADT', home, appliance, day, hour)
        mask = ~np.isnan(tensor)
        error = (pred - tensor)[mask].flatten()

        return np.sqrt((error**2).mean())
Example #57
def test_sqrt():
    fun = lambda x : 3.0 * np.sqrt(x)
    d_fun = grad(fun)
    check_grads(fun, 10.0*npr.rand())
    check_grads(d_fun, 10.0*npr.rand())
Example #58
 def embedded_radius(self, H, P, g, dθ) -> "Extrinsic curvature radius":
     v2 = self.embedded_velocity2(g, dθ)
     a2 = self.embedded_acceleration2(H, P, dθ)
     return v2 / np.sqrt(a2)
 def fun(x):
   return np.sqrt(x)
Example #60
 def test_typical_dist(self):
     np_testing.assert_almost_equal(self.manifold.typical_dist,
                                    np.sqrt(self.n * self.k))