Example #1
        def obj(w):
            p = unpack(w)
            p.update(fixed_params)
            f = 0.0

            # ln_p_a, ln_p_mix = self.class_prior()
            ln_p_a = np.log(self.action(p))  # individual- and time-invariant
            logits_mix = p[self.mixture_param_key]
            ln_p_mix = logits_mix - logsumexp(logits_mix)

            for y, x in samples:
                # Outcome model
                mixture = log_likelihood(p, y, x, self.mean, self.cov, self.tr,
                                         ln_p_a, ln_p_mix)
                f -= logsumexp(np.array(mixture))

                # Action model
                _, rx = x
                f -= action_log_likelihood(rx, ln_p_a, self.tr_cont_flag)

            # Regularizers
            for k, _ in trainable_params.items():
                if k.endswith('_F'):
                    f += np.sum(p[k]**2)

            return f
Example #2
def sinkhorn_logspace(logP, niters=10):
    for _ in range(niters):
        # Normalize columns and take the log again
        logP = logP - logsumexp(logP, axis=0, keepdims=True)
        # Normalize rows and take the log again
        logP = logP - logsumexp(logP, axis=1, keepdims=True)
    return logP
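
A quick usage sketch (an illustration, not from the source): iterating the two log-space normalizations above drives exp(logP) toward a doubly stochastic matrix. It assumes the sinkhorn_logspace function above is in scope and that logsumexp comes from scipy.special (the original may import it from elsewhere, e.g. autograd).

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
logP = rng.normal(size=(4, 4))
logP = sinkhorn_logspace(logP, niters=100)
P = np.exp(logP)
print(P.sum(axis=0))   # columns sum to ~1
print(P.sum(axis=1))   # rows sum to 1 exactly (the last normalization is over rows)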
Example #3
def hmm_expected_states(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])

    betas = np.zeros((T, K))
    backward_pass(log_Ps, ll, betas)    

    expected_states = alphas + betas
    expected_states -= logsumexp(expected_states, axis=1, keepdims=True)
    expected_states = np.exp(expected_states)
    
    expected_joints = alphas[:-1,:,None] + betas[1:,None,:] + ll[1:,None,:] + log_Ps
    expected_joints -= expected_joints.max((1,2))[:,None, None]
    expected_joints = np.exp(expected_joints)
    expected_joints /= expected_joints.sum((1,2))[:,None,None]
    
    return expected_states, expected_joints, normalizer
Example #4
def hmm_expected_states(log_pi0, log_Ps, ll):
    T, K = ll.shape

    # Make sure everything is C contiguous
    to_c = lambda arr: np.copy(arr, 'C') if not arr.flags['C_CONTIGUOUS'] else arr
    log_pi0 = to_c(getval(log_pi0))
    log_Ps = to_c(getval(log_Ps))
    ll = to_c(getval(ll))

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])

    betas = np.zeros((T, K))
    backward_pass(log_Ps, ll, betas)    

    expected_states = alphas + betas
    expected_states -= logsumexp(expected_states, axis=1, keepdims=True)
    expected_states = np.exp(expected_states)
    
    expected_joints = alphas[:-1,:,None] + betas[1:,None,:] + ll[1:,None,:] + log_Ps
    expected_joints -= expected_joints.max((1,2))[:,None, None]
    expected_joints = np.exp(expected_joints)
    expected_joints /= expected_joints.sum((1,2))[:,None,None]
    
    return expected_states, expected_joints, normalizer
Example #5
def location_mixture_logpdf(samps, locations, location_weights, distr_at_origin, contr_var = False, variant = 1):
#    lpdfs = zeroprop.logpdf()
    diff = samps - locations[:, np.newaxis, :]
    lpdfs = distr_at_origin.logpdf(diff.reshape([np.prod(diff.shape[:2]), diff.shape[-1]])).reshape(diff.shape[:2])
    logprop_weights = log(location_weights/location_weights.sum())[:, np.newaxis]
    if not contr_var: 
        return logsumexp(lpdfs + logprop_weights, 0)
    #time_m1 = np.hstack([time0[:,:-1],time0[:,-1:]])
    else:
        time0 = lpdfs + logprop_weights + log(len(location_weights))
        
        if variant == 1:
            time1 = np.hstack([time0[:,1:],time0[:,:1]])
            cov = np.mean(time0**2-time0*time1)
            var = np.mean((time0-time1)**2)
            lpdfs = lpdfs - cov / var * (time0 - time1)
            return logsumexp(lpdfs - log(len(location_weights)), 0)
        elif variant == 2:
            cvar = (time0[:,:,np.newaxis] - 
                    np.dstack([np.hstack([time0[:, 1:], time0[:, :1]]),
                               np.hstack([time0[:,-1:], time0[:,:-1]])]))

            
            ## self-covariance matrix of control variates
            K_cvar = np.diag(np.mean(cvar**2, (0, 1)))
            #add off diagonal
            K_cvar = K_cvar + (1.-np.eye(2)) * np.mean(cvar[:,:,0]*cvar[:,:,1])
            
            ## covariance of control variates with random variable
            cov = np.mean(time0[:,:,np.newaxis] * cvar, 0).mean(0)
            
            optimal_comb = np.linalg.inv(K_cvar) @ cov
            lpdfs = lpdfs - cvar @ optimal_comb
            return logsumexp(lpdfs - log(len(location_weights)), 0)
Example #6
def hmm_logZ_python(natparam):
    init_params, pair_params, node_params = natparam

    log_alpha = init_params + node_params[0]
    for node_param in node_params[1:]:
        log_alpha = logsumexp(log_alpha[:,None] + pair_params, axis=0) + node_param

    return logsumexp(log_alpha)
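
A minimal self-check of the recursion above, under the assumption that natparam is a tuple of initial (K,), pairwise (K, K), and per-node (T, K) log-potentials: the recursive log-normalizer should match brute-force enumeration on a tiny chain.

import itertools
import numpy as np
from scipy.special import logsumexp

K, T = 2, 3
rng = np.random.default_rng(0)
init = rng.normal(size=K)
pair = rng.normal(size=(K, K))
node = rng.normal(size=(T, K))

logZ = hmm_logZ_python((init, pair, node))      # recursion defined above

# Brute force: sum over all K**T state sequences
scores = [init[z[0]] + node[0, z[0]]
          + sum(pair[z[t - 1], z[t]] + node[t, z[t]] for t in range(1, T))
          for z in itertools.product(range(K), repeat=T)]
assert np.allclose(logZ, logsumexp(np.array(scores)))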
Example #7
    def single_episode_log_partition_function(episode):
        log_p_state = log_p_init
        for action, rendering in episode:
            log_p_state = (logsumexp(
                log_p_state[:, None] + log_p_dynamics[action], axis=0) +
                           log_p_render[:, rendering])

        return logsumexp(log_p_state)
Example #8
def hmm_expected_states(log_pi0, log_Ps, ll, memlimit=2**31):
    T, K = ll.shape

    # Make sure everything is C contiguous
    log_pi0 = to_c(log_pi0)
    log_Ps = to_c(log_Ps)
    ll = to_c(ll)

    alphas = np.zeros((T, K))
    forward_pass(log_pi0, log_Ps, ll, alphas)
    normalizer = logsumexp(alphas[-1])

    betas = np.zeros((T, K))
    backward_pass(log_Ps, ll, betas)

    # Compute E[z_t] for t = 1, ..., T
    expected_states = alphas + betas
    expected_states -= logsumexp(expected_states, axis=1, keepdims=True)
    expected_states = np.exp(expected_states)

    # Compute E[z_t, z_{t+1}] for t = 1, ..., T-1
    # Note that this is an array of size T*K*K, which can be quite large.
    # To be a bit more frugal with memory, first check if the given log_Ps
    # are TxKxK.  If so, instantiate the full expected joints as well, since
    # we will need them for the M-step.  However, if log_Ps is 1xKxK then we
    # know that the transition matrix is stationary, and all we need for the
    # M-step is the sum of the expected joints.
    stationary = (log_Ps.shape[0] == 1)
    if not stationary:
        expected_joints = (alphas[:-1, :, None] + betas[1:, None, :]
                           + ll[1:, None, :] + log_Ps)
        expected_joints -= expected_joints.max((1, 2))[:, None, None]
        expected_joints = np.exp(expected_joints)
        expected_joints /= expected_joints.sum((1, 2))[:, None, None]

    else:
        # Compute the sum over time axis of the expected joints
        # Limit ourselves to approximately 1GB of memory, assuming
        # the entries are float64's (8 bytes)
        batch_size = int(memlimit / (8 * K * K))
        assert batch_size > 0

        expected_joints = np.zeros((1, K, K))
        for start in range(0, T - 1, batch_size):
            stop = min(T - 1, start + batch_size)

            # Compute expectations in this batch
            tmp = (alphas[start:stop, :, None]
                   + betas[start + 1:stop + 1, None, :]
                   + ll[start + 1:stop + 1, None, :] + log_Ps)
            tmp -= tmp.max((1, 2))[:, None, None]
            tmp = np.exp(tmp)
            tmp /= tmp.sum((1, 2))[:, None, None]
            expected_joints += tmp.sum(axis=0)

    return expected_states, expected_joints, normalizer
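
The stationary branch above sums normalized joints batch by batch to cap memory. A small sketch (random arrays stand in for the real forward/backward messages, purely to illustrate the bookkeeping) confirming that the batched accumulation equals the all-at-once sum used in the non-stationary branch:

import numpy as np

T, K, batch_size = 23, 4, 5
rng = np.random.default_rng(0)
alphas, betas, ll = (rng.normal(size=(T, K)) for _ in range(3))
log_Ps = rng.normal(size=(1, K, K))

# All at once, as in the non-stationary branch
full = alphas[:-1, :, None] + betas[1:, None, :] + ll[1:, None, :] + log_Ps
full -= full.max((1, 2))[:, None, None]
full = np.exp(full)
full /= full.sum((1, 2))[:, None, None]
expected = full.sum(axis=0)

# Batch by batch, as in the stationary branch
batched = np.zeros((K, K))
for start in range(0, T - 1, batch_size):
    stop = min(T - 1, start + batch_size)
    tmp = (alphas[start:stop, :, None] + betas[start + 1:stop + 1, None, :]
           + ll[start + 1:stop + 1, None, :] + log_Ps)
    tmp -= tmp.max((1, 2))[:, None, None]
    tmp = np.exp(tmp)
    tmp /= tmp.sum((1, 2))[:, None, None]
    batched += tmp.sum(axis=0)

assert np.allclose(expected, batched)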
Example #9
def log_partition_function(natural_params, data):
    if isinstance(data, list):
        return sum(map(partial(log_partition_function, natural_params), data))

    log_pi, log_A, log_B = natural_params
    log_alpha = log_pi
    for y_t in data:
        log_alpha = logsumexp(log_alpha[:,None] + log_A, axis=0) + log_B[:,y_t]
    return logsumexp(log_alpha)
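
A usage sketch, assuming log_pi, log_A, log_B are already-normalized log-probabilities and that the function above (with its functools.partial import) is in scope; with normalized parameters the returned value is the total log marginal likelihood of the observation sequences.

import numpy as np

K, V = 3, 5
rng = np.random.default_rng(1)
log_pi = np.log(rng.dirichlet(np.ones(K)))
log_A = np.log(rng.dirichlet(np.ones(K), size=K))   # row i: p(z_t = . | z_{t-1} = i)
log_B = np.log(rng.dirichlet(np.ones(V), size=K))   # row k: p(y_t = . | z_t = k)

seqs = [rng.integers(V, size=10), rng.integers(V, size=7)]
total = log_partition_function((log_pi, log_A, log_B), seqs)
print(total)   # sum of the two per-sequence log marginals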
Example #10
 def energy(w, X, y, v_prior, m_prior, K, N, alpha):
     """Extract parameters"""
     q = get_parameters_q(w, v_prior)
     v_noise = np.exp(parser.get(w, 'log_v_noise')[ 0, 0 ])
     
     """Note: A-approx computes its own log_factor value inside the helper
     function log_Z_likelihood, so we can shave of some computation time"""
     if Dtype != "A-approx":
         samples_q = draw_samples(q, K)
         log_factor_value = 1.0 * N * log_likelihood_factor(samples_q, v_noise, X, y)
     
     if Dtype == "KL":
         """I.e., standard VI"""
         KL = np.sum(-0.5 * np.log(2 * math.pi * v_prior) - 0.5 * ((q[ 'm' ]-m_prior)**2 + q[ 'v' ]) / v_prior) - \
             np.sum(-0.5 * np.log(2 * math.pi * q[ 'v' ] * np.exp(1)))
         vfe = -(np.mean(log_factor_value) + KL)
         
     elif Dtype == "AR-approx":
         """NOTE: Needs modification to be GVI"""
         logp0 = log_prior(samples_q, v_prior, m_prior)
         logq = log_q(samples_q, q)
         logF = logp0 + log_factor_value - logq
         logF = (1 - alpha) * logF
         vfe = -(logsumexp(logF) - np.log(K))            
         vfe = vfe / (1 - alpha)
         
     elif Dtype == "AB-approx":
         logp0 = log_prior(samples_q, v_prior, m_prior)
         logq = log_q(samples_q, q)
         part1 = (alpha + beta_D) * (log_factor_value  + logp0) - logq
         part2 = (alpha + beta_D -1 ) * logq
         part3 = (beta_D * (log_factor_value  + logp0) + (alpha - 1) * logq)
         vfe = ( (1.0 / (alpha * (alpha + beta_D))) * (logsumexp(part1) - np.log(K)) 
                 + (1.0 / (beta_D * (alpha + beta_D))) * (logsumexp(part2) - np.log(K)) 
                 - (1.0 / (alpha * beta_D)) * (logsumexp(part3) - np.log(K)))
         
     elif Dtype == "A-approx":
         f_hat = get_parameters_f_hat(q, v_prior, m_prior, N) 
         vfe =  -log_normalizer(q) - 1.0 * N / X.shape[ 0 ] / alpha * log_Z_likelihood(q, f_hat, 
                           v_noise, X, y, K) + log_Z_prior(v_prior,m_prior)
         
     elif Dtype == "AR":
         prior_reg = (1/(alpha*(alpha-1))) * prior_regularizer(q,v_prior, m_prior,alpha)
         vfe = -np.mean(log_factor_value) + prior_reg
     
     #NOTE: While this should work, this is the alpha-divergence regularizer, which 
     #       overconcentrates substantially. We refer to the appendix of our 
     #       paper for some visuals on this phenomenon. The performance from this
     #       divergence should be expected to be much worse than that for the
     #       Alpha-renyi as Uncertainty Quantifier
     elif Dtype == "A":
         prior_reg = (1/(alpha*(alpha-1))) * (
                 np.exp(prior_regularizer(q,v_prior, m_prior,alpha))-1)
         vfe = -np.mean(log_factor_value) + prior_reg
         
     return vfe
Example #11
def single_update_belief_log_probas(prev_belief_log_proba_K,
                                    curr_data_log_proba_K, ltrans, a):
    trans_log_proba_KK = ltrans[:, int(a), :]
    curr_belief_log_proba_K = logsumexp(trans_log_proba_KK +
                                        prev_belief_log_proba_K[:, None],
                                        axis=0)
    curr_belief_log_proba_K = curr_belief_log_proba_K + curr_data_log_proba_K
    log_norm_const = logsumexp(curr_belief_log_proba_K)
    curr_belief_log_proba_K = curr_belief_log_proba_K - log_norm_const

    return curr_belief_log_proba_K
Example #12
	def predict_half(self, X_top):
		"""
		plot the top half of the image concatenated with the marginal distribution over each pixel in the bottom half.
		"""
		X_bot = np.zeros((X_top.shape[0], X_top.shape[1]))
		theta_top, theta_bot = self.theta[:, :392].T, self.theta[:, 392:].T
		for i in range(392):
			constant = np.dot(X_top, np.log(theta_top)) + np.dot(1 - X_top, np.log(1 - theta_top))
			X_bot[:, i] = logsumexp(np.add(constant, np.log(theta_bot[i])), axis=1) - logsumexp(constant, axis=1) 
		save_images(np.concatenate((X_top, np.exp(X_bot)), axis=1), "predict_half.png")
Example #13
def log_partition_function(natural_params, data):
    if isinstance(data, list):
        return sum(map(partial(log_partition_function, natural_params), data))

    log_pi, log_A, log_B = natural_params
    log_alpha = log_pi
    for y_t in data:
        log_alpha = logsumexp(log_alpha[:, None] + log_A, axis=0) + log_B[:, y_t]
    return logsumexp(log_alpha)
Example #14
def build_pomdp(pi, trans, emission_mu, emission_std, data, fcpt, args):

    lpi = pi - logsumexp(pi, axis=0)
    ltrans = trans - logsumexp(trans, axis=-1, keepdims=True)

    ll = 0
    lbelief_state_set_TK = None

    # collect the complete set of beliefs over all sequences
    ll, lbelief_state_set_TK = calc_log_proba_for_many_sequences(
        lpi, ltrans, emission_mu, emission_std, data, fcpt, args)
    return lbelief_state_set_TK, ll
Example #15
def label_meanfield(label_global, gaussian_globals, gaussian_stats):
    partial_contract = lambda a, b: \
        sum(np.tensordot(x, y, axes=np.ndim(y)) for x, y in zip(a, b))

    gaussian_local_natparams = map(niw.expectedstats, gaussian_globals)
    node_params = np.array([
        partial_contract(gaussian_stats, natparam) for natparam in gaussian_local_natparams]).T

    local_natparam = dirichlet.expectedstats(label_global) + node_params
    stats = normalize(np.exp(local_natparam  - logsumexp(local_natparam, axis=1, keepdims=True)))
    vlb = np.sum(logsumexp(local_natparam, axis=1)) - contract(stats, node_params)

    return local_natparam, stats, vlb
Example #16
 def logpdf(self, x):
     comp_logpdf = np.array([self.dist_cat.logpdf(i)+ self.comp_dist[i].logpdf(x)
                           for i in range(len(self.comp_dist))])
     rval = logsumexp(comp_logpdf, 0)
     if len(comp_logpdf.shape) > 1:
         rval = rval.reshape((rval.size, 1))
     return rval
Example #17
    def logpdf_grad(self, x):
        rval = np.array([exp(self.dist_cat.logpdf(i))* self.comp_dist[i].logpdf_grad(x)
                              for i in range(len(self.comp_dist))])

        rval = logsumexp(rval, 0)
        
        return rval
Example #18
def categorical_logpdf(data, logits, mask=None):
    """
    Compute the log probability density of a categorical distribution.
    This will broadcast as long as data and logits have the same
    (or at least compatible) leading dimensions.
    Parameters
    ----------
    data : array_like (..., D) int (0 <= data < C)
        The points at which to evaluate the log density
    logits : array_like (..., D, C)
        The logits of the categorical distribution(s) with C classes
    mask : array_like (..., D) bool
        Optional mask indicating which entries in the data are observed
    Returns
    -------
    lps : array_like (...,)
        Log probabilities under the categorical distribution(s).
    """
    D = data.shape[-1]
    C = logits.shape[-1]
    assert data.dtype in (int, np.int8, np.int16, np.int32, np.int64)
    assert np.all((data >= 0) & (data < C))
    assert logits.shape[-2] == D

    # Check mask
    mask = mask if mask is not None else np.ones_like(data, dtype=bool)
    assert mask.shape == data.shape

    logits = logits - logsumexp(logits, axis=-1, keepdims=True)  # (..., D, C)
    x = one_hot(data, C)  # (..., D, C)
    lls = np.sum(x * logits, axis=-1)  # (..., D)
    return np.sum(lls * mask, axis=-1)  # (...,)
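
A usage sketch for categorical_logpdf; the one_hot helper is assumed from the surrounding source, so a minimal stand-in is included here to make the example run on its own.

import numpy as np
from scipy.special import logsumexp

def one_hot(z, C):
    # stand-in for the helper assumed by categorical_logpdf above
    return np.eye(C)[z]

N, D, C = 4, 3, 5
rng = np.random.default_rng(0)
data = rng.integers(C, size=(N, D))
logits = rng.normal(size=(D, C))     # shared logits, broadcast over the leading N axis
lps = categorical_logpdf(data, logits)
print(lps.shape)                     # (N,)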
Example #19
    def logloss(K_conj):
        """
            K is a tensor of CONJUGATE Kraus Operators of dim s x y x x x x
            s: dim of features
            y: number of features
            x: number of labels
        """
        total_loss = 0.0

        # Iterate over each sequence in batch
        for i in range(labels.shape[0]):
            features = feats_matrix[i, :]
            label = labels[i] - 1

            # Compute likelihood of the label generating the given features
            conjKrausProduct = np.log(K_conj[features[0] - 1, 0, :, :])
            for s in range(1, features.shape[0]):
                conjKrausProduct = logdotexp(
                    np.log(K_conj[features[s] - 1, s, :, :]), conjKrausProduct)

            eta = np.zeros([K_conj.shape[3], K_conj.shape[3]],
                           dtype='complex128')
            eta[label, label] = 1

            prod1 = logdotexp(np.conjugate(conjKrausProduct), np.log(eta))
            prod2 = logdotexp(prod1, conjKrausProduct.T)
            total_loss += np.real(logsumexp(np.diag(prod2)))

            # total_loss += np.real(np.trace(np.kron(np.conjugate(conjKrausProduct)[:, label], conjKrausProduct.T[:, label]).reshape(K_conj.shape[2], K_conj.shape[3])))

        return -total_loss / labels.shape[0]
Example #20
	def avg_log_likelihood(self, X, y, theta):
		ll = 0
		for c in range(10):
			X_c = get_images_by_label(X, y, c)
			log_p_x = logsumexp(np.log(0.1) + np.dot(X_c, np.log(theta.T)) + np.dot((1. - X_c), np.log(1. - theta.T)), axis=1)
			ll += np.sum(np.dot(X_c, np.log(theta[c])) + np.dot((1. - X_c), np.log(1. - theta[c])) + np.log(0.1) - log_p_x)
		return ll / X.shape[0]
Example #21
    def _cost_with_vis(self,
                       inputs,
                       targets,
                       hprev,
                       weights,
                       disable_tqdm=True,
                       epoch=None):
        if epoch is not None:
            f = open('values' + str(epoch) + '.txt', "w+")
        W_hh, W_xh, b_h, W_hy, b_y = weights
        h = np.copy(hprev)
        loss = 0
        for t in tqdm(range(len(inputs)), disable=disable_tqdm):
            x = char_to_one_hot(inputs[t])
            h = np.tanh(W_hh @ h + W_xh @ x + b_h)
            if epoch is not None:
                f.write(','.join(h.astype(str)) + '\n')

            y = W_hy @ h + b_y
            target_index = char_to_index[targets[t]]
            # ps_target[t] = np.exp(ys[t][target_index])/np.sum(np.exp(ys[t]))  # probability for next chars being target
            # loss += -np.log(ps_target[t])
            loss += -(y[target_index] - logsumexp(y))
        if epoch is not None:
            f.close()
        loss = loss / len(inputs)
        return loss
Example #22
def nn_predict_GCN(params, x):

    # x: NSAMPLES x NFEATURES
    U = hyper['U']
    xf = np.matmul(x, U)
    xf = np.expand_dims(xf, 1)  # NSAMPLES x 1 x NFEATURES
    xf = np.transpose(xf)  # NFEATURES x 1 x NSAMPLES

    # Filter
    yf = np.matmul(params['W1'], xf)  # for each feature
    yf = np.transpose(yf)  # NSAMPLES x NFILTERS x NFEATURES
    yf = np.reshape(yf, [-1, hyper['NFEATURES']])

    # Transform back to graph domain
    Ut = np.transpose(U)
    y = np.matmul(yf, Ut)
    y = np.reshape(y, [-1, hyper['F'], hyper['NFEATURES']])
    y += params['b1']  # NSAMPLES x NFILTERS x NFEATURES

    # nonlinear layer
    y = ReLU(y)
    # y = np.tanh(y)

    # dense layer
    y = np.reshape(y, [-1, hyper['F']*hyper['NFEATURES']])
    y = np.matmul(y, params['W2']) + params['b2']


    outputs = y

    return outputs - logsumexp(outputs, axis=1, keepdims=True)
Example #23
    def _cost(self, inputs, targets, hprev, Cprev, weights, disable_tqdm=True):
        W_1, b_1, W_f, b_f, W_i, b_i, W_c, b_c, W_o, b_o, W_2, b_2 = weights
        h = np.copy(hprev)
        C = np.copy(Cprev)

        loss = 0
        for t in tqdm(range(len(inputs)), disable=disable_tqdm):
            x = char_to_one_hot(inputs[t])

            x = np.matmul(W_1, x) + b_1

            f = sigmoid(np.matmul(W_f, np.concatenate((h, x))) + b_f)
            i = sigmoid(np.matmul(W_i, np.concatenate((h, x))) + b_i)
            C_hat = np.tanh(np.matmul(W_c, np.concatenate((h, x))) + b_c)
            C = f * C + i * C_hat
            o = sigmoid(np.matmul(W_o, np.concatenate((h, x))) + b_o)
            h = o * np.tanh(C)
            y = np.matmul(W_2, h) + b_2

            target_index = char_to_index[targets[t]]
            # ps_target[t] = np.exp(ys[t][target_index])/np.sum(np.exp(ys[t]))  # probability for next chars being target
            # loss += -np.log(ps_target[t])
            loss += -(y[target_index] - logsumexp(y))

        loss = loss / len(inputs)
        return loss
Example #24
    def _cost_batched(self,
                      inputs,
                      targets,
                      hprev,
                      Cprev,
                      weights,
                      disable_tqdm=True):
        W_1, b_1, W_f, b_f, W_i, b_i, W_c, b_c, W_o, b_o, W_2, b_2 = weights
        h = np.copy(hprev)
        C = np.copy(Cprev)
        h = h.reshape((self.batch_size, self.h_size, 1))
        C = C.reshape((self.batch_size, self.h_size, 1))
        loss = 0
        # W_sth_dropout = get_dropout_function((self.h_size, self.h_size + self.x_size), self.keep_prob)
        # b_sth_dropout = get_dropout_function((self.h_size,), self.keep_prob)
        # W_dropout = get_dropout_function((self.y_size, self.h_size), self.keep_prob)
        # b_dropout = get_dropout_function((self.y_size,), self.keep_prob)
        cell_dropout = get_dropout_function((self.batch_size, self.h_size, 1),
                                            self.keep_prob)
        y_dropout = get_dropout_function((self.batch_size, self.y_size, 1),
                                         self.keep_prob)
        for t in tqdm(range(len(inputs)), disable=disable_tqdm):
            x = np.array([char_to_one_hot(c) for c in inputs[:, t]])
            x = x.reshape((self.batch_size, -1, 1))

            x = np.matmul(W_1, x) + np.reshape(b_1, (-1, 1))
            x = cell_dropout(x)

            f = sigmoid(
                np.matmul(W_f, np.concatenate((h, x), axis=1)) +
                np.reshape(b_f, (-1, 1)))
            f = cell_dropout(f)
            i = sigmoid(
                np.matmul(W_i, np.concatenate((h, x), axis=1)) +
                np.reshape(b_i, (-1, 1)))
            i = cell_dropout(i)
            C_hat = np.tanh(
                np.matmul(W_c, np.concatenate((h, x), axis=1)) +
                np.reshape(b_c, (-1, 1)))
            C_hat = cell_dropout(C_hat)
            C = f * C + i * C_hat
            C = cell_dropout(C)
            o = sigmoid(
                np.matmul(W_o, np.concatenate((h, x), axis=1)) +
                np.reshape(b_o, (-1, 1)))
            o = cell_dropout(o)
            h = o * np.tanh(C)
            h = cell_dropout(h)
            ys = np.matmul(W_2, h) + np.reshape(b_2, (-1, 1))
            ys = y_dropout(ys)

            target_indices = np.array(
                [char_to_index[c] for c in targets[:, t]])
            # ps_target[t] = np.exp(ys[t][target_index])/np.sum(np.exp(ys[t]))  # probability for next chars being target
            # loss += -np.log(ps_target[t])
            loss += np.sum([
                -(y[target_index] - logsumexp(y))
                for y, target_index in zip(ys, target_indices)
            ]) / (self.number_of_steps * self.batch_size)
        return loss
Example #25
    def _backprop_single(self, params, x, num_samples = 1, alpha = 1.0):
        """
        Efficient training by computing k forward passes and only 1
        backward pass (by sampling particles according to the weights).
        For VI all the weights are equal.
        """
        # compute weights
        logF = self._comp_log_weights(params, x, num_samples)
        batchsize = x.shape[1]
        lowerbound = 0.0
        logFa = (1 - alpha) * logF
        for i in xrange(batchsize):
            indl = int(i * num_samples); indr = int((i+1) * num_samples)
            log_weights = logFa[indl:indr] - logsumexp(logFa[indl:indr])
            prob = list(np.exp(log_weights))
            # current autograd doesn't support np.random.choice!
            sample_uniform = np.random.random()
            for j in xrange(num_samples):
                sample_uniform = sample_uniform - prob[j]
                if sample_uniform <= 0.0:
                    break
            ind_current = indl + j                
            lowerbound = lowerbound + logF[ind_current]

        return lowerbound
Example #26
 def c_given_x(x):
     p = np.ndarray(shape=(x.shape[0], 10))
     for c in range(10):
         p[:, c] = np.log(theta[c]**x * (1 - theta[c])**(1 - x)).sum(axis=1)
     p = p - logsumexp(p, axis=1, keepdims=True)
     p = np.exp(p)
     return p
Example #27
 def log_transition_matrices(self, data, input, mask, tag):
     T, D = data.shape
     log_Ps = np.dot(input[1:], self.Ws.T)[:, None, :]              # inputs
     log_Ps = log_Ps + np.dot(data[:-1], self.Rs.T)[:, None, :]     # past observations
     log_Ps = log_Ps + self.r                                       # bias
     log_Ps = np.tile(log_Ps, (1, self.K, 1))                       # expand
     return log_Ps - logsumexp(log_Ps, axis=2, keepdims=True)       # normalize
Example #28
	def grad_pred_ll(self, X, W, c):
		"""
		This function calculates the gradient of the predictive log-likelihood.
		Returns a 10 * 784 vector.
		"""
		constant = np.exp(logsumexp(np.dot(X, W.T), axis=1))
		return np.sum(X - (X.T * np.divide(np.exp(np.dot(X, W[c])), constant)).T, axis=0)
Example #29
def calc_log_proba_for_one_seq(x_n_TD, a_n_T, lpi, ltrans, emission_mu,
                               emission_std):

    n_timesteps = x_n_TD.shape[0]
    n_states = lpi.shape[0]
    belief_log_proba_TK = np.zeros((n_timesteps, n_states))

    # Compute log proba array
    x_n_log_proba_TK = calc_log_proba_arr_for_x(x_n_TD, emission_mu,
                                                emission_std, n_states, a_n_T)

    x_n_log_proba_TK = x_n_log_proba_TK.flatten()

    # Initialise fwd belief vector at t = 0
    curr_belief_log_proba_K = lpi + x_n_log_proba_TK[0]
    curr_x_log_proba = logsumexp(curr_belief_log_proba_K)
    curr_belief_log_proba_K = curr_belief_log_proba_K - curr_x_log_proba
    belief_log_proba_TK[0, :] = curr_belief_log_proba_K

    log_proba_x = curr_x_log_proba

    for t in range(1, n_timesteps):
        # Update the beliefs over time
        curr_belief_log_proba_K, curr_x_log_proba = update_belief_log_probas(
            curr_belief_log_proba_K, x_n_log_proba_TK[t], ltrans, a_n_T[t])

        belief_log_proba_TK[t, :] = curr_belief_log_proba_K
        log_proba_x += curr_x_log_proba

    return log_proba_x, belief_log_proba_TK
Example #30
    def class_prior(self):
        ln_p_a = np.log(self.action(
            self.params))  # individual- and time-invariant
        logits_mix = self.params[self.mixture_param_key]
        ln_p_mix = logits_mix - logsumexp(logits_mix)

        return ln_p_a, ln_p_mix
Example #31
def lower_bound_MoG(theta,
                    s2min=1e-7,
                    return_dmu=False,
                    n=0,
                    return_ds2=False):
    """
    Lower bound on the entropy of a mixture of Gaussians.

    INPUT:
        theta --- all MoG parameters in the [mu; lns2] format
        s2min --- minimum variance
        return_dmu --- returns gradient with respect to mu_n, for the input n
        n --- see above
        return_ds2 ---- returns grad with respect to all s2 params
    """
    # unpack num components and dimensionality
    N, Dpp = theta.shape
    D = Dpp - 1

    # unpack mean and variance parameters
    mu = theta[:, :D]
    s2 = np.exp(theta[:, -1]) + s2min

    # compute lower bound to entropy, Eq (7) --- we compute the
    # Normal Probability N(mu_n | mu_j, s2_n + s2_j)
    S = sq_dist(mu)
    s = s2[:, None] + s2[None, :]
    lnP = (-.5 * S / s) - .5 * D * np.log(2 * np.pi) - .5 * D * np.log(s)
    lnqn = scpm.logsumexp(lnP, 1) - np.log(N)
    H = np.sum(lnqn) / float(N)

    # TODO implement gradients in the same matlab style
    return -1. * H
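
A usage sketch; sq_dist is assumed to return the N x N matrix of pairwise squared Euclidean distances between component means (a stand-in is provided), scpm is assumed to be scipy.special, and theta follows the [mu; lns2] layout described in the docstring.

import numpy as np
import scipy.special as scpm

def sq_dist(mu):
    # stand-in for the pairwise squared-distance helper used above
    diff = mu[:, None, :] - mu[None, :, :]
    return np.sum(diff ** 2, axis=-1)

N, D = 5, 2
rng = np.random.default_rng(0)
theta = np.column_stack([rng.normal(size=(N, D)),    # component means
                         np.full(N, np.log(0.5))])   # shared log-variance column
print(lower_bound_MoG(theta))   # lower bound on the mixture's entropy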
Example #32
def avg_pred_log(w,images):
    log_pc_x = 0
    for i in range(0,images.shape[0]):
        current_log_pc_x = np.dot(np.transpose(w),images[i,:]) - logsumexp(np.dot(np.transpose(w),images[i,:]))
        log_pc_x = log_pc_x + current_log_pc_x
        
    return np.sum(log_pc_x)/float(images.shape[0])
Example #33
 def _cost_batched(self,
                   inputs,
                   targets,
                   hprev,
                   weights,
                   disable_tqdm=True):
     W_hh, W_xh, b_h, W_hy, b_y = weights
     h = np.copy(hprev)
     h = h.reshape((self.batch_size, self.hidden_size, 1))
     loss = 0
     for t in tqdm(range(self.number_of_steps), disable=disable_tqdm):
         x = np.array([char_to_one_hot(c) for c in inputs[:, t]])
         x = x.reshape((self.batch_size, -1, 1))
         h = np.tanh(W_hh @ h + W_xh @ x + np.reshape(b_h, (-1, 1)))
         ys = W_hy @ h + np.reshape(b_y, (-1, 1))
         ys = np.squeeze(ys)
         target_indices = np.array(
             [char_to_index[c] for c in targets[:, t]])
         # ps_target[t] = np.exp(ys[t][target_index])/np.sum(np.exp(ys[t]))  # probability for next chars being target
         # loss += -np.log(ps_target[t])
         loss += np.sum([
             -(y[target_index] - logsumexp(y))
             for y, target_index in zip(ys, target_indices)
         ]) / (self.number_of_steps * self.batch_size)
     return loss
Example #34
    def callback(params, t, g):

        print("Iteration {} lower bound {}".format(t, -objective(params, t)))

        # print (params.shape)

        log_weights = params[:k] - logsumexp(params[:k])
        print(np.exp(log_weights))
        # params2 = np.reshape(params[10:], (10, -1))
        # print (params2.shape)
        # print (params2)

        plt.cla()
        target_distribution = lambda x: np.exp(log_density(x, t))
        var_distribution = lambda x: np.exp(variational_log_density(params, x))
        plot_isocontours(ax, target_distribution)
        plot_isocontours(ax, var_distribution, cmap=plt.cm.bone)
        ax.set_autoscale_on(False)

        rs = npr.RandomState(0)
        samples = variational_sampler(params, num_plotting_samples, rs)
        plt.plot(samples[:, 0], samples[:, 1], 'x')

        plt.draw()
        plt.pause(1.0 / 30.0)
Example #35
 def loss(params, X, T):
     W_vect = params[:-1]
     alpha = params[-1]
     log_prior = -L2_reg * np.dot(W_vect, W_vect)
     preds = predictions(W_vect, X, alpha)
     normalised_log_probs = preds - logsumexp(preds)
     log_lik = np.sum(normalised_log_probs * T)
     return -1.0 * (log_prior + log_lik)
Example #36
 def log_likelihood(all_params, X, y, n_samples):
     rs = npr.RandomState(0)
     samples = [sample_mean_cov_from_deep_gp(all_params, X, True, rs, FITC=True) for i in xrange(n_samples)]
     return (
         logsumexp(np.array([mvn.logpdf(y, mean, var) for mean, var in samples]))
         - np.log(n_samples)
         + evaluate_prior(all_params)
     )
Example #37
 def log_marginal_likelihood(params, data):
     cluster_lls = []
     for log_proportion, mean, chol in zip(*unpack_params(params)):
         cov = np.dot(chol.T, chol) + 0.000001 * np.eye(D)
         cluster_log_likelihood = log_proportion + mvn.logpdf(data, mean, cov)
         cluster_lls.append(np.expand_dims(cluster_log_likelihood, axis=0))
     cluster_lls = np.concatenate(cluster_lls, axis=0)
     return np.sum(logsumexp(cluster_lls, axis=0))
Example #38
 def get_error_and_ll(w, v_prior, X, y, K, location, scale):
     v_noise = np.exp(parser.get(w, 'log_v_noise')[ 0, 0 ]) * scale**2
     q = get_parameters_q(w, v_prior)
     samples_q = draw_samples(q, K)
     outputs = predict(samples_q, X) * scale + location
     log_factor = -0.5 * np.log(2 * math.pi * v_noise) - 0.5 * (np.tile(y, (1, K)) - np.array(outputs))**2 / v_noise
     ll = np.mean(logsumexp(log_factor - np.log(K), 1))
     error = np.sqrt(np.mean((y - np.mean(outputs, 1, keepdims = True))**2))
     return error, ll
Example #39
def neural_net_predict(params, inputs):
    """Implements a deep neural network for classification.
       params is a list of (weights, bias) tuples.
       inputs is an (N x D) matrix.
       returns normalized class log-probabilities."""
    for W, b in params:
        outputs = np.dot(inputs, W) + b
        inputs = np.tanh(outputs)
    return outputs - logsumexp(outputs, axis=1, keepdims=True)
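
A usage sketch on random data (the shapes here are illustrative assumptions, and np/logsumexp in the snippet above are assumed to be NumPy and scipy.special.logsumexp or compatible autograd wrappers). Each row of the returned matrix exponentiates to a probability vector.

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
params = [(0.1 * rng.normal(size=(4, 8)), np.zeros(8)),   # input -> hidden
          (0.1 * rng.normal(size=(8, 3)), np.zeros(3))]   # hidden -> 3 classes
inputs = rng.normal(size=(5, 4))
logprobs = neural_net_predict(params, inputs)
print(np.exp(logprobs).sum(axis=1))   # each of the 5 rows sums to ~1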
Example #40
 def predicted_class_logprobs(self, W_vect, inputs):
     for W, b in self.unpack_layers(W_vect):
         outputs = np.dot(inputs, W) + b
         if self.activation_type == 'tanh':
             inputs = np.tanh(outputs)
         elif self.activation_type == 'relu':
             inputs = relu(outputs)
         else:
             raise ValueError('unknown activation_type {}'.format(self.activation_type))
     return outputs - logsumexp(outputs, axis=1, keepdims=True)
Example #41
 def predictions(self, W_vect, inputs):
     '''For classification, returns N*C matrix of log probabilities.
     For regression, returns N*K matrix of predicted means'''
     for W, b in self.unpack_layers(W_vect):
         outputs = np.dot(inputs, W) + b
         inputs = self.nonlinearity(outputs)
     if self.output_type == 'regression':
         return outputs
     if self.output_type == 'classification':
         logprobs = outputs - logsumexp(outputs, axis=1, keepdims=True)
         return logprobs
Example #42
 def unpack_params(params):
     """Unpacks parameter vector into the proportions, means and covariances
     of each mixture component.  The covariance matrices are parametrized by
     their Cholesky decompositions."""
     log_proportions = parser.get(params, "log proportions")
     normalized_log_proportions = log_proportions - logsumexp(log_proportions)
     means = parser.get(params, "means")
     lower_tris = np.tril(parser.get(params, "lower triangles"), k=-1)
     diag_chols = np.exp(parser.get(params, "log diagonals"))
     chols = lower_tris + np.make_diagonal(diag_chols, axis1=-1, axis2=-2)
     return normalized_log_proportions, means, chols
Example #43
 def unpack_params(params):
     """Unpacks parameter vector into the proportions, means and covariances
     of each mixture component.  The covariance matrices are parametrized by
     their Cholesky decompositions."""
     log_proportions    = parser.get(params, 'log proportions')
     normalized_log_proportions = log_proportions - logsumexp(log_proportions)
     means              = parser.get(params, 'means')
     lower_tris = np.tril(parser.get(params, 'lower triangles'), k=-1)
     diag_chols = np.exp( parser.get(params, 'log diagonals'))
     chols = []
     for lower_tri, diag in zip(lower_tris, diag_chols):
         chols.append(np.expand_dims(lower_tri + np.diag(diag), 0))
     chols = np.concatenate(chols, axis=0)
     return normalized_log_proportions, means, chols
Example #44
 def _m_step(self):
     assert(self.resp.shape[0] == self.num_samp)
     pseud_lcount = logsumexp(self.resp, axis = 0).flat
     r = exp(self.resp)        
     
     self.comp_dist = []
     for c in range(self.num_components):
         norm = exp(pseud_lcount[c])
         mu = np.sum(r[:,c:c+1] * self.samples, axis=0) / norm
         diff = self.samples - mu
         scatter_matrix = np.zeros([self.samples.shape[1]]*2)
         for i in range(diff.shape[0]):
             scatter_matrix += r[i,c:c+1] *diff[i:i+1,:].T.dot(diff[i:i+1,:])
         scatter_matrix /= norm
         self.comp_dist.append(mvnorm(mu, scatter_matrix))
     self.comp_lprior = pseud_lcount - log(self.num_samp)
Example #45
def mog_like(x, means, icovs, dets, pis):
    """ compute the log likelihood according to a mixture of gaussians
        with means = [mu0, mu1, ... muk]
             icovs = [C0^-1, ..., CK^-1]
             dets = [|C0|, ..., |CK|]
             pis  = [pi1, ..., piK] (sum to 1)
        at locations given by x = [x1, ..., xN]
    """
    xx = np.atleast_2d(x)
    centered = xx[:,:,np.newaxis] - means.T[np.newaxis,:,:]
    solved   = np.einsum('ijk,lji->lki', icovs, centered)
    logprobs = -0.5*np.sum(solved * centered, axis=1) - np.log(2*np.pi) - 0.5*np.log(dets) + np.log(pis)
    logprob  = scpm.logsumexp(logprobs, axis=1)
    if len(x.shape) == 1:
        return np.exp(logprob[0])
    else:
        return np.exp(logprob)
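
A usage sketch for mog_like; the single -np.log(2*np.pi) term above corresponds to 2-D components, so D = 2 here, and scpm is assumed to be scipy.special. The result is checked against scipy.stats.multivariate_normal.

import numpy as np
import scipy.special as scpm
from scipy.stats import multivariate_normal as mvn

K, D, N = 3, 2, 5
rng = np.random.default_rng(0)
means = rng.normal(size=(K, D))
covs = np.array([(k + 1.0) * np.eye(D) for k in range(K)])
icovs = np.array([np.linalg.inv(c) for c in covs])
dets = np.array([np.linalg.det(c) for c in covs])
pis = np.ones(K) / K
x = rng.normal(size=(N, D))

lik = mog_like(x, means, icovs, dets, pis)
ref = sum(pis[k] * mvn.pdf(x, means[k], covs[k]) for k in range(K))
assert np.allclose(lik, ref)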
Example #46
def mog_logmarglike(x, means, covs, pis, ind=0):
    """ marginal x or y (depending on ind) """
    K = pis.shape[0]
    xx = np.atleast_2d(x)
    centered = xx.T - means[:,ind,np.newaxis].T
    logprobs = []
    for kk in xrange(K):
        quadterm  = centered[:,kk] * centered[:,kk] * (1./covs[kk,ind,ind])
        logprobsk = -.5*quadterm - .5*np.log(2*np.pi) \
                    -.5*np.log(covs[kk,ind,ind]) + np.log(pis[kk])
        logprobs.append(np.squeeze(logprobsk))
    logprobs = np.array(logprobs)
    logprob  = scpm.logsumexp(logprobs, axis=0)
    if np.isscalar(x):
        return logprob[0]
    else:
        return logprob 
Example #47
def cost(theta):
    # Unpack parameters
    nu = np.concatenate([theta[1], [0]], axis=0)
    
    S = theta[0]
    logdetS = np.expand_dims(np.linalg.slogdet(S)[1], 1)
    y = np.concatenate([samples.T, np.ones((1, N))], axis=0)

    # Calculate log_q
    y = np.expand_dims(y, 0)
    
    # 'Probability' of y belonging to each cluster
    log_q = -0.5 * (np.sum(y * np.linalg.solve(S, y), axis=1) + logdetS)

    alpha = np.exp(nu)
    alpha = alpha / np.sum(alpha)
    alpha = np.expand_dims(alpha, 1)
    
    loglikvec = logsumexp(np.log(alpha) + log_q, axis=0)
    return -np.sum(loglikvec)
Example #48
def gmm_logprob(x, ws, mus, sigs, invsigs=None, logdets=None):
    """ Gaussian Mixture Model likelihood
        Input:
          - x    = N x D array of data (N iid)
          - ws   = K length vector that sums to 1, mixing weights
          - mus  = K x D array of mixture component means
          - sigs = K x D x D array of mixture component covariances

          - invsigs = K x D x D array of mixture component covariance inverses
          - logdets = K array of mixture component covariance logdets

        Output:
          - N length array of log likelihood values

        TODO: speed this up
    """

    if sigs is None:
        assert invsigs is not None and logdets is not None, \
                "need sigs if you don't include logdets and invsigs"

    # compute invsigs if needed
    if invsigs is None:
        invsigs = np.array([np.linalg.inv(sig) for sig in sigs])
        logdets = np.array([np.linalg.slogdet(sig)[1] for sig in sigs])

    # compute each gauss component separately
    xx = np.atleast_2d(x)
    centered = xx[:,:,np.newaxis] - mus.T[np.newaxis,:,:]
    solved   = np.einsum('ijk,lji->lki', invsigs, centered)
    logprobs = -0.5*np.sum(solved * centered, axis=1) - \
                    np.log(2*np.pi) - 0.5*logdets + np.log(ws)
    logprob  = scpm.logsumexp(logprobs, axis=1)
    if len(x.shape) == 1:
        return logprob[0]
    else:
        return logprob
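
A usage sketch for the two call signatures described in the docstring (again with 2-D components, matching the fixed -np.log(2*np.pi) term, and with scpm assumed to be scipy.special): letting the function invert the covariances should agree with passing precomputed inverses and log-determinants.

import numpy as np
import scipy.special as scpm

K, D, N = 3, 2, 6
rng = np.random.default_rng(0)
ws = np.ones(K) / K
mus = rng.normal(size=(K, D))
sigs = np.array([(k + 1.0) * np.eye(D) for k in range(K)])
invsigs = np.array([np.linalg.inv(s) for s in sigs])
logdets = np.array([np.linalg.slogdet(s)[1] for s in sigs])
x = rng.normal(size=(N, D))

lp1 = gmm_logprob(x, ws, mus, sigs)
lp2 = gmm_logprob(x, ws, mus, sigs, invsigs=invsigs, logdets=logdets)
assert np.allclose(lp1, lp2)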
Example #49
def logsoftmax(v):
    return v - logsumexp(v, 1).reshape(-1, 1)
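
A quick check of the one-liner above, assuming v is a 2-D array of logits: after exponentiating, every row sums to one.

import numpy as np
from scipy.special import logsumexp

v = np.random.default_rng(0).normal(size=(3, 4))
assert np.allclose(np.exp(logsoftmax(v)).sum(axis=1), 1.0)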
Example #50
 def hiddens_to_output_probs(hiddens):
     output = concat_and_multiply(params['predict'], hiddens)
     return output - logsumexp(output, axis=1, keepdims=True) 
Example #51
 def loss(W_vect, X, T):
     log_prior = -L2_reg * np.dot(W_vect, W_vect)
     preds = predictions(W_vect, X)
     normalised_log_probs = preds - logsumexp(preds)
     log_lik = np.sum(normalised_log_probs * T)
     return -1.0 * (log_prior + log_lik)
Example #52
 def mixture_log_density(var_mixture_params, x):
     """Returns a weighted average over component densities."""
     log_weights, var_params = unpack_mixture_params(var_mixture_params)
     component_log_densities = np.vstack([component_log_density(params_k, x)
                                          for params_k in var_params]).T
     return logsumexp(component_log_densities + log_weights, axis=1, keepdims=False)
Example #53
 def _e_step(self):
     lpdfs = np.array([d.logpdf(self.samples).flat[:] 
                           for d in self.comp_dist]).T + self.comp_lprior
     self.resp = lpdfs - logsumexp(lpdfs, axis = 1).reshape((self.num_samp, 1))
Example #54
 def log_likelihood(all_params):  # implement mini batches later?
     n_samples = 1
     samples = [sample_mean_cov_from_deep_gp(all_params, X, True) for i in xrange(n_samples)]
     return logsumexp(np.array([mvn.logpdf(y,mean,var+1e-6*np.eye(len(var))*np.max(np.diag(var))) for mean,var in samples])) - np.log(n_samples) \
         + evaluate_prior(all_params)
Example #55
 def log_Z_likelihood(q, f_hat, v_noise, X, y, K):
     samples = draw_samples(q, K)
     log_f_hat = np.sum(-0.5 / f_hat[ 'v' ] * samples**2 + f_hat[ 'm' ] / f_hat[ 'v' ] * samples, 1)
     log_factor_value = alpha * (log_likelihood_factor(samples, v_noise, X, y) - log_f_hat)
     return np.sum(logsumexp(log_factor_value, 1) + np.log(1.0 / K))
Example #56
 def predictions(W_vect, inputs):
     for W, b in unpack_layers(W_vect):
         outputs = np.dot(inputs, W) + b
         inputs = np.tanh(outputs)
     return outputs - logsumexp(outputs, axis=1, keepdims=True)
Example #57
def gaussian_loglike(x, mu, log_sigmasq):
    return np.mean(logsumexp(
        -0.5*((np.log(2*np.pi) + log_sigmasq) + (x - mu)**2. / np.exp(log_sigmasq)),
        axis=0))
Example #58
 def log_softmax(self, batch):
     batch = batch - np.max(batch, axis=1, keepdims=True)
     return batch - logsumexp(batch, axis=1).reshape((batch.shape[0], -1))
Example #59
 def logpdf(self, x):
     rval = np.array([self.comp_lprior[i]+ self.comp_dist[i].logpdf(x)
                           for i in range(self.comp_lprior.size)])
     rval = logsumexp(rval, 0).flatten()
     return rval