Example No. 1
 def mixmixlogp(value, point):
     floatX = theano.config.floatX
     priorlogp = st.dirichlet.logpdf(x=point['g_w'],
                                     alpha=np.ones(nbr)*0.0000001,
                                     ).astype(floatX) + \
                 st.expon.logpdf(x=point['mu_g']).sum(dtype=floatX) + \
                 st.dirichlet.logpdf(x=point['l_w'],
                                     alpha=np.ones(nbr)*0.0000001,
                                     ).astype(floatX) + \
                 st.expon.logpdf(x=point['mu_l']).sum(dtype=floatX) + \
                 st.dirichlet.logpdf(x=point['mix_w'],
                                     alpha=np.ones(2),
                                     ).astype(floatX)
     complogp1 = st.norm.logpdf(x=value,
                                loc=point['mu_g']).astype(floatX)
     mixlogp1 = logsumexp(np.log(point['g_w']).astype(floatX) +
                          complogp1,
                          axis=-1, keepdims=True)
     complogp2 = st.lognorm.logpdf(value,
                                   1.,
                                   0.,
                                   np.exp(point['mu_l'])).astype(floatX)
     mixlogp2 = logsumexp(np.log(point['l_w']).astype(floatX) +
                          complogp2,
                          axis=-1, keepdims=True)
     complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
     mixmixlogpg = logsumexp(np.log(point['mix_w']).astype(floatX) +
                             complogp_mix,
                             axis=-1, keepdims=True)
     return priorlogp, mixmixlogpg
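The pattern behind `mixlogp1`, `mixlogp2`, and `mixmixlogpg` is the generic log-space mixture density, `logsumexp(log_weights + component_logp, axis=-1)`. A minimal self-contained sketch of that pattern, with invented weights and Gaussian components rather than the distributions used above:

import numpy as np
from scipy import stats as st
from scipy.special import logsumexp

value = np.array([[-1.0], [0.5], [2.0]])   # (n, 1) observations
weights = np.array([0.3, 0.7])             # mixture weights (sum to 1)
mus = np.array([0.0, 1.5])                 # component means

# (n, 2) per-component log densities via broadcasting
complogp = st.norm.logpdf(value, loc=mus)

# log p(x) = logsumexp_k(log w_k + log N(x | mu_k)), computed row by row
mixlogp = logsumexp(np.log(weights) + complogp, axis=-1, keepdims=True)
print(mixlogp.shape)  # (3, 1)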
Example No. 2
 def logpdf(self, pts, pool=None):
     """Evaluate the logpdf of the KDE at `pts`."""
     logpdfs = [logweight + kde(pts, pool=pool)
                for logweight, kde in zip(self._logweights, self._kdes)]
     if len(pts.shape) == 1:
         return logsumexp(logpdfs)
     else:
         return logsumexp(logpdfs, axis=0)
Example No. 3
 def _margtimedist_loglr(self, mf_snr, opt_snr):
     """Returns the log likelihood ratio marginalized over time and
     distance.
     """
     logl = special.logsumexp(mf_snr, b=self._deltat)
     logl_marg = logl/self._dist_array
     opt_snr_marg = opt_snr/self._dist_array**2
     return special.logsumexp(logl_marg - 0.5*opt_snr_marg,
                              b=self._deltad*self.dist_prior)
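The `b` keyword used here computes `logsumexp(x, b=w) = log(sum_i w_i * exp(x_i))`, so `b=self._deltad*self.dist_prior` turns the sum over the distance grid into the log of a prior-weighted Riemann sum. A standalone sketch of that use of `b`, with a made-up grid, prior and log-likelihood (nothing below comes from this class):

import numpy as np
from scipy.special import logsumexp

dist = np.linspace(10.0, 1000.0, 2000)                   # distance grid
deltad = dist[1] - dist[0]                               # grid spacing
prior = np.full_like(dist, 1.0 / (dist[-1] - dist[0]))   # flat, normalised prior p(d)
loglike = -0.5 * (dist / 200.0) ** 2                     # toy log-likelihood over the grid

# logsumexp(x, b=w) == log(sum_i w_i * exp(x_i)), so this is a log Riemann sum
log_marg_like = logsumexp(loglike, b=deltad * prior)

# same quantity computed naively (safe here because the log-likelihood is small)
assert np.isclose(log_marg_like, np.log(np.sum(deltad * prior * np.exp(loglike))))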
Example No. 4
def test_logsumexp_shape():
    a = np.ones((1, 2, 3, 4))
    b = np.ones_like(a)

    r = logsumexp(a, axis=2, b=b)
    assert_equal(r.shape, (1, 2, 4))

    r = logsumexp(a, axis=(1, 3), b=b)
    assert_equal(r.shape, (1, 3))
Example No. 5
def test_logsumexp_sign_shape():
    a = np.ones((1,2,3,4))
    b = np.ones_like(a)

    r, s = logsumexp(a, axis=2, b=b, return_sign=True)

    assert_equal(r.shape, s.shape)
    assert_equal(r.shape, (1,2,4))

    r, s = logsumexp(a, axis=(1,3), b=b, return_sign=True)

    assert_equal(r.shape, s.shape)
    assert_equal(r.shape, (1,3))
Example No. 6
    def WAIC(self):
        # WAIC
        # from https://github.com/pymc-devs/pymc3/blob/02f0b7f9a487cf18e9a48b754b54c2a99cf9fba8/pymc3/stats.py
        # We get three different measurements:
        # waic: widely applicable information criterion
        # waic_se: standard error of waic
        # p_waic: effective number of parameters

        log_py = np.atleast_2d(np.array([self.lnprob(theta)
                                         for theta in self.samples])).T
        lppd_i = logsumexp(log_py, axis=0, b=1.0 / len(log_py))
        vars_lpd = np.var(log_py, axis=0)
        warn_mg = 0
        if np.any(vars_lpd > 0.4):
            warnings.warn("""For one or more samples the posterior variance of the
            log predictive densities exceeds 0.4. This could be an indication of
            WAIC starting to fail; see http://arxiv.org/abs/1507.04544 for details.
            """)
            warn_mg = 1

        waic_i = - 2 * (lppd_i - vars_lpd)
        waic = np.sum(waic_i)
        waic_se = np.sqrt(len(waic_i) * np.var(waic_i))
        p_waic = np.sum(vars_lpd)            

        self.waic = {'waic': waic,
                     'waic_se': waic_se,
                     'p_waic': p_waic}

        return waic, waic_se, p_waic
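The `lppd_i` line relies on the identity `logsumexp(log_py, axis=0, b=1/S) = log(mean_s exp(log_py[s]))`: the log pointwise predictive density is the log of the averaged likelihood, not the average log-likelihood. A small self-contained check of that identity on toy numbers (shapes and values are illustrative only):

import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
log_py = rng.normal(size=(500, 10))   # S posterior draws x N data points (toy values)

# stable log-mean-exp over the sample axis ...
lppd_i = logsumexp(log_py, axis=0, b=1.0 / len(log_py))

# ... equals the naive computation while the values are small enough not to overflow
assert np.allclose(lppd_i, np.log(np.exp(log_py).mean(axis=0)))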
Example No. 7
def mixing_posterior(mod, states):
    resid = mod.endog[:, 0] - states[0]

    # Construct the means (nobs x 7), variances (7,), prior probabilities (7,)
    means = ksc_params[None, :, 1] - 1.27036
    variances = ksc_params[:, 2]
    prior_probabilities = ksc_params[:, 0]

    # Make dimensions compatible for broadcasting
    resid = np.repeat(resid[:, None], len(variances), axis=-1)
    variances = np.repeat(variances[None, :], mod.nobs, axis=0)
    prior_probabilities = np.repeat(prior_probabilities[None, :], mod.nobs,
                                    axis=0)

    # Compute loglikelihood (nobs x 7)
    loglikelihoods = -0.5 * ((resid - means)**2 / variances +
                             np.log(2 * np.pi * variances))

    # Get (values proportional to) the (log of the) posterior (nobs x 7)
    posterior_kernel = loglikelihoods + np.log(prior_probabilities)

    # Normalize to get the actual posterior probabilities
    tmp = logsumexp(posterior_kernel, axis=1)
    posterior_probabilities = np.exp(posterior_kernel - tmp[:, None])

    return posterior_probabilities
Example No. 8
    def test_mixture_of_mvn(self):
        mu1 = np.asarray([0., 1.])
        cov1 = np.diag([1.5, 2.5])
        mu2 = np.asarray([1., 0.])
        cov2 = np.diag([2.5, 3.5])
        obs = np.asarray([[.5, .5], mu1, mu2])
        with Model() as model:
            w = Dirichlet('w', floatX(np.ones(2)), transform=None)
            mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
            mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
            y = Mixture('x_obs', w, [mvncomp1, mvncomp2],
                    observed=obs)

        # check logp of each component
        complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1),
                                 st.multivariate_normal.logpdf(obs, mu2, cov2))
                                ).T
        complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
        assert_allclose(complogp, complogp_st)

        # check logp of mixture
        testpoint = model.test_point
        mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st,
                               axis=-1, keepdims=True)
        assert_allclose(y.logp_elemwise(testpoint),
                        mixlogp_st)

        # check logp of model
        priorlogp = st.dirichlet.logpdf(x=testpoint['w'],
                                        alpha=np.ones(2),
                                        )
        assert_allclose(model.logp(testpoint),
                        mixlogp_st.sum() + priorlogp)
Example No. 9
    def _global_jump(self, replicas_log_P_k):
        """
        Global jump scheme.
        This method is described after Eq. 3 in [2]
        """
        n_replica, n_states = self.n_replicas, self.n_states
        for replica_index, current_state_index in enumerate(self._replica_thermodynamic_states):
            neighborhood = self._neighborhood(current_state_index)

            # Compute unnormalized log probabilities for all thermodynamic states.
            log_P_k = np.zeros([n_states], np.float64)
            for state_index in neighborhood:
                u_k = self._energy_thermodynamic_states[replica_index, :]
                log_P_k[state_index] =  - u_k[state_index] + self.log_weights[state_index]
            log_P_k -= logsumexp(log_P_k)

            # Update sampler Context to current thermodynamic state.
            P_k = np.exp(log_P_k[neighborhood])
            new_state_index = np.random.choice(neighborhood, p=P_k)
            self._replica_thermodynamic_states[replica_index] = new_state_index

            # Accumulate statistics.
            replicas_log_P_k[replica_index,:] = log_P_k[:]
            self._n_proposed_matrix[current_state_index, neighborhood] += 1
            self._n_accepted_matrix[current_state_index, new_state_index] += 1
Example No. 10
 def _margdist_loglr(self, mf_snr, opt_snr):
     """Returns the log likelihood ratio marginalized over distance.
     """
     mf_snr_marg = mf_snr/self._dist_array
     opt_snr_marg = opt_snr/self._dist_array**2
     return special.logsumexp(mf_snr_marg - 0.5*opt_snr_marg,
                              b=self._deltad*self.dist_prior)
Example No. 11
    def log_forward(self, input):
        """Forward pass for sigmoid hidden layers and output softmax"""

        # Input
        tilde_z = input
        layer_inputs = []

        # Hidden layers
        num_hidden_layers = len(self.parameters) - 1
        for n in range(num_hidden_layers):

            # Store input to this layer (needed for backpropagation)
            layer_inputs.append(tilde_z)

            # Linear transformation
            weight, bias = self.parameters[n]
            z = np.dot(tilde_z, weight.T) + bias

            # Non-linear transformation (sigmoid)
            tilde_z = 1.0 / (1 + np.exp(-z))

        # Store input to this layer (needed for backpropagation)
        layer_inputs.append(tilde_z)

        # Output linear transformation
        weight, bias = self.parameters[num_hidden_layers]
        z = np.dot(tilde_z, weight.T) + bias

        # Softmax is computed in log-domain to prevent underflow/overflow
        log_tilde_z = z - logsumexp(z, axis=1)[:, None]

        return log_tilde_z, layer_inputs
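The output line `z - logsumexp(z, axis=1)[:, None]` is a numerically stable log-softmax: subtracting the per-row log normaliser gives rows whose exponentials sum to one even for very large logits. A short standalone sketch of just that step, independent of the network class above:

import numpy as np
from scipy.special import logsumexp

z = np.array([[1000.0, 1001.0, 1002.0],   # large logits: a naive softmax would overflow
              [-1.0, 0.0, 1.0]])

log_probs = z - logsumexp(z, axis=1)[:, None]   # stable log-softmax, row by row

assert np.allclose(np.exp(log_probs).sum(axis=1), 1.0)   # each row exponentiates to a distribution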
Example No. 12
    def log_forward(self, input):

        # Get parameters and sizes
        W_e, W_x, W_h, W_y = self.parameters
        hidden_size = W_h.shape[0]
        nr_steps = input.shape[0]

        # Embedding layer
        z_e = W_e[input, :]

        # Recurrent layer
        h = np.zeros((nr_steps + 1, hidden_size))
        for t in range(nr_steps):

            # Linear
            z_t = W_x.dot(z_e[t, :]) + W_h.dot(h[t, :])

            # Non-linear
            h[t+1, :] = 1.0 / (1 + np.exp(-z_t))

        # Output layer
        y = h[1:, :].dot(W_y.T)

        # Softmax
        log_p_y = y - logsumexp(y, axis=1)[:, None]

        return log_p_y, y, h, z_e, input
Example No. 13
def test_logsumexp_sign():
    a = [1,1,1]
    b = [1,-1,-1]

    r, s = logsumexp(a, b=b, return_sign=True)
    assert_almost_equal(r,1)
    assert_equal(s,-1)
Example No. 14
 def logp_matches(self, mixture, latent_mix, z, npop, model):
     if theano.config.floatX == 'float32':
         rtol = 1e-4
     else:
         rtol = 1e-7
     test_point = model.test_point
     test_point['latent_m'] = test_point['m']
     mix_logp = mixture.logp(test_point)
     logps = []
     for component in range(npop):
         test_point['z'] = component * np.ones(z.distribution.shape)
         # Count the number of axes that should be broadcasted from z to
         # modify the logp
         sh1 = test_point['z'].shape
         sh2 = test_point['latent_m'].shape
         if len(sh1) > len(sh2):
             sh2 = (1,) * (len(sh1) - len(sh2)) + sh2
         elif len(sh2) > len(sh1):
             sh1 = (1,) * (len(sh2) - len(sh1)) + sh1
         reps = np.prod([s2 if s1 != s2 else 1 for s1, s2 in
                         zip(sh1, sh2)])
         z_logp = z.logp(test_point) * reps
         logps.append(z_logp + latent_mix.logp(test_point))
     latent_mix_logp = logsumexp(np.array(logps), axis=0)
     assert_allclose(mix_logp, latent_mix_logp, rtol=rtol)
Example No. 15
def test_logsumexp_sign_zero():
    a = [1,1]
    b = [1,-1]

    r, s = logsumexp(a, b=b, return_sign=True)
    assert_(not np.isfinite(r))
    assert_(not np.isnan(r))
    assert_(r < 0)
    assert_equal(s,0)
Example No. 16
 def _do_forward_pass(self, framelogprob):
     n_samples, n_components = framelogprob.shape
     fwdlattice = np.zeros((n_samples, n_components))
     _hmmc._forward(n_samples, n_components,
                    log_mask_zero(self.startprob_),
                    log_mask_zero(self.transmat_),
                    framelogprob, fwdlattice)
     with np.errstate(under="ignore"):
         return logsumexp(fwdlattice[-1]), fwdlattice
Example No. 17
    def __call__(self, y_true, raw_predictions, average=True):
        one_hot_true = np.zeros_like(raw_predictions)
        prediction_dim = raw_predictions.shape[0]
        for k in range(prediction_dim):
            one_hot_true[k, :] = (y_true == k)

        loss = (logsumexp(raw_predictions, axis=0) -
                (one_hot_true * raw_predictions).sum(axis=0))
        return loss.mean() if average else loss
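The loss here is the categorical cross-entropy expressed directly in logits, `-log softmax(z)[y] = logsumexp(z) - z[y]`. A minimal check of that identity for a single toy sample (not tied to the estimator above):

import numpy as np
from scipy.special import logsumexp

z = np.array([2.0, -1.0, 0.5])   # logits for one sample, three classes
y = 0                            # true class index

loss_from_logits = logsumexp(z) - z[y]
loss_naive = -np.log(np.exp(z)[y] / np.exp(z).sum())
assert np.isclose(loss_from_logits, loss_naive)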
Example No. 18
 def _margdistphase_loglr(self, mf_snr, opt_snr):
     """Returns the log likelihood ratio marginalized over distance and
     phase.
     """
     logl = numpy.log(special.i0(mf_snr))
     logl_marg = logl/self._dist_array
     opt_snr_marg = opt_snr/self._dist_array**2
     return special.logsumexp(logl_marg - 0.5*opt_snr_marg,
                              b=self._deltad*self.dist_prior)
Example No. 19
def test_logsumexp():
    a = np.random.normal(size=(200, 500, 5))

    for axis in range(a.ndim):
        ans_ne = pymbar.utils.logsumexp(a, axis=axis)
        ans_no_ne = pymbar.utils.logsumexp(a, axis=axis, use_numexpr=False)
        ans_scipy = logsumexp(a, axis=axis)
        eq(ans_ne, ans_no_ne)
        eq(ans_ne, ans_scipy)
Example No. 20
    def log_forward(self, input=None):
        """Forward pass of the computation graph"""

        # Linear transformation
        z = np.dot(input, self.weight.T) + self.bias

        # Softmax implemented in log domain
        log_tilde_z = z - logsumexp(z, axis=1)[:, None]

        return log_tilde_z
Example No. 21
    def _compute_log_likelihood(self, X):
        n_samples, _ = X.shape
        res = np.zeros((n_samples, self.n_components))

        for i in range(self.n_components):
            log_denses = self._compute_log_weighted_gaussian_densities(X, i)
            with np.errstate(under="ignore"):
                res[:, i] = logsumexp(log_denses, axis=1)

        return res
Example No. 22
 def _logsf(self, k, M, n, N):
     """
     More precise calculation than log(sf)
     """
     res = []
     for quant, tot, good, draw in zip(k, M, n, N):
         # Integration over probability mass function using logsumexp
         k2 = np.arange(quant + 1, draw + 1)
         res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
     return np.asarray(res)
Example No. 23
    def logpdf(self, pts, pool=None):
        """Evaluate the log-transdimensional-pdf at `pts` as estimated by the KDE."""
        logpdfs = []
        for logweight, space, kde in zip(self._logweights,
                                         self.spaces,
                                         self.kdes):
            # Calculate the probability for each parameter space individually
            if np.all(space == ~pts.mask) and np.isfinite(logweight):
                logpdfs.append(logweight + kde(pts[space], pool=pool))

        return logsumexp(logpdfs, axis=0)
Example No. 24
 def _logcdf(self, k, M, n, N):
     res = []
     for quant, tot, good, draw in zip(k, M, n, N):
         if (quant + 0.5) * (tot + 0.5) > (good - 0.5) * (draw - 0.5):
             # Less terms to sum if we calculate log(1-sf)
             res.append(log1p(-exp(self.logsf(quant, tot, good, draw))))
         else:
             # Integration over probability mass function using logsumexp
             k2 = np.arange(0, quant + 1)
             res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
     return np.asarray(res)
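Both `_logcdf` and `_logsf` accumulate a probability mass function in log space, since `log(sum_k pmf(k)) = logsumexp(logpmf(k))` avoids underflow when the individual masses are tiny. The same trick for a binomial lower tail, checked against SciPy's own log-CDF (a standalone sketch, not the hypergeometric code above):

import numpy as np
from scipy import stats
from scipy.special import logsumexp

n, p, q = 300, 0.01, 5   # Binomial(n, p); evaluate log P(X <= q)

k = np.arange(0, q + 1)
logcdf_via_lse = logsumexp(stats.binom.logpmf(k, n, p))

assert np.isclose(logcdf_via_lse, stats.binom.logcdf(q, n, p))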
Example No. 25
  def compute_kl_with_logits(self, logits1, logits2):
    """Computes KL from logits samples from two distributions."""

    def exp_times_diff(a, b):
      return np.multiply(np.exp(a), a - b)

    logsumexp1 = logsumexp(logits1, axis=1)
    logsumexp2 = logsumexp(logits2, axis=1)
    logsumexp_diff = logsumexp2 - logsumexp1

    exp_diff = exp_times_diff(logits1, logits2)
    exp_diff = np.sum(exp_diff, axis=1)

    inv_exp_sum = np.sum(np.exp(logits1), axis=1)
    term1 = np.divide(exp_diff, inv_exp_sum)

    kl = term1 + logsumexp_diff
    kl = np.maximum(kl, 0.0)
    kl = np.nan_to_num(kl)
    return np.mean(kl)
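`term1 + logsumexp_diff` above equals `sum_k p1_k (log p1_k - log p2_k)` with `p_i = softmax(logits_i)`, i.e. the row-wise KL divergence. A small standalone check against an explicit softmax computation, with toy logits kept small so the direct `np.exp` cannot overflow:

import numpy as np
from scipy.special import logsumexp

logits1 = np.array([[0.2, -1.0, 0.5]])
logits2 = np.array([[1.0, 0.0, -0.5]])

# KL from logits, following the decomposition used in the method above
p1 = np.exp(logits1)
kl_from_logits = (np.sum(p1 * (logits1 - logits2), axis=1) / p1.sum(axis=1)
                  + logsumexp(logits2, axis=1) - logsumexp(logits1, axis=1))

# reference: normalise explicitly and apply the definition of KL(p1 || p2)
q1 = p1 / p1.sum(axis=1, keepdims=True)
q2 = np.exp(logits2) / np.exp(logits2).sum(axis=1, keepdims=True)
kl_reference = np.sum(q1 * (np.log(q1) - np.log(q2)), axis=1)

assert np.allclose(kl_from_logits, kl_reference)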
Example No. 26
    def fit(self, obs, three_para):
        """Estimate model parameters.

        An initialization step is performed before entering the EM
        algorithm. If you want to avoid this step, pass proper
        ``init_params`` keyword argument to estimator's constructor.

        Parameters
        ----------
        obs : list
            List of array-like observation sequences, each of which
            has shape (n_i, n_features), where n_i is the length of
            the i_th observation.

        Notes
        -----
        In general, `logprob` should be non-decreasing unless
        aggressive pruning is used.  Decreasing `logprob` is generally
        a sign of overfitting (e.g. a covariance parameter getting too
        small).  You can fix this by getting more training data,
        or strengthening the appropriate subclass-specific regularization
        parameter.
        """

        # what does this mean??
        self._init(obs, self.init_params)

        logprob = []
        for i in range(self.n_iter):
            # Expectation step
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0
            for seq in obs:
                framelogprob = self._compute_log_likelihood(seq)
                lpr, fwdlattice = self._do_forward_pass(framelogprob)
                bwdlattice = self._do_backward_pass(framelogprob)
                gamma = fwdlattice + bwdlattice
                posteriors = np.exp(gamma.T - logsumexp(gamma, axis=1)).T
                curr_logprob += lpr
                self._accumulate_sufficient_statistics(
                    stats, seq, framelogprob, posteriors, fwdlattice,
                    bwdlattice)
            logprob.append(curr_logprob)

            # Check for convergence.
            if i > 0 and logprob[-1] - logprob[-2] < self.tol:
                break

            # Maximization step
            self._do_mstep(stats, three_para)
        #print("Logprob of all M-steps: %s" %logprob, file=sys.stderr)
        self.em_prob = logprob[-1]
        return self
Example No. 27
def compute_pvalue(distr, N, side, current_p, x):
    """Compute log2 pvalue"""
    sum_num = []
    sum_denum = []
    it = range(N // 2 + 1) if side == 'r' else range(N + 1, -1, -1)

    for i in it:
        p1 = get_log_value(i, distr)
        p2 = get_log_value(N - i, distr)
        p = p1 + p2

        if _comp(i, x, side, current_p, p):
            # if p > current_p:
            sum_num.append(p)
        sum_denum.append(p)

    if distr['distr_name'] == 'nb':
        sum_num = [float(x) for x in sum_num]
        sum_denum = [float(x) for x in sum_denum]

    return logsumexp(np.array(sum_num)) - (log(2) + logsumexp(np.array(sum_denum)))
Example No. 28
def griffiths_log_prob_coal_counts(a,b,t,N):
    t = t/(2*N)
    n = a
    alpha = 1/2*n*t
    beta = -1/2*t
    h = eta(alpha,beta)
    mu = 2 * h * t**(-1)
    var = 2*h*t**(-1) * (h + beta)**2
    var *= (1 + h/(h+beta) - h/alpha - h/(alpha + beta) - 2*h)
    var *= beta**-2
    std = np.sqrt(var)
    return stats.norm.logpdf(b,mu,std) - logsumexp(stats.norm.logpdf(np.arange(1,a+1),mu,std))
Example No. 29
def lnprob(params, nt, allFluxes, allFluxesVar, fmod_atZ, pmin, pmax):
    if np.any(params > pmax) or np.any(params < pmin):
        return -np.inf
    alphas = params[0:nt]
    betas = params[nt:2*nt][None, :]
    lnlike_grid = scalefree_flux_lnlikelihood_multiobj(
        allFluxes[:, None, :], allFluxesVar[:, None, :], fmod_atZ)  # no, nt
    p_t = dirichlet(alphas)
    p_z = redshifts * np.exp(-0.5 * redshifts**2 / betas) / betas  # p(z|t)
    p_z_t = p_z * p_t  # no, nt
    lnlike_lt = logsumexp(lnlike_grid + np.log(p_z_t), axis=1)
    return - np.sum(lnlike_lt)
Example No. 30
    def logpdf(self, x):
        """
        Evaluate the log of the estimated pdf on a provided set of points.
        """

        points = atleast_2d(x)

        d, m = points.shape
        if d != self.d:
            if d == 1 and m == self.d:
                # points was passed in as a row vector
                points = reshape(points, (self.d, 1))
                m = 1
            else:
                msg = "points have dimension %s, dataset has dimension %s" % (d,
                    self.d)
                raise ValueError(msg)

        result = zeros((m,), dtype=float)

        if m >= self.n:
            # there are more points than data, so loop over data
            energy = zeros((self.n, m), dtype=float)
            for i in range(self.n):
                diff = self.dataset[:, i, newaxis] - points
                tdiff = dot(self.inv_cov, diff)
                energy[i] = sum(diff*tdiff, axis=0) / 2.0
            # weight each data point (row of `energy`), not just the last loop index
            result = logsumexp(-energy,
                               b=self.weights[:, newaxis]*self.n/self._norm_factor,
                               axis=0)
        else:
            # loop over points
            for i in range(m):
                diff = self.dataset - points[:, i, newaxis]
                tdiff = dot(self.inv_cov, diff)
                energy = sum(diff * tdiff, axis=0) / 2.0
                result[i] = logsumexp(-energy,
                                      b=self.weights*self.n/self._norm_factor)

        return result
Example No. 31
def _convert_raw_seg_to_seg(raw_seg, run_lengths, alg_name, min_value, max_value):
    s, b, p = raw_seg['log_weights'].shape  # sequence length, batch size, number of particles
    act_dim = raw_seg['ac'].shape[-1]
    state_dim = raw_seg['ob'].shape[-1]

    mask = np.transpose(np.reshape(raw_seg['mask'], (s, p, b)),(0,2,1))
    ob = np.transpose(np.reshape(raw_seg['ob'], (s, p, b, state_dim)),(0,2,1,3))
    ac = np.transpose(np.reshape(raw_seg['ac'], (s, p, b, act_dim)),(0,2,1,3))

    log_p_div_q = raw_seg['log_p_z'] + raw_seg['log_p_x_given_z'] - raw_seg['log_q_z']
    log_p_div_q = np.transpose(np.reshape(log_p_div_q, (s, p, b)),(0,2,1))
    log_p_div_q = log_p_div_q * mask

    log_p_xz = raw_seg['log_p_z'] + raw_seg['log_p_x_given_z']
    log_p_xz = np.transpose(np.reshape(log_p_xz, (s, p, b)),(0,2,1))
    log_p_xz = log_p_xz * mask

    initial_pr = np.zeros((1, b, p))
    cum_prs = np.cumsum(log_p_div_q, axis=0)
    cum_prs = np.concatenate((initial_pr, cum_prs[:-1,:,:]), axis=0)    # (s,b,p)

    ob = np.reshape(ob, (s*b*p, state_dim))
    if alg_name == "reinforce":
        current_fef = 0
        future_fef = 0
        fef_weights = 0
        future_fef_weights = 0

        # compute high_var_coeff
        high_var_coeff = log_p_div_q - np.log(p)
    elif alg_name == "vimco":
        current_fef = 0
        future_fef = 0
        fef_weights = 0
        future_fef_weights = 0

        # compute high_var_coeff
        vimco_numerators = np.zeros((b,1,p))
        vimco_denominators = np.zeros((b, 1, p))

        for i in range(p):
            total_sum = np.sum(log_p_div_q * mask, axis=0)

            vimco_numerators = np.concatenate((vimco_numerators, np.reshape(total_sum, (b,1,p))), axis=1)

            total_sum[:,i] = (np.sum(total_sum, axis=-1) - total_sum[:,i]) / (p-1)
            total_sum = np.reshape(total_sum, (b,1,p))
            vimco_denominators = np.concatenate((vimco_denominators, total_sum), axis=1)

        vimco_numerators = vimco_numerators[:,1:,:]
        vimco_numerators = logsumexp(vimco_numerators, axis=-1)

        vimco_denominators = vimco_denominators[:, 1:, :]
        vimco_denominators = logsumexp(vimco_denominators, axis=-1)
        high_var_coeff = vimco_numerators - vimco_denominators
    elif alg_name == "vifle" or alg_name == "fr":
        current_fef = np.transpose(np.reshape(raw_seg['fef_vpred'], (s, p, b)), (0,2,1))
        current_fef = current_fef * mask  # (s,b,p)

        # future_value: log(V)
        future_fef = np.concatenate((current_fef[1:,:,:], np.zeros((1,b,p))), axis=0)
        fef_weights = np.zeros([s,b])
        future_fef_weights = np.zeros([s,b])
        for i in range(b):
            fef_weights[:run_lengths[i], i] = np.arange(run_lengths[i])[::-1] + 1
            future_fef_weights[:run_lengths[i] - 1, i] = np.arange(run_lengths[i] - 1)[::-1] + 1
        # compute high_var_coeff
        if alg_name == "vifle":
            total_sum = np.sum(log_p_div_q * mask, axis=0, keepdims=True)  # (1,b,p)
            total_sum = np.broadcast_to(total_sum, (s,b,p))
            vifle_numerators = np.tile(np.expand_dims(total_sum, axis=3),(1,1,1,p))
            vifle_denominators = np.tile(np.expand_dims(total_sum, axis=3),(1,1,1,p))

            for i in range(p):
                vifle_numerators[:,:,i,i] = (cum_prs + log_p_div_q)[:, :, i] + future_fef[:,:,i] * future_fef_weights
                vifle_denominators[:,:,i,i] = cum_prs[:, :, i] + current_fef[:,:,i] * fef_weights

            vifle_numerators = logsumexp(vifle_numerators, axis=-1)
            vifle_denominators = logsumexp(vifle_denominators, axis=-1)
            high_var_coeff = vifle_numerators - vifle_denominators
        else:
            fr_numerators = cum_prs + log_p_div_q +  future_fef * future_fef_weights
            fr_numerators = logsumexp(fr_numerators, axis=-1)

            fr_denominators = cum_prs + current_fef * fef_weights
            fr_denominators = logsumexp(fr_denominators, axis=-1)
            high_var_coeff = fr_numerators - fr_denominators
    else:
        raise ValueError("Undefined alg_name %s" % alg_name)
    seg = {
        'ob': ob,
        'ac': ac,
        'mask': mask,
        'log_p_xz': log_p_xz,
        'high_var_coeff': high_var_coeff,
        'target_pr': log_p_div_q,
        'future_fef': future_fef,
        'current_fef': current_fef,
        'fef_weights': fef_weights,
        'future_fef_weights': future_fef_weights
    }
    return seg
Example No. 32
    def conditional_probability_of_n_purchases_up_to_time(
            self, n, t, frequency, recency, T):
        """
        Return conditional probability of n purchases up to time t.

        Calculate the probability of n purchases up to time t for an individual
        with history frequency, recency and T (age).

        The main equation being implemented is (16) from:
        http://www.brucehardie.com/notes/028/pareto_nbd_conditional_pmf.pdf

        Parameters
        ----------
        n: int
            number of purchases.
        t: a scalar
            time up to which probability should be calculated.
        frequency: float
            historical frequency of customer.
        recency: float
            historical recency of customer.
        T: float
            age of the customer.

        Returns
        -------
        array_like
        """

        if t <= 0:
            return 0

        x, t_x = frequency, recency
        params = self._unload_params("r", "alpha", "s", "beta")
        r, alpha, s, beta = params

        if alpha < beta:
            min_of_alpha_beta, max_of_alpha_beta, p, _, _ = (alpha, beta,
                                                             r + x + n, r + x,
                                                             r + x + 1)
        else:
            min_of_alpha_beta, max_of_alpha_beta, p, _, _ = (beta, alpha,
                                                             s + 1, s + 1, s)
        abs_alpha_beta = max_of_alpha_beta - min_of_alpha_beta

        log_l = self._conditional_log_likelihood(params, x, t_x, T)
        log_p_zero = (gammaln(r + x) + r * log(alpha) + s * log(beta) -
                      (gammaln(r) +
                       (r + x) * log(alpha + T) + s * log(beta + T) + log_l))
        log_B_one = (
            gammaln(r + x + n) + r * log(alpha) + s * log(beta) -
            (gammaln(r) +
             (r + x + n) * log(alpha + T + t) + s * log(beta + T + t)))
        log_B_two = (
            r * log(alpha) + s * log(beta) + gammaln(r + s + x) +
            betaln(r + x + n, s + 1) + log(
                hyp2f1(r + s + x, p, r + s + x + n + 1, abs_alpha_beta /
                       (max_of_alpha_beta + T))) -
            (gammaln(r) + gammaln(s) +
             (r + s + x) * log(max_of_alpha_beta + T)))

        def _log_B_three(i):
            return (r * log(alpha) + s * log(beta) + gammaln(r + s + x + i) +
                    betaln(r + x + n, s + 1) + log(
                        hyp2f1(r + s + x + i, p, r + s + x + n + 1,
                               abs_alpha_beta / (max_of_alpha_beta + T + t))) -
                    (gammaln(r) + gammaln(s) +
                     (r + s + x + i) * log(max_of_alpha_beta + T + t)))

        zeroth_term = (n == 0) * (1 - exp(log_p_zero))
        first_term = n * log(t) - gammaln(n + 1) + log_B_one - log_l
        second_term = log_B_two - log_l
        third_term = logsumexp([
            i * log(t) - gammaln(i + 1) + _log_B_three(i) - log_l
            for i in range(n + 1)
        ],
                               axis=0)

        try:
            size = len(x)
            sign = np.ones(size)
        except TypeError:
            sign = 1

        # In some scenarios (e.g. large n) tiny numerical errors in the calculation of second_term and third_term
        # cause sumexp to be ever so slightly negative and logsumexp throws an error. Hence we ignore the sign here.
        return zeroth_term + exp(
            logsumexp([first_term, second_term, third_term],
                      b=[sign, sign, -sign],
                      axis=0,
                      return_sign=True)[0])
Example No. 33
    def log_likelihood_ratio(self):
        waveform_polarizations =\
            self.waveform_generator.frequency_domain_strain(self.parameters)

        if waveform_polarizations is None:
            return np.nan_to_num(-np.inf)

        matched_filter_snr_squared = 0
        optimal_snr_squared = 0
        matched_filter_snr_squared_tc_array = np.zeros(
            self.interferometers.frequency_array[0:-1].shape,
            dtype=np.complex128)
        for interferometer in self.interferometers:
            signal_ifo = interferometer.get_detector_response(
                waveform_polarizations, self.parameters)

            matched_filter_snr_squared += interferometer.matched_filter_snr_squared(
                signal=signal_ifo)
            optimal_snr_squared += interferometer.optimal_snr_squared(
                signal=signal_ifo)
            if self.time_marginalization:
                matched_filter_snr_squared_tc_array +=\
                    4 / self.waveform_generator.duration * np.fft.fft(
                        signal_ifo[0:-1] *
                        interferometer.frequency_domain_strain.conjugate()[0:-1] /
                        interferometer.power_spectral_density_array[0:-1])

        if self.time_marginalization:

            if self.distance_marginalization:
                rho_mf_ref_tc_array, rho_opt_ref = self._setup_rho(
                    matched_filter_snr_squared_tc_array, optimal_snr_squared)
                if self.phase_marginalization:
                    dist_marged_log_l_tc_array = self._interp_dist_margd_loglikelihood(
                        abs(rho_mf_ref_tc_array), rho_opt_ref)
                    log_l = logsumexp(dist_marged_log_l_tc_array,
                                      b=self.time_prior_array)
                else:
                    dist_marged_log_l_tc_array = self._interp_dist_margd_loglikelihood(
                        rho_mf_ref_tc_array.real, rho_opt_ref)
                    log_l = logsumexp(dist_marged_log_l_tc_array,
                                      b=self.time_prior_array)
            elif self.phase_marginalization:
                log_l = logsumexp(
                    self._bessel_function_interped(
                        abs(matched_filter_snr_squared_tc_array)),
                    b=self.time_prior_array) - optimal_snr_squared / 2
            else:
                log_l = logsumexp(
                    matched_filter_snr_squared_tc_array.real,
                    b=self.time_prior_array) - optimal_snr_squared / 2

        elif self.distance_marginalization:
            rho_mf_ref, rho_opt_ref = self._setup_rho(
                matched_filter_snr_squared, optimal_snr_squared)
            if self.phase_marginalization:
                rho_mf_ref = abs(rho_mf_ref)
            log_l = self._interp_dist_margd_loglikelihood(
                rho_mf_ref.real, rho_opt_ref)[0]

        elif self.phase_marginalization:
            matched_filter_snr_squared = self._bessel_function_interped(
                abs(matched_filter_snr_squared))
            log_l = matched_filter_snr_squared - optimal_snr_squared / 2

        else:
            log_l = matched_filter_snr_squared.real - optimal_snr_squared / 2

        return log_l.real
Example No. 34
    def _loss_grad_lbfgs(self, transformation, X, mask, sign=1.0):
        """Compute the loss and the loss gradient w.r.t. ``transformation``.

        Parameters
        ----------
        transformation : array, shape (n_components, n_features)
            The linear transformation on which to compute loss and evaluate
            gradient

        X : array, shape (n_samples, n_features)
            The training samples.

        mask : array, shape (n_samples, n_samples)
            A mask where ``mask[i, j] == 1`` if ``X[i]`` and ``X[j]`` belong
            to the same class, and ``0`` otherwise.

        Returns
        -------
        loss : float
            The loss computed for the given transformation.

        gradient : array, shape (n_components * n_features,)
            The new (flattened) gradient of the loss.
        """

        if self.n_iter_ == 0:
            self.n_iter_ += 1
            if self.verbose:
                header_fields = ['Iteration', 'Objective Value', 'Time(s)']
                header_fmt = '{:>10} {:>20} {:>10}'
                header = header_fmt.format(*header_fields)
                cls_name = self.__class__.__name__
                print('[{}]'.format(cls_name))
                print('[{}] {}\n[{}] {}'.format(cls_name, header, cls_name,
                                                '-' * len(header)))

        t_funcall = time.time()

        transformation = transformation.reshape(-1, X.shape[1])
        X_embedded = np.dot(X, transformation.T)  # (n_samples, n_components)

        # Compute softmax distances
        p_ij = pairwise_distances(X_embedded, squared=True)
        np.fill_diagonal(p_ij, np.inf)
        p_ij = np.exp(-p_ij - logsumexp(-p_ij, axis=1)[:, np.newaxis])
        # (n_samples, n_samples)

        # Compute loss
        masked_p_ij = p_ij * mask
        p = np.sum(masked_p_ij, axis=1, keepdims=True)  # (n_samples, 1)
        loss = np.sum(p)

        # Compute gradient of loss w.r.t. `transform`
        weighted_p_ij = masked_p_ij - p_ij * p
        gradient = 2 * (X_embedded.T.dot(weighted_p_ij + weighted_p_ij.T) -
                        X_embedded.T * np.sum(weighted_p_ij, axis=0)).dot(X)
        # time complexity: O(n_components x n_samples x
        # min(n_samples, n_features))

        if self.verbose:
            t_funcall = time.time() - t_funcall
            values_fmt = '[{}] {:>10} {:>20.6e} {:>10.2f}'
            print(
                values_fmt.format(self.__class__.__name__, self.n_iter_, loss,
                                  t_funcall))
            sys.stdout.flush()

        return sign * loss, sign * gradient.ravel()
Example No. 35
def normalize_log(l):
    return np.exp(l - logsumexp(l)).flatten()
Example No. 36
def get_ess(logw_norm):
    return np.exp(-logsumexp(2 * logw_norm))
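`get_ess` expects *normalised log* weights (`logw - logsumexp(logw)`); for normalised weights `w` it returns `1 / sum(w**2)`, the usual effective sample size of importance sampling. A short usage sketch with invented weights:

import numpy as np
from scipy.special import logsumexp

logw = np.array([-1.0, -2.0, -0.5, -3.0])   # unnormalised log importance weights
logw_norm = logw - logsumexp(logw)          # normalised log weights (what get_ess expects)

ess = np.exp(-logsumexp(2 * logw_norm))     # the quantity get_ess returns

w = np.exp(logw_norm)                       # same check in linear space
assert np.isclose(ess, 1.0 / np.sum(w ** 2))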
Example No. 37
def test_logsumexp():
    # Test whether logsumexp() function correctly handles large inputs.
    a = np.arange(200)
    desired = np.log(np.sum(np.exp(a)))
    assert_almost_equal(logsumexp(a), desired)

    # Now test with large numbers
    b = [1000, 1000]
    desired = 1000.0 + np.log(2.0)
    assert_almost_equal(logsumexp(b), desired)

    n = 1000
    b = np.full(n, 10000, dtype='float64')
    desired = 10000.0 + np.log(n)
    assert_almost_equal(logsumexp(b), desired)

    x = np.array([1e-40] * 1000000)
    logx = np.log(x)

    X = np.vstack([x, x])
    logX = np.vstack([logx, logx])
    assert_array_almost_equal(np.exp(logsumexp(logX)), X.sum())
    assert_array_almost_equal(np.exp(logsumexp(logX, axis=0)), X.sum(axis=0))
    assert_array_almost_equal(np.exp(logsumexp(logX, axis=1)), X.sum(axis=1))

    # Handling special values properly
    assert_equal(logsumexp(np.inf), np.inf)
    assert_equal(logsumexp(-np.inf), -np.inf)
    assert_equal(logsumexp(np.nan), np.nan)
    assert_equal(logsumexp([-np.inf, -np.inf]), -np.inf)

    # Handling an array with different magnitudes on the axes
    assert_array_almost_equal(
        logsumexp([[1e10, 1e-10], [-1e10, -np.inf]], axis=-1), [1e10, -1e10])

    # Test keeping dimensions
    assert_array_almost_equal(
        logsumexp([[1e10, 1e-10], [-1e10, -np.inf]], axis=-1, keepdims=True),
        [[1e10], [-1e10]])

    # Test multiple axes
    assert_array_almost_equal(
        logsumexp([[1e10, 1e-10], [-1e10, -np.inf]], axis=(-1, -2)), 1e10)
Example No. 38
def test_logsumexp_b_shape():
    a = np.zeros((4, 1, 2, 1))
    b = np.ones((3, 1, 5))

    logsumexp(a, b=b)
Example No. 39
def test_logsumexp_b_zero():
    a = [1, 10000]
    b = [1, 0]

    assert_almost_equal(logsumexp(a, b=b), 1)
Example No. 40
 def log_prob(self, x):
     return logsumexp(self.log_prob_components(x) + np.log(self.priors)[:, None], axis=0)
Example No. 41
def prob_by_vocab_overlap_sent():
  base_lans = ["aze"]
  #ts = [0.01, 0.05, 0.1, 0.1]
  #argmaxs = [False, False, False, True]
  ts = [0.1]
  argmaxs = [True]
  for base_lan in base_lans:
    for t, argmax in zip(ts, argmaxs):
      trg2srcs = {}
      lan_lists = [l.strip() for l in open("langs.txt", 'r').readlines()]
      lans = []
      for l in lan_lists:
        if l != base_lan: lans.append(l)
      lan_lists = lans

      out_probs = []
      for i, lan in enumerate(lan_lists):
        lm_file = "lmll/ted-train.mtok.{}.{}-vocab".format(lan, base_lan)
        lm_score = [float(l) for l in open(lm_file, 'r').readlines()]

        trg_file = "data/{}_eng/ted-train.mtok.spm8000.eng".format(lan)
        trg_sents = open(trg_file, 'r').readlines()
        out_probs.append([0 for _ in range(len(trg_sents))])
        line = 0
        for j, trg in enumerate(trg_sents):
          if trg not in trg2srcs: trg2srcs[trg] = []
          trg2srcs[trg].append([i, line, lm_score[j]])
          line += 1
      print("eng size: {}".format(len(trg2srcs)))
      for trg, src_list in trg2srcs.items():
        if argmax:
          max_score = 0
          for s in src_list:
            max_score = max(s[2], max_score)
          for s in src_list:
            if s[2] == max_score:
              out_probs[s[0]][s[1]] = 1
            else:
              out_probs[s[0]][s[1]] = 0
        else:
          sum_score = 0
          log_score = []
          for s in src_list:
            #s[2] = np.exp(-s[2] / t)
            #sum_score += s[2]
            s[2] = s[2] / t
            log_score.append(s[2])
          sum_score = logsumexp(log_score)
          for s in src_list:
            #s[2] = s[2] / sum_score
            s[2] = np.exp(s[2] - sum_score)
            out_probs[s[0]][s[1]] = s[2]

      for i, lan in enumerate(lan_lists):
        if argmax:
          out = open("data/{}_eng/ted-train.mtok.{}.prob-vocab-sent-{}-am".format(lan, lan, base_lan), "w")
        else:
          out = open("data/{}_eng/ted-train.mtok.{}.prob-vocab-sent-{}-t{}".format(lan, lan, base_lan, t), "w")
        #out = open(data_dir + "{}_en/ted-train.mtok.{}.prob-rank-{}-t{}-k{}-el".format(lan, lan, base_lan, t, k), "w")
        for p in out_probs[i]:
          out.write("{}\n".format(p))
        out.close()
      if argmax:
        out = open("data/{}_eng/ted-train.mtok.{}.prob-vocab-sent-{}-am".format(base_lan, base_lan, base_lan), "w")
      else:
        out = open("data/{}_eng/ted-train.mtok.{}.prob-vocab-sent-{}-t{}".format(base_lan, base_lan, base_lan, t), "w")
      #out = open(data_dir + "{}_en/ted-train.mtok.{}.prob-rank-{}-t{}-k{}".format(base_lan, base_lan, base_lan, t, k), "w")
      base_lines = len(open("data/{}_eng/ted-train.mtok.spm8000.eng".format(base_lan)).readlines())
      #base_lines = len(open(data_dir + "{}_en/ted-train.mtok.spm8000.en".format(base_lan)).readlines())
      for i in range(base_lines):
        out.write("{}\n".format(1))
      out.close()
Example No. 42
def plot_node(network,
              models,
              models_err,
              pos=None,
              idx=None,
              models_x=None,
              Nrsamp=1,
              Nmc=5,
              node_kwargs=None,
              violin_kwargs=None,
              rstate=None,
              discrete=False,
              *args,
              **kwargs):
    """
    Plot a 2-D projection of the network colored by the chosen variable.

    Parameters
    ----------
    network : `~frankenz.networks._Network`-derived object
        The trained and populated network object.

    models : `~numpy.ndarray` with shape (Nobj, Ndim)
        The models mapped onto the network.

    models_err : `~numpy.ndarray` with shape (Nobj, Ndim)
        Errors on the models.

    pos : tuple of shape (Nproj), optional
        The `Nproj`-dimensional position of the node. Mutually exclusive with
        `idx`.

    idx : int, optional
        Index of the node. Mutually exclusive with `pos`.

    models_x : `~numpy.ndarray` with shape (Ndim), optional
        The `x` values corresponding to the `Ndim` model values.

    Nrsamp : int, optional
        Number of times to resample the weighted collection of models
        associated with the given node. Default is `1`.

    Nmc : int, optional
        The number of Monte Carlo realizations of the model values if the
        errors are provided. Default is `5`.

    node_kwargs : kwargs, optional
        Keyword arguments to be passed to `~matplotlib.pyplot.plot` when
        plotting the node model.

    violin_kwargs : kwargs, optional
        Keyword arguments to be passed to `~matplotlib.pyplot.violinplot`
        when plotting the distribution of model values.

    rstate : `~numpy.random.RandomState` instance, optional
        Random state instance. If not passed, the default `~numpy.random`
        instance will be used.

    discrete : bool, optional
        Whether to assign weights based **only** on the best-fitting node
        rather than all nodes an object might be associated with.
        Default is `False`.

    """

    # Initialize values.
    if node_kwargs is None:
        node_kwargs = dict()
    if violin_kwargs is None:
        violin_kwargs = dict()
    if rstate is None:
        rstate = np.random
    if idx is None and pos is None:
        raise ValueError("Either `idx` or `pos` must be specified.")
    elif idx is not None and pos is not None:
        raise ValueError("Both `idx` and `pos` cannot be specified.")
    if models_x is None:
        models_x = np.arange(models.shape[-1]) + 1
    node_kwargs['color'] = node_kwargs.get('color', 'black')
    node_kwargs['marker'] = node_kwargs.get('marker', '*')
    node_kwargs['markersize'] = node_kwargs.get('markersize', '10')
    node_kwargs['alpha'] = node_kwargs.get('alpha', 0.6)
    violin_kwargs['widths'] = violin_kwargs.get('widths', 600)
    violin_kwargs['showextrema'] = violin_kwargs.get('showextrema', False)

    # Get node.
    (idx, node_model, pos, idxs, logwts, scales,
     scales_err) = network.get_node(pos=pos, idx=idx, discrete=discrete)
    tmodels, tmodels_err = models[idxs], models_err[idxs]  # grab models
    wts = np.exp(logwts - logsumexp(logwts))  # compute weights

    # Resample models.
    Nmatch = len(idxs)
    idx_rsamp = rstate.choice(Nmatch, p=wts, size=Nmatch * Nrsamp)

    # Perturb model values.
    tmodels_mc = rstate.normal(tmodels[idx_rsamp], tmodels_err[idx_rsamp])

    # Rescale results.
    snorm = np.mean(np.array(scales)[idx_rsamp])
    tmodels_mc /= (np.array(scales)[idx_rsamp, None] / snorm)

    # Rescale baseline model (correction should be small in most cases).
    mean_model = np.mean(tmodels_mc, axis=0)
    std_model = np.std(tmodels_mc, axis=0)
    num = np.dot(mean_model / std_model, node_model / std_model)
    den = np.dot(node_model / std_model, node_model / std_model)
    node_scale = num / den
    if abs(node_scale - 1.) < 0.05:
        node_scale = 1.

    # Plot results.
    plt.plot(models_x, node_model * node_scale, **node_kwargs)
    for i in range(models.shape[-1]):
        vals = tmodels_mc[:, i]
        plt.violinplot(vals, [models_x[i]], **violin_kwargs)
    plt.ylim(
        [min(mean_model - 3 * std_model),
         max(mean_model + 3 * std_model)])
Example No. 43
 def _transform(self, p_h, ionic_strength, temperature):
     return -R * temperature * logsumexp(
         self._dG0_prime_vector(p_h, ionic_strength, temperature) /
         (-R * temperature))
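`-R*T*logsumexp(g / (-R*T))` is a Boltzmann-weighted soft minimum of the energy vector `g`: it never exceeds `min(g)` and lies within `R*T*log(n)` of it. A standalone numeric illustration with invented energies (R in kJ/(mol K)):

import numpy as np
from scipy.special import logsumexp

R = 8.314e-3                            # gas constant, kJ / (mol K)
T = 298.15                              # temperature, K
g = np.array([-35.0, -33.5, -30.0])     # invented pseudoisomer energies, kJ/mol

soft_min = -R * T * logsumexp(g / (-R * T))

# the transform is a lower bound on the energies and lies within R*T*log(n) of the minimum
assert soft_min <= g.min()
assert g.min() - soft_min <= R * T * np.log(len(g))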
Example No. 44
    def _approx_bound(self, X, doc_topic_distr, sub_sampling):
        """Estimate the variational bound.

        Estimate the variational bound over "all documents" using only the
        documents passed in as X. Since log-likelihood of each word cannot
        be computed directly, we use this bound to estimate it.

        Parameters
        ----------
        X : array-like or sparse matrix, shape=(n_samples, n_features)
            Document word matrix.

        doc_topic_distr : array, shape=(n_samples, n_components)
            Document topic distribution. In the literature, this is called
            gamma.

        sub_sampling : boolean, optional, (default=False)
            Compensate for subsampling of documents.
            It is used when calculating the bound in online learning.

        Returns
        -------
        score : float

        """
        def _loglikelihood(prior, distr, dirichlet_distr, size):
            # calculate log-likelihood
            score = np.sum((prior - distr) * dirichlet_distr)
            score += np.sum(gammaln(distr) - gammaln(prior))
            score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1)))
            return score

        is_sparse_x = sp.issparse(X)
        n_samples, n_components = doc_topic_distr.shape
        n_features = self.components_.shape[1]
        score = 0

        dirichlet_doc_topic = _dirichlet_expectation_2d(doc_topic_distr)
        dirichlet_component_ = _dirichlet_expectation_2d(self.components_)
        doc_topic_prior = self.doc_topic_prior_
        topic_word_prior = self.topic_word_prior_

        if is_sparse_x:
            X_data = X.data
            X_indices = X.indices
            X_indptr = X.indptr

        # E[log p(docs | theta, beta)]
        for idx_d in range(0, n_samples):
            if is_sparse_x:
                ids = X_indices[X_indptr[idx_d]:X_indptr[idx_d + 1]]
                cnts = X_data[X_indptr[idx_d]:X_indptr[idx_d + 1]]
            else:
                ids = np.nonzero(X[idx_d, :])[0]
                cnts = X[idx_d, ids]
            temp = (dirichlet_doc_topic[idx_d, :, np.newaxis] +
                    dirichlet_component_[:, ids])
            norm_phi = logsumexp(temp, axis=0)
            score += np.dot(cnts, norm_phi)

        # compute E[log p(theta | alpha) - log q(theta | gamma)]
        score += _loglikelihood(doc_topic_prior, doc_topic_distr,
                                dirichlet_doc_topic, self.n_components)

        # Compensate for the subsampling of the population of documents
        if sub_sampling:
            doc_ratio = float(self.total_samples) / n_samples
            score *= doc_ratio

        # E[log p(beta | eta) - log q (beta | lambda)]
        score += _loglikelihood(topic_word_prior, self.components_,
                                dirichlet_component_, n_features)

        return score
Example No. 45
def Parallel_estimate_mixture_params(EmissionParameters, curr_counts_orig, curr_nr_of_counts_orig, curr_state, rand_sample_size, max_nr_iter, nr_of_iter=20, stop_crit=1.0, nr_of_init=10, verbosity=1):
	'''
	This function estimates the Dirichlet-multinomial mixture parameters.
	'''

	#1) Copy the old parameters and use them as the initialisation for the first iteration

	alphas_list = []
	mixtures_list = []
	lls_list = []
	curr_counts = deepcopy(curr_counts_orig)
	curr_nr_of_counts = deepcopy(curr_nr_of_counts_orig)

	if len(curr_counts.shape) == 1:
		curr_counts = np.expand_dims(curr_counts, axis=1)

	if np.sum(np.sum(curr_counts, axis=0) > 0) > 0:
		curr_nr_of_counts = curr_nr_of_counts[:, np.sum(curr_counts, axis=0) >0]
		curr_counts = curr_counts[:, np.sum(curr_counts, axis=0) >0]

	#Test for fitting distributions only on diag events
	if np.sum( np.sum(curr_counts, axis=0) > 10) > 10:
		curr_nr_of_counts = curr_nr_of_counts[:, np.sum(curr_counts, axis=0) > 10]
		curr_counts = curr_counts[:, np.sum(curr_counts, axis=0) > 10]

	tracks_per_rep = EmissionParameters['Diag_event_params']['alpha'][curr_state].shape[0]
	NrOfReplicates = curr_counts.shape[0] / tracks_per_rep
	
	if len(curr_counts.shape) == 1:
		curr_counts = np.expand_dims(curr_counts, axis=1)

	#Save old lls mixtures and alphas
	mixtures = deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state])

	scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) 
	scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1]))
	ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts)))

	alphas_list.append(deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state]))
	mixtures_list.append(deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state]))
	lls_list.append(ll)
 	
	np_proc = EmissionParameters['NbProc']
	data = zip(itertools.repeat(stop_crit), itertools.repeat(rand_sample_size), itertools.repeat(max_nr_iter), list(range(nr_of_init)), itertools.repeat(EmissionParameters), itertools.repeat(curr_state), itertools.repeat(curr_counts), itertools.repeat(curr_nr_of_counts)   )

	if np_proc == 1:
		results = [Parallel_estimate_single_mixture_params(args) for args in data]
	else:
		print("Spawning processes")
		pool = multiprocessing.Pool(np_proc, maxtasksperchild=5)
		results = pool.imap(Parallel_estimate_single_mixture_params, data, chunksize=1)
		pool.close()
		pool.join()
		print("Collecting results")
		results = [res for res in results]


	alphas_list += [res[0] for res in results]
	mixtures_list += [res[1] for res in results]
	lls_list += [res[2] for res in results]

	#select which alpha had the highest ll
	max_ll_pos = np.argmax(np.array(lls_list))

	#pdb.set_trace()
	alpha = alphas_list[max_ll_pos]
	mixtures = mixtures_list[max_ll_pos]
	return alpha, mixtures
Example No. 46
    def posteriorPredictive(self, yi, y, return_params=False, orient=False):
        y = np.array(y)
        if not y.size:
            return self.nullPosteriorPredictive(yi)
        if not y.size or len(y.shape) == 1 or len(y.shape) == 0:
            y = y.reshape(1, -1)
        N = len(y)  # number of observations
        K = len(y[0])  # number of dimensions of each observation
        for i in range(N):
            assert len(y[i]) == K
        yi = np.array(yi)
        if len(yi.shape) > 1 and yi.shape[0] == 1:  # yi i
            yi = yi[0]
        try:
            assert (len(yi) == K)
        except TypeError:
            # if yi is zero-dimensional
            yi = np.array([
                yi,
            ])

        # p(yi|D) = int_params p(yi|params) p(params|D)
        # p(params|D) = p(D|params) p(params|params_0) / p(D)
        # p(yi|params) is predictive likelihood (need full matrix)
        # construct p(params|D) matrix from p(D|params) matrix (marginal likelihood matrix)
        #   and p(params|params_0) (marginal likelihood prior matrix)
        # p(D) is marginal likelihood
        _, pYk, pParams, pYkGivenParams, mv, lv = self.marginalLikelihood(
            y, return_dists=True)
        pParamsGivenYk = np.array(pYkGivenParams)
        for k in range(K):
            pParamsGivenYk[k] = pParams + pYkGivenParams[k] - pYk[k]

        mus, lambdas = self.mus, self.lambdas
        logdmudlambda = np.log(1. / len(mus) * 1. / len(lambdas))
        pYikGivenParams = np.empty((K, len(mus), len(lambdas)))
        # pdf(x, lambda, mu) = pdf(x-mu, lambda), so instead of looping over mu,
        # we calculate pdf(x-mus, lambda)
        for l in range(len(lambdas)):
            for k in range(K):
                # multiply by dmu dlambda when converting from pdf to discrete probability
                pYikGivenParams[k, :, l] = stats.vonmises.logpdf(
                    yi[k] - mus, lambdas[l]) + logdmudlambda

        if orient:
            pYikGivenYkWithOffset = np.empty((K, len(mus)))
            for k in range(K):
                p = pParamsGivenYk[k]
                for offset in range(len(mus)):
                    l = np.roll(pYikGivenParams[k], offset, axis=0)
                    pYikGivenYkWithOffset[k, offset] = logsumexp(p + l)
            # sum over k
            pYiGivenYWithOffset = np.sum(pYikGivenYkWithOffset, axis=0)
            bestOffset = np.argmax(pYiGivenYWithOffset)
            dmu = mus[1] - mus[0]
            orientation = np.mod(-bestOffset * dmu, 2 * np.pi)
            return pYiGivenYWithOffset[bestOffset], orientation
        else:
            # p(yi|D) = prod_k p(yi[k]|D[k]), equivalently sum of logs
            pYikGivenYk = np.array(yi)
            for k in range(K):
                pYikGivenYk[k] = logsumexp(pParamsGivenYk[k] +
                                           pYikGivenParams[k])
            return np.sum(pYikGivenYk)
Example No. 47
def log_normalize_log(unnormalized):
    return unnormalized - logsumexp(unnormalized)
Example No. 48
    def marginalLikelihood(self, y, return_dists=False):
        y = np.array(y)
        if not y.size:
            return 0.0  # log likelihood of 0 if y is empty
        if not y.size or len(y.shape) == 1 or len(y.shape) == 0:
            y = y.reshape(1, -1)
        N = len(y)  # number of observations
        K = len(y[0])  # number of dimensions of each observation
        for i in range(N):
            assert len(y[i]) == K

        mus, lambdas = self.mus, self.lambdas
        logdmudlambda = np.log(1. / len(mus) * 1. / len(lambdas))
        pParams = self.logprior
        try:
            pYkGivenParams = self.marginalLikelihoodCache[y.tobytes()]
        except KeyError:
            pYkGivenParams = np.empty((K, len(mus), len(lambdas)))
            # pdf(x, lambda, mu) = pdf(x-mu, lambda), so instead of looping over mu,
            # we calculate pdf(x-mus, lambda)
            for l in range(len(lambdas)):
                for k in range(K):
                    yikxmu = np.tile(y[:, k, np.newaxis], len(mus))
                    # multiply by dmu dlambda when converting from pdf to discrete probability
                    tmp = stats.vonmises.logpdf(yikxmu - mus,
                                                lambdas[l]) + logdmudlambda
                    # sum over yi in y
                    pYkGivenParams[k, :, l] = np.sum(tmp, axis=0)
            self.marginalLikelihoodCache[y.tobytes()] = pYkGivenParams
        pYk = np.empty((K, ))
        for k in range(K):
            pYk[k] = logsumexp(pParams + pYkGivenParams[k])
        # sum over k
        pY = np.sum(pYk)
        if return_dists:
            plot_dists = False
            if plot_dists:
                plt.figure()
                plt.subplot(2, 2, 1)
                plt.imshow(pParams,
                           aspect='auto',
                           vmin=-50,
                           vmax=0,
                           origin='lower',
                           extent=[
                               np.min(lambdas),
                               np.max(lambdas),
                               np.min(mus),
                               np.max(mus)
                           ])
                plt.ylabel('mu')
                plt.title('prior over parameters')
                plt.colorbar()
                plt.subplot(2, 2, 2)
                plt.imshow(pYkGivenParams[0],
                           aspect='auto',
                           vmin=-50,
                           vmax=0,
                           origin='lower',
                           extent=[
                               np.min(lambdas),
                               np.max(lambdas),
                               np.min(mus),
                               np.max(mus)
                           ])
                plt.colorbar()
                plt.xlabel('lambda')
                plt.title('likelihood of data given parameters')
                plt.subplot(2, 2, 3)
                plt.imshow(pParams + pYkGivenParams[0],
                           aspect='auto',
                           vmin=-50,
                           vmax=0,
                           origin='lower',
                           extent=[
                               np.min(lambdas),
                               np.max(lambdas),
                               np.min(mus),
                               np.max(mus)
                           ])
                plt.ylabel('mu')
                plt.xlabel('lambda')
                plt.title('posterior of data')
                plt.colorbar()
        if return_dists:
            return pY, pYk, pParams, pYkGivenParams, self.mv, self.lv
        return pY
def softmax(x, axis=None) -> np.ndarray:
    """
    Computes the softmax of the input array in a numerically stable way
    (normalizing in log space via logsumexp before exponentiating).
    """
    return np.exp(x - logsumexp(x, axis=axis, keepdims=True))
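# Usage sketch (added for illustration, not part of the original snippet): the
# logsumexp-based softmax stays finite where a naive exp/sum would overflow, and it
# matches the naive computation on well-scaled inputs.
def _softmax_sanity_check():
    big = np.array([1000.0, 1001.0, 1002.0])   # np.exp(big) would overflow to inf
    p = softmax(big)
    assert np.isfinite(p).all() and np.isclose(p.sum(), 1.0)
    small = np.array([0.1, 0.2, 0.3])
    assert np.allclose(softmax(small), np.exp(small) / np.exp(small).sum())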
Exemplo n.º 50
0
def prefix_search_log_cy(y_, alphabet=DNA_alphabet, return_forward=False):

    y = y_.astype(np.float64)

    # initialize prefix search variables
    stop_search = False
    search_level = 0
    top_label = ''
    curr_label = ''
    curr_label_alphas = []
    # log-probability of the empty label (all gaps): sum of the per-frame log gap probabilities
    gap_prob = np.sum(y[:, -1])
    label_prob = {'': gap_prob}

    # initialize variables for 1d forward probabilities
    alpha_prev = decoding_cy.forward_vec_log(-1, search_level, y)
    #print(alpha_prev)
    top_forward = np.array([])
    prefix_forward = np.zeros(shape=(len(alphabet), len(y), len(y))) + LOG_0

    while not stop_search:
        prefix_prob = {}
        prefix_alphas = []
        search_level += 1

        for c, c_i in alphabet.items():
            prefix = curr_label + c
            prefix_int = [alphabet[i] for i in prefix]
            if c_i == 0:
                best_prefix = prefix

            alpha_ast = forward_vec_no_gap_log(prefix_int, y, alpha_prev)
            prefix_prob[prefix] = logsumexp(alpha_ast)

            # calculate label probability
            alpha = decoding_cy.forward_vec_log(c_i,
                                                search_level,
                                                y,
                                                previous=alpha_prev)
            prefix_forward[c_i, search_level - 1] = alpha
            label_prob[prefix] = alpha[-1]
            if label_prob[prefix] > label_prob[top_label]:
                top_label = prefix
                top_forward = prefix_forward[c_i, :len(prefix)]
                #print(len(top_label),len(top_forward))
            if prefix_prob[prefix] > prefix_prob[best_prefix]:
                best_prefix = prefix
            prefix_alphas.append(alpha)

            #print(search_level, 'extending by prefix:',c, 'Prefix Probability:',prefix_prob[prefix], 'Label probability:',label_prob[prefix], file=sys.stderr)

        #best_prefix = max(prefix_prob.items(), key=operator.itemgetter(1))[0]
        #print('best prefix is:',best_prefix, file=sys.stderr)

        if prefix_prob[best_prefix] < label_prob[top_label]:
            stop_search = True
        else:
            # get highest probability label
            #top_label = max(label_prob.items(), key=operator.itemgetter(1))[0]
            # then move to prefix with highest prefix probability
            curr_label = best_prefix
            alpha_prev = prefix_alphas[alphabet[curr_label[-1]]]

    if return_forward:
        return (top_label, top_forward.T)
    else:
        return (top_label, label_prob[top_label])
Exemplo n.º 51
0
def em_pmf(q, eps=1.0, sensitivity=1.0, monotonic=False):
    """Exponential-mechanism selection probabilities for the quality scores q."""
    coef = 1.0 if monotonic else 0.5
    q = q - q.max()  # shift for numerical stability; does not change the resulting pmf
    logits = coef*eps/sensitivity*q
    return np.exp(logits - logsumexp(logits))
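# Usage sketch (an assumption, not part of the original snippet): the usual way to use
# these exponential-mechanism probabilities is to sample a candidate index from them;
# `rng` is a hypothetical generator argument.
def em_select(q, eps=1.0, sensitivity=1.0, monotonic=False, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    p = em_pmf(np.asarray(q, dtype=float), eps=eps,
               sensitivity=sensitivity, monotonic=monotonic)
    return rng.choice(len(p), p=p)  # index of the selected candidate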
Exemplo n.º 52
0
 def scipy_fun(array_to_reduce):
     return osp_special.logsumexp(array_to_reduce,
                                  axis,
                                  keepdims=keepdims,
                                  return_sign=return_sign)
Exemplo n.º 53
0
def train(speaker, X, M=8, epsilon=0.0, maxIter=20):
    ''' Train a model for the given speaker. Returns the theta (omega, mu, sigma)'''
    myTheta = theta(speaker, M, X.shape[1])
    #print ('TODO')

    # define variables
    T, d = X.shape

    # initialize
    ind = random.sample(range(T), M)
    myTheta.mu = X[np.array(ind)]
    myTheta.Sigma = np.ones(
        (M, d))  # this Mxd matrix consists of M diagonals of dxd matrix
    myTheta.omega[..., 0] = float(1) / M
    i = 0
    prev_L = float('-inf')
    improvement = float('inf')
    # log_Bs = np.zeros((M, T))
    # log_Ps = np.zeros((M, T))

    while i <= maxIter and improvement >= epsilon:
        preComputedForM = np.array(preCompute(myTheta)).reshape(
            (M, 1))  # M x 1

        # # compute log_Bs
        # # nested loop --- really slow for training
        # for m in tqdm(range(0, M)):
        #     for t in tqdm(range(0, T)):
        #         # log_Bs[m, t] = log_b_m_x( m, X[t], myTheta )
        #         log_Ps[m, t] = log_p_m_x( m, X[t], myTheta )
        # print("for loop: {}".format(log_Ps))

        # for efficiency, use matrix operation to compute log_Bs
        sigmaSquare = np.reciprocal(myTheta.Sigma,
                                    where=(myTheta.Sigma != 0))  # M x d
        xSquare = (0.5 * (X**2)).T  # d x T
        term1 = (-1) * np.dot(sigmaSquare, xSquare)  # M x T
        term2 = np.multiply(myTheta.mu, sigmaSquare)  # M x d
        term3 = np.dot(term2, X.T)  # M x T
        log_Bs = term1 + term3 - preComputedForM
        # print(log_Bs)

        # compute likelihood and update loop constraints
        L = logLik(log_Bs, myTheta)
        improvement = L - prev_L
        prev_L = L
        i += 1

        # compute Ps for the purpose of updating parameters
        # term4 = myTheta.omega * np.exp(log_Bs) # M x T
        # term5 = np.sum(term4, axis=0) # 1 x T
        # Ps = np.divide(term4, term5, out=np.zeros_like(term4), where=(term5 > 0)) # M x T

        # use logsumexp to compute in a more stable way
        term4 = np.log(myTheta.omega) + log_Bs - logsumexp(
            log_Bs, b=myTheta.omega, axis=0)
        Ps = np.exp(term4)  # make sure Ps >= 0
        # print(term4)
        # print(Ps)

        # update parameters
        term6 = np.sum(Ps, axis=1).reshape((M, 1))
        myTheta.omega = term6 / float(T)  # M times 1
        term7 = np.dot(Ps, X)
        myTheta.mu = np.divide(
            term7, term6, out=np.zeros_like(term7),
            where=(term6 != 0))  # M times d and M times 1 --> M x d
        term8 = np.dot(Ps, X**2)
        myTheta.Sigma = np.divide(
            term8, term6, out=np.zeros_like(term8), where=(term6 != 0)) - (
                myTheta.mu**2)  # M x d
        # print(myTheta.Sigma)

    return myTheta
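# Minimal sketch (added here, not part of the original assignment code) of the stable
# E-step used above: responsibilities are formed entirely in log space with logsumexp
# rather than by exponentiating log_Bs directly. Assumes omega has shape (M, 1) and
# log_Bs has shape (M, T), matching myTheta.omega and log_Bs above.
def stable_responsibilities(log_Bs, omega):
    log_num = np.log(omega) + log_Bs              # log(omega_m * b_m(x_t)), shape M x T
    log_den = logsumexp(log_Bs, b=omega, axis=0)  # log sum_m omega_m * b_m(x_t), shape T
    return np.exp(log_num - log_den)              # each column sums to 1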
Exemplo n.º 54
0
 def numeric(self, values):
     """Evaluates e^x elementwise, sums, and takes the log.
     """
     return logsumexp(values[0], axis=self.axis, keepdims=self.keepdims)
Exemplo n.º 55
0
def importance_sampler(raw_data, analysis_settings):
    """
   Recovers a curve that best explains the relationship between the predictor and dependent variables
   
   **Arguments**:
   - raw_data: The data matrix (total number of trials x 6 columns). Refer to RUN_IMPORTANCE_SAMPLER()
   - analysis_settings: A struct that holds algorithm relevant settings. Refer to RUN_IMPORTANCE_SAMPLER()

    Saves a .mat file in `current_path/analysis_id/analysis_id_importance_sampler.mat`
   """

    time = datetime.datetime.now()
    print('Start time {}/{} {}:{}'.format(time.month, time.day, time.hour,
                                          time.minute))

    # Resetting the random number seed
    random.seed()
    seed = random.getstate()

    # Preprocessing the data matrix and updating the analysis_settings struct with additional/missing information
    preprocessed_data, ana_opt = preprocessing_setup(raw_data,
                                                     analysis_settings)
    del raw_data
    del analysis_settings

    # Housekeeping
    importance_sampler = {}  # Creating the output struct
    hold_betas_per_iter = np.full(
        (ana_opt['em_iterations'] + 1, 2),
        np.nan)  # Matrix to hold betas over em iterations
    exp_max_f_values = np.full(
        (ana_opt['em_iterations'], 1),
        np.nan)  # Matrix to hold the f_values over em iterations
    normalized_w = np.full(
        (ana_opt['em_iterations'] + 1, ana_opt['particles']),
        np.nan)  # to hold the normalized weights

    global tau
    global bounds
    global w
    global net_effects
    global dependent_var

    # fetch parameters
    tau = ana_opt['tau']  # Store the tau for convenience
    bounds = family_of_curves(
        ana_opt['curve_type'],
        'get_bounds')  # Get the curve parameter absolute bounds
    nParam = family_of_curves(
        ana_opt['curve_type'],
        'get_nParams')  # Get the number of curve parameters
    hold_betas = [ana_opt['beta_0'],
                  ana_opt['beta_1']]  # Store the betas into a vector

    for em in range(ana_opt['em_iterations']):  # for every em iteration
        hold_betas_per_iter[
            em, :] = hold_betas  # Store the logreg betas over em iterations
        print('Betas: {}, {}'.format(hold_betas[0], hold_betas[1]))
        print('EM Iteration: {}'.format(em))

        # Initialize the previous iteration curve parameters, weight vector, net_effects and dependent_var matrices
        # Matrix to hold the previous iteration curve parameters
        prev_iter_curve_param = np.full(
            (ana_opt['particles'],
             family_of_curves(ana_opt['curve_type'], 'get_nParams')), np.nan)
        w = np.full((ana_opt['particles']),
                    np.nan)  # Vector to hold normalized weights

        # Matrix to hold the predictor variables (taking net effects if relevant) over all particles
        net_effects = np.full(
            (len(ana_opt['net_effect_clusters']), ana_opt['particles']),
            np.nan)
        dependent_var = np.array(
            []
        )  # can't be initialized in advance as we don't know its length (dropping outliers)

        # Sampling curve parameters
        if em == 0:  # only for the first em iteration
            param = common_to_all_curves(
                ana_opt['curve_type'], 'initial_sampling',
                ana_opt['particles'],
                ana_opt['resolution'])  # Good old uniform sampling
        else:  # for em iterations 2, 3, etc
            # Sample curve parameters from previous iteration's curve parameters based on normalized weights
            prev_iter_curve_param = param  # we need previous iteration's curve parameters to compute likelihood

            # Here we sample curves (with repetitions) based on the weights
            param = prev_iter_curve_param[
                random.choices(np.arange(ana_opt['particles']),
                               k=ana_opt['particles'],
                               weights=normalized_w[em - 1, :]), :]
            # Add Gaussian noise since some curves are going to be identical due to the repetitions
            # NOISE: Sample from truncated normal distribution using individual curve parameter bounds,
            # mean = sampled curve parameters and sigma = tau
            for npm in range(nParam):
                param[:, npm] = truncated_normal(bounds[npm, 0],
                                                 bounds[npm, 1], param[:, npm],
                                                 tau, ana_opt['particles'])

        # Check whether curve parameters lie within the upper and lower bounds
        param = common_to_all_curves(ana_opt['curve_type'],
                                     'check_if_exceed_bounds', param)
        if ana_opt['curve_type'] == 'horz_indpnt':
            # Check if the horizontal curve parameters are following the right trend i.e. x1 < x2
            param = common_to_all_curves(ana_opt['curve_type'],
                                         'sort_horizontal_params', param)

        # Compute the likelihood over all subjects (i.e. log probability mass function if logistic regression)
        # This is where we use the chunking trick II
        for ptl_idx in range(np.shape(ana_opt['ptl_chunk_idx'])[0]):
            output_struct = family_of_curves(
                ana_opt['curve_type'], 'compute_likelihood',
                ana_opt['net_effect_clusters'],
                ana_opt['ptl_chunk_idx'][ptl_idx, 2],
                param[int(ana_opt['ptl_chunk_idx'][
                    ptl_idx, 0]):int(ana_opt['ptl_chunk_idx'][ptl_idx, 1]), :],
                hold_betas, preprocessed_data, ana_opt['distribution'],
                ana_opt['dist_specific_params'],
                ana_opt['data_matrix_columns'])

            # Gather weights
            w[int(ana_opt['ptl_chunk_idx'][ptl_idx,
                                           0]):int(ana_opt['ptl_chunk_idx'][
                                               ptl_idx,
                                               1])] = output_struct['w']

            # Gather predictor variable
            net_effects[:, int(ana_opt['ptl_chunk_idx'][ptl_idx, 0]):int(ana_opt['ptl_chunk_idx'][ptl_idx, 1])] = \
                  output_struct['net_effects']
            if ptl_idx == 0:
                # Gather dependent variable only once, since it is the same across all ptl_idx
                dependent_var = output_struct['dependent_var']

        del output_struct
        if np.any(np.isnan(w)):
            raise ValueError('NaNs in normalized weight vector w!')

        # Compute the p(theta) and q(theta) weights
        if em > 0:
            p_theta_minus_q_theta = compute_weights(
                ana_opt['curve_type'], ana_opt['particles'],
                normalized_w[em - 1, :], prev_iter_curve_param, param,
                ana_opt['wgt_chunks'], ana_opt['resolution'])
            w += p_theta_minus_q_theta

        w = np.exp(
            w - special.logsumexp(w)
        )  # Normalize the weights using logsumexp to avoid numerical underflow
        normalized_w[em, :] = w  # Store the normalized weights

        # Optimize betas using fminunc
        optimizing_function = family_of_distributions(
            ana_opt['distribution'], 'fminunc_both_betas', w, net_effects,
            dependent_var, ana_opt['dist_specific_params'])

        result = optimize.minimize(optimizing_function,
                                   np.array(hold_betas),
                                   jac=True,
                                   options={
                                       'disp': True,
                                       'return_all': True
                                   })
        hold_betas = result.x
        f_value = result.fun

        exp_max_f_values[
            em] = f_value  # gather the f_values over em iterations

    hold_betas_per_iter[
        em + 1, :] = hold_betas  # Store away the last em iteration betas
    print('>>>>>>>>> Final Betas: {}, {} <<<<<<<<<'.format(
        hold_betas[0], hold_betas[1]))

    # Flipping the vertical curve parameters if beta_1 is negative
    importance_sampler['flip'] = False
    neg_beta_idx = hold_betas[1] < 0
    if neg_beta_idx:
        print('!!!!!!!!!!!!!!!!!!!! Beta 1 is flipped !!!!!!!!!!!!!!!!!!!!')
        hold_betas[1] = hold_betas[1] * -1
        param = common_to_all_curves(ana_opt['curve_type'],
                                     'flip_vertical_params', param)
        importance_sampler['flip'] = True

    w = np.full((ana_opt['particles']), np.nan)  # Clearing the weight vector

    # Used for a likelihoods ratio test to see if our beta1 value is degenerate
    w_null_hypothesis = np.full((ana_opt['particles']), np.nan)

    # The null hypothesis for the likelihoods ratio test states that our model y_hat = beta_0 + beta_1 * predictor
    # variable is no different than the simpler model y_hat = beta_0 + beta_1 * predictor variable WHERE BETA_1 =
    # ZERO i.e. our model is y_hat = beta_0
    null_hypothesis_beta = [hold_betas[0], 0]

    for ptl_idx in range(np.shape(ana_opt['ptl_chunk_idx'])[0]):
        output_struct = family_of_curves(
            ana_opt['curve_type'], 'compute_likelihood',
            ana_opt['net_effect_clusters'], ana_opt['ptl_chunk_idx'][ptl_idx,
                                                                     3],
            param[ana_opt['ptl_chunk_idx'][ptl_idx,
                                           1]:ana_opt['ptl_chunk_idx'][ptl_idx,
                                                                       2], :],
            hold_betas, preprocessed_data, ana_opt['distribution'],
            ana_opt['dist_specific_params'], ana_opt['data_matrix_columns'])
        w[ana_opt['ptl_chunk_idx'][ptl_idx, 1]:ana_opt['ptl_chunk_idx'][
            ptl_idx, 2]] = output_struct['w']

    # this code computes the log likelihood of the data under the null hypothesis i.e. using null_hypothesis_beta
    # instead of hold_betas -- it's "lazy" because, unlike the alternative hypothesis, we don't have to compute the
    # data likelihood for each particle because it's exactly the same for each particle (b/c compute_likelihood uses
    # z = beta_1 * x + beta_0, but (recall that our particles control the value of x in this equation) beta_1 is zero
    # for the null hypothesis) that's why we pass in the zero vector representing a single particle with irrelevant
    # weights so we don't have to do it for each particle unnecessarily
    output_struct_null_hypothesis_lazy = family_of_curves(
        ana_opt['curve_type'], 'compute_likelihood',
        ana_opt['net_effect_clusters'], 1, [0, 0, 0, 0, 0, 0],
        null_hypothesis_beta, preprocessed_data, ana_opt['distribution'],
        ana_opt['dist_specific_params'], ana_opt['data_matrix_columns'])
    data_likelihood_null_hypothesis = output_struct_null_hypothesis_lazy['w']
    data_likelihood_alternative_hypothesis = w

    w = w + p_theta_minus_q_theta
    if np.any(np.isnan(w)):
        raise ValueError('NaNs in normalized weight vector w!')

    w = np.exp(
        w - special.logsumexp(w)
    )  # Normalize the weights using logsumexp to avoid numerical underflow
    normalized_w[em + 1, :] = w  # Store the normalized weights

    # Added for debugging chi-sq, might remove eventually
    importance_sampler[
        'data_likelihood_alternative_hypothesis'] = data_likelihood_alternative_hypothesis
    importance_sampler[
        'data_likelihood_null_hypothesis'] = data_likelihood_null_hypothesis

    # we calculate the data_likelihood over ALL particles by multiplying the data_likelihood for each particle by
    # that particle's importance weight
    dummy_var, importance_sampler['likratiotest'] = likratiotest(
        w * np.transpose(data_likelihood_alternative_hypothesis),
        data_likelihood_null_hypothesis, 2, 1)

    if np.any(np.isnan(normalized_w)):
        raise ValueError('NaNs in normalized weights vector!')
    if np.any(np.isnan(exp_max_f_values)):
        raise ValueError('NaNs in Expectation maximization fval matrix!')
    if np.any(np.isnan(hold_betas_per_iter)):
        raise ValueError('NaNs in hold betas matrix!')

    importance_sampler['normalized_weights'] = normalized_w
    importance_sampler['exp_max_fval'] = exp_max_f_values
    importance_sampler['hold_betas_per_iter'] = hold_betas_per_iter
    importance_sampler['curve_params'] = param
    importance_sampler['analysis_settings'] = ana_opt

    if ana_opt['bootstrap']:
        sio.savemat(
            '{}/{}_b{}_importance_sampler.mat'.format(
                ana_opt['target_dir'], ana_opt['analysis_id'],
                ana_opt['bootstrap_run']),
            {'importance_sampler': importance_sampler})
    elif ana_opt['scramble']:
        sio.savemat(
            '{}/{}_s{}_importance_sampler.mat'.format(ana_opt['target_dir'],
                                                      ana_opt['analysis_id'],
                                                      ana_opt['scramble_run']),
            {'importance_sampler': importance_sampler})
    else:
        sio.savemat(
            '{}/{}_importance_sampler.mat'.format(ana_opt['target_dir'],
                                                  ana_opt['analysis_id']),
            {'importance_sampler': importance_sampler})
    print('Results are stored in {}'.format(ana_opt['target_dir']))

    time = datetime.datetime.now()
    print('Finish time {}/{} {}:{}'.format(time.month, time.day, time.hour,
                                           time.minute))
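# Helper sketch (an addition, not part of the original module) capturing the weight
# normalisation used twice above: subtract the logsumexp of the log-weights before
# exponentiating, so very negative log-weights do not underflow to zero prematurely.
def normalize_log_weights(log_w):
    log_w = np.asarray(log_w, dtype=float)
    return np.exp(log_w - special.logsumexp(log_w))  # non-negative, sums to 1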
Exemplo n.º 56
0
def estimate_mixture_params(EmissionParameters, curr_counts_orig, curr_nr_of_counts_orig, curr_state, rand_sample_size, max_nr_iter, nr_of_iter=20, stop_crit=1.0, nr_of_init=10, verbosity=1):
	'''
	This function estimates the dirichlet multinomial mixture parameters
	'''
	#1) Copy old parameters and use it as initialisation for the first iteration

	alphas_list = []
	mixtures_list = []
	lls_list = []
	curr_counts = deepcopy(curr_counts_orig)
	curr_nr_of_counts = deepcopy(curr_nr_of_counts_orig)

	if len(curr_counts.shape) == 1:
		curr_counts = np.expand_dims(curr_counts, axis=1)

	if np.sum(np.sum(curr_counts, axis=0) > 0) > 0:
		curr_nr_of_counts = curr_nr_of_counts[:, np.sum(curr_counts, axis=0) >0]
		curr_counts = curr_counts[:, np.sum(curr_counts, axis=0) >0]

	#Test for fitting distributions only on diag events
	curr_nr_of_counts = curr_nr_of_counts[:, np.sum(curr_counts, axis=0) > 10]
	curr_counts = curr_counts[:, np.sum(curr_counts, axis=0) > 10]

	tracks_per_rep = EmissionParameters['Diag_event_params']['alpha'][curr_state].shape[0]
	NrOfReplicates = curr_counts.shape[0] // tracks_per_rep  # integer division: number of replicates
	
	noncon = np.sum(curr_counts[[tracks_per_rep - 1 + (tracks_per_rep * i) for i in range(NrOfReplicates)],:], axis=0)
	conv = np.sum(curr_counts, axis=0) - noncon
	ratio = conv / np.float64(conv + noncon)
	rat_ix = ((ratio > 0.05) * (ratio < 0.95)) > 0
	rat_ix = ((ratio < 0.95)) > 0
	curr_nr_of_counts = curr_nr_of_counts[:, rat_ix]
	curr_counts = curr_counts[:, rat_ix]
	#end test

	if len(curr_counts.shape) == 1:
		curr_counts = np.expand_dims(curr_counts, axis=1)

	#Save old lls mixtures and alphas
	mixtures = deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state])
	OldAlpha = deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state])
	#pdb.set_trace()
	scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) 
	scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1]))
	ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts)))

	alphas_list.append(deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state]))
	mixtures_list.append(deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state]))
	lls_list.append(ll)
 	
	for curr_init in range(nr_of_init):
		#compute the curr mixture, ll and alpha
		#initialise the parameters
		old_ll = 0
		if curr_init == 0:
			OldAlpha = deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state])
			mixtures = deepcopy(mixtures)
		else:
			OldAlpha = np.random.uniform(low=0.0001, high=0.1, size=OldAlpha.shape)
			for i in range(OldAlpha.shape[1]):
				OldAlpha[np.random.randint(OldAlpha.shape[0]-1), i] = np.random.random() * 10.0
				OldAlpha[-2, i] = np.random.random() * 1.0
				OldAlpha[-1, i] = np.random.random() * 10.0
			mixtures = np.random.uniform(low=0.0001, high=1.0, size=mixtures.shape)
			mixtures /= np.sum(mixtures)
		if EmissionParameters['Diag_event_params']['nr_mix_comp'] == 1:
			#Case that only one mixture component is given
			EmissionParameters['Diag_event_params']['alpha'][curr_state][:, 0] = diag_event_model.estimate_multinomial_parameters(curr_counts, curr_nr_of_counts, EmissionParameters, OldAlpha[:])
			#compute ll
			scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) 
			scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1]))
			ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts)))

			alphas_list.append(deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state]))
			mixtures_list.append(deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state]))
			lls_list.append(ll)
		else:
			zero_ix = []
			for iter_nr in range(max_nr_iter):
				print('em-iteration ' + str(iter_nr))

				scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) 
				scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1]))
				# 2) Compute the mixture components
				#compute the normalisation factor
				normalised_likelihood = logsumexp(scored_counts, axis=0) 

				old_ll = ll
				ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts)))

				if np.abs(old_ll - ll) < stop_crit:
					#Check if convergence has been reached
					if len(zero_ix) == 0:
						break

				normalised_scores = scored_counts - np.tile(normalised_likelihood, (scored_counts.shape[0], 1))
				un_norm_mixtures = logsumexp(normalised_scores, b=np.tile(curr_nr_of_counts, (scored_counts.shape[0], 1)), axis = 1)
				
				mixtures = np.exp(un_norm_mixtures - logsumexp(un_norm_mixtures))
				
				# 3) Compute for each count the most likely mixture component
				curr_weights = np.exp(normalised_scores)
				curr_weights = (curr_weights == np.tile(np.max(curr_weights, axis=0), (curr_weights.shape[0], 1))) *1.0

				zero_mix = np.sum(curr_weights,axis=1) == 0
				zero_ix = np.where(zero_mix)[0].tolist()
			   
				EmissionParameters['Diag_event_params']['mix_comp'][curr_state] = mixtures
				
				#Get number of positions that are used. (In case there are fewer entries that rand_sample_size in counts)
				rand_size = min(rand_sample_size, curr_counts.shape[1])
				for i in zero_ix:
					random_ix = np.random.choice(curr_counts.shape[1], rand_size, p=(curr_nr_of_counts[0, :] / float(np.sum(curr_nr_of_counts[0, :]))))
					curr_counts = np.hstack([curr_counts, curr_counts[:, random_ix]])
					curr_nr_of_counts = np.hstack([curr_nr_of_counts, np.ones((1, rand_size))])
					temp_array = np.zeros((normalised_scores.shape[0], rand_size))
					temp_array[i, :] = i
					normalised_scores = np.hstack([normalised_scores, temp_array])
					temp_array = np.zeros((curr_weights.shape[0], rand_size))
					temp_array[i, :] = 1
					curr_weights = np.hstack([curr_weights, temp_array])

				# 4) Compute the dirichlet-multinomial parameters
				for curr_mix_comp in range(EmissionParameters['Diag_event_params']['nr_mix_comp']):
					local_counts = curr_counts
					local_nr_counts = curr_nr_of_counts * curr_weights[curr_mix_comp, :]
					local_counts = local_counts[:, local_nr_counts[0, :] > 0]
					local_nr_counts = local_nr_counts[0, local_nr_counts[0, :] > 0]
					if len(local_counts.shape) == 1:
						local_counts = np.expand_dims(local_counts, axis=1)
					curr_alpha = diag_event_model.estimate_multinomial_parameters(local_counts, local_nr_counts, EmissionParameters, OldAlpha[:, curr_mix_comp])
					
					if curr_mix_comp in zero_ix:
						OldAlpha[:, curr_mix_comp] = np.random.uniform(low=0.0001, high=0.1, size=OldAlpha[:, curr_mix_comp].shape)
						OldAlpha[np.random.randint(OldAlpha.shape[0]), curr_mix_comp] = np.random.random() * 10.0
						OldAlpha[-2, curr_mix_comp] = np.random.random() * 1.0
						OldAlpha[-1, curr_mix_comp] = np.random.random() * 10.0
					else:
						OldAlpha[:, curr_mix_comp] = curr_alpha
				
				if (len(zero_ix) > 0) and (iter_nr + 2 < max_nr_iter):
					#Treat the case where some mixtures have prob zero
					mixtures[zero_ix] = np.mean(mixtures)
					mixtures /= np.sum(mixtures)
					EmissionParameters['Diag_event_params']['mix_comp'][curr_state] = deepcopy(mixtures)
				EmissionParameters['Diag_event_params']['alpha'][curr_state] = deepcopy(OldAlpha)
				# Check if convergence has been achieved.
			
			alphas_list.append(deepcopy(OldAlpha))
			mixtures[zero_ix] = np.min(mixtures[mixtures > 0])
			mixtures /= np.sum(mixtures)
			mixtures_list.append(deepcopy(mixtures))
			lls_list.append(ll)

	#select which alpha had the highest ll
	max_ll_pos = np.argmax(np.array(lls_list))
	
	alpha = alphas_list[max_ll_pos]
	mixtures = mixtures_list[max_ll_pos]

	return alpha, mixtures
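# Sketch (illustrative, not from the original module) of the mixture-weight update
# performed inside the EM loop above. Assumes scored_counts holds per-component log
# scores with shape (nr_mix_comp, nr_count_patterns) and curr_nr_of_counts the
# multiplicity of each count pattern, as in estimate_mixture_params.
def update_mixture_weights(scored_counts, curr_nr_of_counts):
	norm = logsumexp(scored_counts, axis=0)       # per-pattern normaliser
	log_resp = scored_counts - norm               # log responsibilities
	un_norm = logsumexp(log_resp, b=np.tile(curr_nr_of_counts, (scored_counts.shape[0], 1)), axis=1)
	return np.exp(un_norm - logsumexp(un_norm))   # mixture weights, sum to 1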
Exemplo n.º 57
0
def normalizelogspace(x):
    """Normalizes each row of log-probabilities so the row sums to one in linear space."""
    L = logsumexp(x, axis=1).reshape(-1, 1)
    Lnew = np.repeat(L, x.shape[1], axis=1)  # tile the per-row normalizer across all columns
    y = x - Lnew
    return y, Lnew
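# Illustrative check (added, not part of the original snippet): each row of the
# normalized output exponentiates to a probability vector, and Lnew holds the per-row
# log normalizers.
def _check_normalizelogspace():
    logp = np.log(np.array([[0.2, 0.3, 0.5], [0.1, 0.1, 0.8]])) + 7.0  # arbitrary offset
    y, Lnew = normalizelogspace(logp)
    assert np.allclose(np.exp(y).sum(axis=1), 1.0)
    assert np.allclose(Lnew[:, 0], logsumexp(logp, axis=1))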
Exemplo n.º 58
0
def Parallel_estimate_single_mixture_params(args):
	'''
	This function estimates the dirichlet multinomial mixture parameters
	'''

	stop_crit, rand_sample_size, max_nr_iter, curr_init, EmissionParameters, curr_state, curr_counts, curr_nr_of_counts = args
	#compute the curr mixture, ll and alpha
	#initialise the parameters
	old_ll = 0
	ll = -10

	OldAlpha = deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state])
	mixtures = deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state])

	if curr_init > 0:
		OldAlpha = np.random.uniform(low=0.0001, high=0.1, size=OldAlpha.shape)
		for i in range(OldAlpha.shape[1]):
			OldAlpha[np.random.randint(OldAlpha.shape[0]-1), i] = np.random.random() * 10.0
			OldAlpha[-2, i] = np.random.random() * 1.0
			OldAlpha[-1, i] = np.random.random() * 10.0
		mixtures = np.random.uniform(low=0.0001, high=1.0, size=mixtures.shape)
		mixtures /= np.sum(mixtures)
	if EmissionParameters['Diag_event_params']['nr_mix_comp'] == 1:
		#Case that only one mixture component is given
		EmissionParameters['Diag_event_params']['alpha'][curr_state][:, 0] = diag_event_model.estimate_multinomial_parameters(curr_counts, curr_nr_of_counts, EmissionParameters, OldAlpha[:])
		#compute ll
		scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) 
		scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1]))
		ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts)))

		OldAlpha = deepcopy(EmissionParameters['Diag_event_params']['alpha'][curr_state])
		mixtures = deepcopy(EmissionParameters['Diag_event_params']['mix_comp'][curr_state])

	else:
		zero_ix = []
		for iter_nr in range(max_nr_iter):
			print('em-iteration ' + str(iter_nr))	

			scored_counts = score_counts(curr_counts, curr_state, EmissionParameters) 
			scored_counts += np.tile(np.log(mixtures[:, np.newaxis]), (1, scored_counts.shape[1]))
			# 2) Compute the mixture components
			#compute the normalisation factor
			normalised_likelihood = logsumexp(scored_counts, axis=0) 

			old_ll = ll
			ll = np.sum(np.sum(logsumexp(scored_counts, axis=0) + np.log(curr_nr_of_counts)))

			if np.abs(old_ll - ll) < stop_crit:
				if len(zero_ix) == 0:
					break

			normalised_scores = scored_counts - np.tile(normalised_likelihood, (scored_counts.shape[0], 1))
			un_norm_mixtures = logsumexp(normalised_scores, b=np.tile(curr_nr_of_counts, (scored_counts.shape[0], 1)), axis = 1)
			
			mixtures = np.exp(un_norm_mixtures - logsumexp(un_norm_mixtures))
			
			# 3) Compute for each count the most likely mixture component
			curr_weights = np.exp(normalised_scores)
			curr_weights = (curr_weights == np.tile(np.max(curr_weights, axis=0), (curr_weights.shape[0], 1))) *1.0

			zero_mix = np.sum(curr_weights,axis=1) == 0
			zero_ix = np.where(zero_mix)[0].tolist()
		   
			EmissionParameters['Diag_event_params']['mix_comp'][curr_state] = mixtures
			#Get number of positions that are used. (In case there are fewer entries that rand_sample_size in counts)
			rand_size = min(rand_sample_size, curr_counts.shape[1])
			for i in zero_ix:
				random_ix = np.random.choice(curr_counts.shape[1], rand_size, p=(curr_nr_of_counts[0, :] / float(np.sum(curr_nr_of_counts[0, :]))))
				curr_counts = np.hstack([curr_counts, curr_counts[:, random_ix]])
				curr_nr_of_counts = np.hstack([curr_nr_of_counts, np.ones((1, rand_size))])
				temp_array = np.zeros((normalised_scores.shape[0], rand_size))
				temp_array[i, :] = i
				normalised_scores = np.hstack([normalised_scores, temp_array])
				temp_array = np.zeros((curr_weights.shape[0], rand_size))
				temp_array[i, :] = 1
				curr_weights = np.hstack([curr_weights, temp_array])

			# 4) Compute the dirichlet-multinomial parameters
			for curr_mix_comp in range(EmissionParameters['Diag_event_params']['nr_mix_comp']):
				local_counts = curr_counts
				local_nr_counts = curr_nr_of_counts * curr_weights[curr_mix_comp, :]
				local_counts = local_counts[:, local_nr_counts[0, :] > 0]
				local_nr_counts = local_nr_counts[0, local_nr_counts[0, :] > 0]
				if len(local_counts.shape) == 1:
					local_counts = np.expand_dims(local_counts, axis=1)
				curr_alpha = diag_event_model.estimate_multinomial_parameters(local_counts, local_nr_counts, EmissionParameters, OldAlpha[:, curr_mix_comp])
				
				if curr_mix_comp in zero_ix:
					OldAlpha[:, curr_mix_comp] = np.random.uniform(low=0.0001, high=0.1, size=OldAlpha[:, curr_mix_comp].shape)
					OldAlpha[np.random.randint(OldAlpha.shape[0]), curr_mix_comp] = np.random.random() * 10.0
					OldAlpha[-2, curr_mix_comp] = np.random.random() * 1.0
					OldAlpha[-1, curr_mix_comp] = np.random.random() * 10.0
				else:
					OldAlpha[:, curr_mix_comp] = curr_alpha

			if (len(zero_ix) > 0) and (iter_nr + 2 < max_nr_iter):
				#Treat the case where some mixtures have prob zero
				mixtures[zero_ix] = np.mean(mixtures)
				mixtures /= np.sum(mixtures)
				EmissionParameters['Diag_event_params']['mix_comp'][curr_state] = deepcopy(mixtures)
			EmissionParameters['Diag_event_params']['alpha'][curr_state] = deepcopy(OldAlpha)
			# Check if convergence has been achieved.
		
		mixtures[zero_ix] = np.min(mixtures[mixtures > 0])
		mixtures /= np.sum(mixtures)
	
	return [deepcopy(OldAlpha), mixtures, ll]
Exemplo n.º 59
0
def plot2d_network(network,
                   counts='weighted',
                   label_name=None,
                   labels=None,
                   labels_err=None,
                   vals=None,
                   dims=(0, 1),
                   cmap='viridis',
                   Nmc=5,
                   point_est='median',
                   plot_kwargs=None,
                   rstate=None,
                   discrete=False,
                   verbose=True,
                   *args,
                   **kwargs):
    """
    Plot a 2-D projection of the network colored by the chosen variable.

    Parameters
    ----------
    network : `~frankenz.networks._Network`-derived object
        The trained and populated network object.

    counts : {'absolute', 'weighted'}, optional
        The number density of objects mapped onto the network. If
        `'absolute'`, the raw number of objects associated with each node
        will be plotted. If `'weighted'`, the weighted number of objects
        will be shown. Default is `'weighted'`.

    labels : `~numpy.ndarray` with shape (Nobj), optional
        The labels we want to project over the network. Will override
        `counts` if provided.

    label_name : str, optional
        The name of the label.

    labels_err : `~numpy.ndarray` with shape (Nobj), optional
        Errors on the labels.

    vals : `~numpy.ndarray` with shape (Nnodes), optional
        The values to be plotted directly on the network. Overrides
        `labels`.

    dims : 2-tuple, optional
        The `(x, y)` dimensions the network should be plotted over. Default is
        `(0, 1)`.

    cmap : colormap, optional
        The colormap used when plotting results. Default is `'viridis'`.

    Nmc : int, optional
        The number of Monte Carlo realizations of the label value(s) if the
        error(s) are provided. Default is `5`.

    point_est : str or func, optional
        The point estimator to be plotted. Pre-defined options include
        `'mean'`, `'median'`, `'std'`, and `'mad'`. If a function is passed,
        it will be used to compute the weighted point estimate using input
        of the form `(labels, wts)`. Default is `'median'`.

    plot_kwargs : kwargs, optional
        Keyword arguments to be passed to `~matplotlib.pyplot.scatter`.

    rstate : `~numpy.random.RandomState` instance, optional
        Random state instance. If not passed, the default `~numpy.random`
        instance will be used.

    discrete : bool, optional
        Whether to assign weights based **only** on the best-fitting node
        rather than all nodes an object might be associated with.
        Default is `False`.

    verbose : bool, optional
        Whether to print progress. Default is `True`.

    Returns
    -------
    vals : `~numpy.ndarray` with shape (Nnodes)
        Corresponding point estimates for the input labels.

    """

    # Initialize values.
    if plot_kwargs is None:
        plot_kwargs = dict()
    if rstate is None:
        rstate = np.random
    if label_name is None and (labels is not None or vals is not None):
        label_name = 'Node Value'
    Nnodes = network.NNODE
    xpos = network.nodes_pos[:, dims[0]]
    ypos = network.nodes_pos[:, dims[1]]

    # Compute counts.
    if counts == 'absolute' and labels is None and vals is None:
        vals = network.nodes_Nmatch
        if label_name is None:
            label_name = 'Counts'
    elif counts == 'weighted' and labels is None and vals is None:
        vals = np.array(
            [np.exp(logsumexp(logwts)) for logwts in network.nodes_logwts])
        if label_name is None:
            label_name = 'Weighted Counts'

    # Compute point estimates.
    if vals is None and labels is not None:
        vals = np.zeros(Nnodes)
        for i in range(Nnodes):
            # Print progress.
            if verbose:
                sys.stderr.write('\rComputing {0} estimate {1}/{2}'.format(
                    label_name, i + 1, Nnodes))
                sys.stderr.flush()
            # Grab relevant objects.
            idxs = network.nodes_idxs[i]
            if discrete:
                logwts = np.log(network.nodes_bmus[i] + 1e-100)
            else:
                logwts = network.nodes_logwts[i]
            wts = np.exp(logwts - logsumexp(logwts))  # normalized weights
            ys = labels[idxs]  # labels
            Ny = len(ys)
            # Account for label errors (if provided) using Monte Carlo methods.
            if labels_err is not None:
                yes = labels_err[idxs]  # errors
                ys = rstate.normal(ys, yes, size=(Nmc, Ny)).flatten()
                wts = np.tile(wts, Nmc) / Nmc
            if point_est == 'mean':
                # Compute weighted mean.
                val = np.dot(wts, ys)
            elif point_est == 'median':
                # Compute weighted median.
                sort_idx = np.argsort(ys)
                sort_cdf = wts[sort_idx].cumsum()
                val = np.interp(0.5, sort_cdf, ys[sort_idx])
            elif point_est == 'std':
                # Compute weighted std (square root of the weighted variance).
                ymean = np.dot(wts, ys)  # mean
                val = np.sqrt(np.dot(wts, np.square(ys - ymean)))
            elif point_est == 'mad':
                # Compute weighted MAD.
                sort_idx = np.argsort(ys)
                sort_cdf = wts[sort_idx].cumsum()
                ymed = np.interp(0.5, sort_cdf, ys[sort_idx])  # median
                dev = np.abs(ys - ymed)  # absolute deviation
                sort_idx = np.argsort(dev)
                sort_cdf = wts[sort_idx].cumsum()
                val = np.interp(0.5, sort_cdf, dev[sort_idx])
            else:
                try:
                    val = point_est(ys, wts)
                except:
                    raise RuntimeError("`point_est` function failed!")
            vals[i] = val
        if verbose:
            sys.stderr.write('\n')
            sys.stderr.flush()

    # Plot results.
    plt.scatter(xpos, ypos, c=vals, cmap=cmap, **plot_kwargs)
    plt.xlabel(r'$x_{{{0}}}$'.format(dims[0]))
    plt.ylabel(r'$x_{{{0}}}$'.format(dims[1]))
    plt.colorbar(label=label_name)

    return vals
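# Usage sketch for the custom `point_est` hook documented above (the names `network`
# and `mags` are hypothetical): any callable taking (labels, wts) can be supplied,
# e.g. a weighted 84th-percentile estimator.
def weighted_p84(labels, wts):
    order = np.argsort(labels)
    cdf = wts[order].cumsum()
    return np.interp(0.84, cdf, labels[order])

# vals = plot2d_network(network, labels=mags, label_name='mag (p84)',
#                       point_est=weighted_p84)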
Exemplo n.º 60
0
    def test_get_ll_contrib(self):
        # batch_size = 3, trimmed_input_len = 3
        #
        # In the first instance, the contribution to the likelihood should
        # come from both the generation scores and the copy scores, since the
        # token is in the source sentence and the target vocabulary.
        # In the second instance, the contribution should come only from the
        # generation scores, since the token is not in the source sentence.
        # In the third instance, the contribution should come only from the copy scores,
        # since the token is in the source sequence but is not in the target vocabulary.

        vocab = self.model.vocab

        generation_scores = torch.tensor([
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7,
             0.8],  # these numbers are arbitrary.
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
            [0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
        ])
        # shape: (batch_size, target_vocab_size)

        copy_scores = torch.tensor([
            [1.0, 2.0, 1.0],  # these numbers are arbitrary.
            [1.0, 2.0, 3.0],
            [2.0, 2.0, 3.0],
        ])
        # shape: (batch_size, trimmed_input_len)

        target_tokens = torch.tensor([
            vocab.get_token_index("tokens", self.model._target_namespace),
            vocab.get_token_index("the", self.model._target_namespace),
            self.model._oov_index,
        ])
        # shape: (batch_size,)

        target_to_source = torch.tensor([[0, 1, 0], [0, 0, 0], [1, 0, 1]])
        # shape: (batch_size, trimmed_input_len)

        copy_mask = torch.tensor([[True, True, False], [True, False, False],
                                  [True, True, True]])
        # shape: (batch_size, trimmed_input_len)

        # This is what the log likelihood result should look like.
        ll_check = np.array([
            # First instance.
            logsumexp(
                np.array([
                    generation_scores[0, target_tokens[0].item()].item(), 2.0
                ])) -
            logsumexp(
                np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0, 2.0])),
            # Second instance.
            generation_scores[1, target_tokens[1].item()].item() -
            logsumexp(np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0])),
            # Third instance.
            logsumexp(np.array([2.0, 3.0])) - logsumexp(
                np.array(
                    [0.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 2.0, 2.0, 3.0])),
        ])

        # This is what the selective_weights result should look like.
        selective_weights_check = np.stack([
            np.array([0.0, 1.0, 0.0]),
            np.array([0.0, 0.0, 0.0]),
            np.exp([2.0, float("-inf"), 3.0]) / (np.exp(2.0) + np.exp(3.0)),
        ])

        generation_scores_mask = generation_scores.new_full(
            generation_scores.size(), True, dtype=torch.bool)
        ll_actual, selective_weights_actual = self.model._get_ll_contrib(
            generation_scores,
            generation_scores_mask,
            copy_scores,
            target_tokens,
            target_to_source,
            copy_mask,
        )

        np.testing.assert_almost_equal(ll_actual.data.numpy(),
                                       ll_check,
                                       decimal=6)

        np.testing.assert_almost_equal(selective_weights_actual.data.numpy(),
                                       selective_weights_check,
                                       decimal=6)