def dprop(X, Sigma, X_means):
    r"""Evaluate the PDF of a mixture proposal distribution

    Evaluate the PDF of a gaussian mixture distribution with a common covariance
    matrix and different means. Every mixture component carries the same weight.

    Args:
        X (2d numpy array): Observations for which to evaluate the density
        Sigma (2d numpy array): Common covariance matrix for mixture distribution
        X_means (2d numpy array): Means for mixture distribution

    Preconditions:
        X.shape[1] == X_means.shape[1]
        Sigma.shape[0] == X_means.shape[1]
        Sigma.shape[1] == X_means.shape[1]

    Returns:
        numpy array: Density values

    """
    n, _ = X.shape
    n_comp = X_means.shape[0]
    w = 1 / n_comp  # Equal weighting of mixture components

    # Row i holds the density of component i at every observation
    L = zeros((n_comp, n))
    # One distribution object is shared by all components: each component is
    # evaluated by shifting the observations by that component's mean.
    dist = mvnorm(cov=Sigma)
    for i in range(n_comp):
        L[i, :] = dist.pdf(X - X_means[i])

    return npsum(L, axis=0) * w
 def rvs(self, nums):
     """Draw ``nums`` samples from a multivariate normal distribution.

     The mean is the first row of ``np.dot(self.X, self.Beta.T).T`` and the
     covariance is ``self.sigma_2_I``.

     Args:
         nums (int): Number of draws.

     Returns:
         numpy array: Array of shape ``(self.X.shape[0], nums)``; each column
         is one draw.
     """
     mean = np.dot(self.X, self.Beta.T).T[0, :]
     # The mean and covariance do not change between draws, so the frozen
     # distribution is built once instead of once per iteration.
     temp_model = mvnorm(mean, self.sigma_2_I)
     ans = np.empty((nums, self.X.shape[0]))
     for num in range(nums):
         ans[num, :] = temp_model.rvs(1)
     return ans.T
def approx_pnd(X_pred, X_cov, X_train, signs, n=int(1e4), seed=None):
    r"""Approximate the PND via mixture importance sampling

    Approximate the probability non-dominated (PND) for a set of predictive
    points using a mixture importance sampling approach. Predictive points are
    assumed to have predictive gaussian distributions (with specified mean and
    covariance matrix).

    Args:
        X_pred (2d numpy array): Predictive values
        X_cov (iterable of 2d numpy arrays): Predictive covariance matrices
        X_train (2d numpy array): Training values, used to determine existing Pareto frontier
        signs (numpy array of +/-1 values): Array of optimization signs: {-1: Minimize, +1 Maximize}

    Kwargs:
        n (int): Number of draws for importance sampler
        seed (int): Seed for random state

    Returns:
        pr_scores (array): Estimated PND values
        var_values (array): Estimated variance values

    References:
        Owen *Monte Carlo theory, methods and examples* (2013)

    """
    ## Setup
    # Flip signs so that every objective is treated as a minimization
    X_wk_train = -X_train * signs
    X_wk_pred = -X_pred * signs
    n_pred = X_pred.shape[0]

    ## Find the training Pareto frontier
    idx_pareto = pareto_min_rel(X_wk_train)
    X_pareto = X_wk_train[idx_pareto, :]

    ## Sample the mixture points
    Sig_mix = make_proposal_sigma(X_wk_train, idx_pareto, X_cov)
    X_mix = rprop(n, Sig_mix, X_pareto, seed=seed)
    ## Take non-dominated points only
    idx_ndom = pareto_min_rel(X_mix, X_base=X_pareto)
    X_mix = X_mix[idx_ndom, :]

    ## Evaluate the Pr[non-dominated]
    d_mix = dprop(X_mix, Sig_mix, X_pareto)
    pr_scores = zeros(n_pred)
    var_values = zeros(n_pred)
    for i in range(n_pred):
        dist_test = mvnorm(mean=X_wk_pred[i], cov=X_cov[i])
        # Importance weights: target density over proposal density
        w_test = dist_test.pdf(X_mix) / d_mix
        # Owen (2013), Equation (9.3)
        pr_scores[i] = npsum(w_test) / n
        # Owen (2013), Equation (9.5)
        # NOTE(review): only the retained (non-dominated) draws enter this sum,
        # while the divisor is the full draw count n; the discarded draws add
        # nothing here -- confirm this is the intended form of the estimator.
        var_values[i] = npsum((w_test - pr_scores[i])**2) / n

    return pr_scores, var_values
 def rvs(self, nums):
     """Draw ``nums`` samples from a multivariate normal distribution.

     With ``P = inv(X^T X + T)``, the mean is ``P X^T Y`` and the covariance
     is ``P * sigma_2``, using ``self.X``, ``self.T``, ``self.Y`` and
     ``self.sigma_2``.

     Args:
         nums (int): Number of draws.

     Returns:
         numpy array: Array of shape ``(nums, self.dim)``; each row is one draw.
     """
     # The same inverse feeds both the mean and the covariance; compute it once.
     precision_inv = inv(np.dot(self.X.T, self.X) + self.T)
     cov = precision_inv * self.sigma_2
     # If the smallest eigenvalue is negative, shift the diagonal up by ten
     # times its magnitude so the matrix handed to the sampler is not indefinite.
     min_eig = np.min(np.real(np.linalg.eigvals(cov)))
     if min_eig < 0:
         cov -= 10 * min_eig * np.eye(*cov.shape)
     mean = np.dot(precision_inv, np.dot(self.X.T, self.Y)).T[0, :]
     temp_model = mvnorm(mean, cov)
     ans = np.empty((nums, self.dim))
     for num in range(nums):
         ans[num, :] = temp_model.rvs(1)
     return ans
    def block_sample_z(self, z, psi, pi, As, Sigmas):
        """
        Samples the state sequence z,
        also updates and returns the transition counts n_jk (n: customer in rest. j chooses dish k)
        (See E. Fox thesis page 158 algorithm 14 step 1 b) for reference)

        The entry z[0] is left untouched; z[1:] is overwritten in place.

        Parameters
        ----------
        z : ndarray
            1D array containing the mode assignments for the nodes
        psi : ndarray
            2D array containing the states/pseudo-observations for each time step
            The dimensionality is [state_dim, T].
        pi: ndarray
            2D array containing the probabilities of transitioning to mode j from current mode k.
            The columns represent the current mode, and the rows the trans. prob.
            The dimensionality is [L, L]
        As : dict
            Dictionary containing for each mode in use the sampled dynamical system matrix A
        Sigmas : dict
            Dictionary containing for each mode in use the sampled noise matrix Sigma

        Returns
        -------
        z : ndarray
            1D array containing the newly sampled mode assignments for the nodes
        n : ndarray
            2D array containing the transition counts from mode j to mode k for the entire time sequence
            The dimensionality is [L, L]
        """
        T = self.T
        L = self.L
        dims = self.xdim
        n = np.zeros((L, L), dtype=np.int32)
        messages = self.backward_message(z, psi, pi, As, Sigmas)
        modes = np.arange(L)
        for t in range(1, T):
            probabilities = np.zeros(L)
            prev_mode = z[t - 1]
            for k in range(L):
                # Likelihood of the observation at t under the dynamics of mode k
                try:
                    if dims == 1:
                        # mvnorm cannot be used for a 1D state, so norm is used.
                        # NOTE(review): scipy's norm takes a standard deviation
                        # as its second argument; Sigmas[k] is passed as-is --
                        # confirm it is not a variance.
                        density = norm(As[k] * psi[:, t - 1],
                                       Sigmas[k]).pdf(psi[:, t])
                    else:
                        density = mvnorm(As[k].dot(psi[:, t - 1]),
                                         Sigmas[k]).pdf(psi[:, t])
                    # Collapse size-1 arrays so the comparisons and the
                    # assignment below operate on a plain scalar.
                    density = float(np.squeeze(density))
                except (ValueError, np.linalg.LinAlgError):
                    # Degenerate parameters (e.g. all-zero mean and Sigma) make
                    # scipy raise; such a mode simply gets zero probability.
                    density = 0.0

                # NOTE(review): the docstring says the columns of pi are the
                # current mode, but here the previous mode indexes the row --
                # confirm which orientation is intended.
                transition = pi[prev_mode, k]
                if messages[k, t] == 0 or density == 0 or transition == 0:
                    # Any zero factor gives zero probability; skip the logs
                    continue
                # Combine the factors in log space for numerical reasons
                log_f = np.log(density) + np.log(messages[k, t])
                probabilities[k] = np.exp(np.log(transition) + log_f)

            probabilities = probabilities / np.sum(probabilities)
            # sample new z[t]
            z[t] = np.random.choice(modes, p=probabilities)
            # update n_jk to reflect the new transition
            # (n_jk counts all transitions from j to k within the entire time series)
            n[z[t - 1], z[t]] += 1

        return z, n
    def backward_message(self, z, psi, pi, As, Sigmas):
        """
        Calculates the backward messages needed for the subsequent sampling of z
        (See E. Fox thesis page 158 algorithm 14 step 1 a) for reference)

        Parameters
        ----------
        z : ndarray
            1D array containing the mode assignments for the nodes
            (not read by the current implementation)
        psi : ndarray
            2D array containing the states/pseudo-observations for each time step
            The dimensionality is [state_dim, T].
        pi: ndarray
            2D array containing the probabilities of transitioning to mode j from current mode k.
            The columns represent the current mode and the rows the trans. prob.
            The dimensionality is [L, L]
        As : dict
            Dictionary containing for each mode in use the sampled dynamical system matrix A
        Sigmas : dict
            Dictionary containing for each mode in use the sampled noise matrix Sigma

        Returns
        -------
        message : ndarray
                2D array containing the backward messages m_{t+1,t}(k)
                The dimensionality is [L, T]. Every column for t < T - 1 is
                normalized to sum to one; the last column is all ones.
        """
        L = self.L
        T = self.T
        dims = self.xdim
        message = np.zeros((L, T))
        message[:, T - 1] = 1
        for t in range(T - 2, -1, -1):
            # The likelihood of the step t -> t + 1 depends only on the next
            # mode, not on the current mode k, so it is evaluated once per t.
            likelihood = np.zeros(L)
            for nxt in range(L):
                if np.linalg.norm(As[nxt]) == 0:
                    # An all-zero A matrix contributes nothing to the message
                    continue
                if dims == 1:
                    pd = norm(As[nxt] * psi[:, t],
                              Sigmas[nxt]).pdf(psi[:, t + 1])
                else:
                    pd = mvnorm(As[nxt].dot(psi[:, t]),
                                Sigmas[nxt]).pdf(psi[:, t + 1])
                # Collapse size-1 arrays (1D state case) to a plain scalar
                likelihood[nxt] = float(np.squeeze(pd))

            weighted = message[:, t + 1] * likelihood
            for k in range(L):
                message[k, t] = sum(weighted[nxt] * pi[nxt, k]
                                    for nxt in range(L))

            total = np.sum(message[:, t])
            if total == 0:
                # Every mode has zero mass: fall back to a uniform message
                # rather than dividing by zero. A column that merely contains
                # some zeros is kept and normalized as usual.
                message[:, t] = 1
                total = np.sum(message[:, t])
            message[:, t] = message[:, t] / total

        return message