def dprop(X, Sigma, X_means):
    r"""Evaluate the PDF of a mixture proposal distribution

    Evaluate the PDF of a Gaussian mixture distribution with a common
    covariance matrix and different means.

    Args:
        X (2d numpy array): Observations for which to evaluate the density
        Sigma (2d numpy array): Common covariance matrix for mixture distribution
        X_means (2d numpy array): Means for mixture distribution

    Preconditions:
        X.shape[1] == X_means.shape[1]
        Sigma.shape[0] == X_means.shape[1]
        Sigma.shape[1] == X_means.shape[1]

    Returns:
        numpy array: Density values
    """
    n, n_dim = X.shape
    n_comp = X_means.shape[0]
    w = 1 / n_comp  # Equal weighting of mixture components

    L = zeros((n_comp, n))
    dist = mvnorm(cov=Sigma)
    for i in range(n_comp):
        L[i, :] = dist.pdf(X - X_means[i])

    return npsum(L, axis=0) * w
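# Hypothetical usage sketch for dprop. The imports below reflect assumed
# module conventions (zeros/npsum from numpy, mvnorm aliasing
# scipy.stats.multivariate_normal); the data are made up for illustration.
from numpy import array, eye, zeros
from numpy import sum as npsum
from scipy.stats import multivariate_normal as mvnorm

# Two-component mixture in 2D with a shared identity covariance
X_means = array([[0.0, 0.0], [2.0, 2.0]])
Sigma = eye(2)
X = array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])

dens = dprop(X, Sigma, X_means)  # one density value per row of X
# Cross-check a single point against the explicit mixture formula
# 0.5 * N(x; mu_1, Sigma) + 0.5 * N(x; mu_2, Sigma):
check = 0.5 * (mvnorm(X_means[0], Sigma).pdf(X[0])
               + mvnorm(X_means[1], Sigma).pdf(X[0]))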
def rvs(self, nums):
    ans = np.empty((nums, self.X.shape[0]))
    # The mean X @ Beta and covariance sigma_2_I are fixed across draws,
    # so the sampling distribution only needs to be built once
    temp_model = mvnorm(np.dot(self.X, self.Beta.T).T[0, :], self.sigma_2_I)
    for num in range(nums):
        ans[num, :] = temp_model.rvs(1)
    # Return with one draw per column
    return ans.T
def approx_pnd(X_pred, X_cov, X_train, signs, n=int(1e4), seed=None):
    r"""Approximate the PND via mixture importance sampling

    Approximate the probability non-dominated (PND) for a set of predictive
    points using a mixture importance sampling approach. Predictive points are
    assumed to have predictive Gaussian distributions (with specified mean and
    covariance matrix).

    Args:
        X_pred (2d numpy array): Predictive values
        X_cov (iterable of 2d numpy arrays): Predictive covariance matrices
        X_train (2d numpy array): Training values, used to determine the
            existing Pareto frontier
        signs (numpy array of +/-1 values): Array of optimization signs:
            {-1: Minimize, +1: Maximize}

    Kwargs:
        n (int): Number of draws for importance sampler
        seed (int): Seed for random state

    Returns:
        pr_scores (array): Estimated PND values
        var_values (array): Estimated variance values

    References:
        Owen *Monte Carlo theory, methods and examples* (2013)
    """
    ## Setup
    X_wk_train = -X_train * signs
    X_wk_pred = -X_pred * signs
    n_train, n_dim = X_train.shape
    n_pred = X_pred.shape[0]

    ## Find the training Pareto frontier
    idx_pareto = pareto_min_rel(X_wk_train)
    n_pareto = len(idx_pareto)

    ## Sample the mixture points
    Sig_mix = make_proposal_sigma(X_wk_train, idx_pareto, X_cov)
    X_mix = rprop(n, Sig_mix, X_wk_train[idx_pareto, :], seed=seed)

    ## Take non-dominated points only
    idx_ndom = pareto_min_rel(X_mix, X_base=X_wk_train[idx_pareto, :])
    X_mix = X_mix[idx_ndom, :]

    ## Evaluate the Pr[non-dominated]
    d_mix = dprop(X_mix, Sig_mix, X_wk_train[idx_pareto, :])
    pr_scores = zeros(n_pred)
    var_values = zeros(n_pred)
    for i in range(n_pred):
        dist_test = mvnorm(mean=X_wk_pred[i], cov=X_cov[i])
        w_test = dist_test.pdf(X_mix) / d_mix

        # Owen (2013), Equation (9.3)
        pr_scores[i] = npsum(w_test) / n
        # Owen (2013), Equation (9.5)
        var_values[i] = npsum((w_test - pr_scores[i])**2) / n

    return pr_scores, var_values
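# Hypothetical usage sketch for approx_pnd. It assumes the companion helpers
# (pareto_min_rel, make_proposal_sigma, rprop, dprop) are available from the
# same module; the data below are made up for illustration.
import numpy as np

X_train = np.array([[1.0, 2.0], [2.0, 1.0], [3.0, 3.0]])    # observed objectives
X_pred = np.array([[0.5, 0.5], [2.5, 2.5]])                 # candidate points
X_cov = [0.1 * np.eye(2) for _ in range(X_pred.shape[0])]   # predictive covariances
signs = np.array([-1, -1])                                  # minimize both objectives

pr_scores, var_values = approx_pnd(X_pred, X_cov, X_train, signs, n=10_000, seed=101)
# pr_scores[i] estimates Pr[candidate i is non-dominated]; var_values[i] is
# the importance-sampling variance of that estimate (Owen 2013, Ch. 9).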
def rvs(self, nums):
    ans = np.empty((nums, self.dim))
    # Invert the posterior precision term (X'X + T) once and reuse it for
    # both the covariance and the mean
    prec_inv = inv(np.dot(self.X.T, self.X) + self.T)
    # Posterior covariance sigma^2 * (X'X + T)^{-1}
    cov = prec_inv * self.sigma_2
    # Numerical guard: if round-off makes the covariance indefinite,
    # shift its spectrum upward until all eigenvalues are positive
    min_eig = np.min(np.real(np.linalg.eigvals(cov)))
    if min_eig < 0:
        cov -= 10 * min_eig * np.eye(*cov.shape)
    # Posterior mean (X'X + T)^{-1} X'Y
    temp_model = mvnorm(np.dot(prec_inv, np.dot(self.X.T, self.Y)).T[0, :], cov)
    for num in range(nums):
        ans[num, :] = temp_model.rvs(1)
    return ans
def block_sample_z(self, z, psi, pi, As, Sigmas):
    """
    Samples the state sequence z, and also updates and returns the transition
    counts n_jk (n: customer in restaurant j chooses dish k).
    (See E. Fox thesis, page 158, algorithm 14, step 1 b) for reference.)

    Parameters
    ----------
    z : ndarray
        1D array containing the mode assignments for the nodes
    psi : ndarray
        2D array containing the states/pseudo-observations for each time step.
        The dimensionality is [state_dim, T].
    pi : ndarray
        2D array containing the probabilities of transitioning to mode j from
        the current mode k. The columns represent the current mode, and the
        rows the transition probabilities. The dimensionality is [L, L].
    As : dict
        Dictionary containing, for each mode in use, the sampled dynamical
        system matrix A
    Sigmas : dict
        Dictionary containing, for each mode in use, the sampled noise
        matrix Sigma

    Returns
    -------
    z : ndarray
        1D array containing the newly sampled mode assignments for the nodes
    n : ndarray
        2D array containing the transition counts from mode j to mode k for
        the entire time sequence. The dimensionality is [L, L].
    """
    T = self.T
    L = self.L
    dims = self.xdim
    n = np.zeros((L, L), dtype=np.int32)
    messages = self.backward_message(z, psi, pi, As, Sigmas)

    for t in range(1, T):
        f = np.zeros(L)
        probabilities = np.zeros(L)
        for k in range(0, L):
            # Likelihood of generating the observation at time t in mode k
            pd = 0
            try:
                # In the 1D case mvnorm raises an error, so fall back to the
                # univariate norm
                if dims == 1:
                    pd = norm(As[k] * psi[:, t - 1], Sigmas[k]).pdf(psi[:, t])
                else:
                    pd = mvnorm(As[k].dot(psi[:, t - 1]), Sigmas[k]).pdf(psi[:, t])
            except Exception:
                # If mean and Sigma are 0, the density also raises an error:
                # catch it and treat this mode as impossible
                f[k] = 0

            if messages[k, t] == 0 or pd == 0 or pi[z[t - 1], k] == 0:
                # Force the probability to 0 if any factor is zero, so the
                # logs below stay finite
                f[k] = 0
                probabilities[k] = 0
            else:
                # Probability of transitioning into mode k given mode z[t-1];
                # accumulate in log space for numerical reasons
                f[k] = np.log(pd) + np.log(messages[k, t])
                probabilities[k] = np.exp(np.log(pi[z[t - 1], k]) + f[k])

        probabilities = probabilities / np.sum(probabilities)
        # Sample new z[t] from the normalized mode probabilities
        z[t] = np.random.choice(np.arange(0, L), p=probabilities)
        # Update n_jk to reflect the new transition (n_jk counts all
        # transitions from j to k within the entire time series)
        n[z[t - 1], z[t]] += 1

        # add y[t] to the cached statistics
        ### INVESTIGATE USE OF CACHED STATISTICS AND IF NECESSARY
        # self.Y_cached[z[t]]['t'] = y[:, t]

    return z, n
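# The sampler above exponentiates each log-probability individually before
# normalizing, which can underflow when every mode is unlikely. A common
# remedy is log-sum-exp normalization; this is a hedged sketch of that
# alternative, not part of the original sampler.
import numpy as np

def normalize_log_probs(log_p):
    """Convert unnormalized log-probabilities to probabilities stably."""
    log_p = np.asarray(log_p, dtype=float)
    m = np.max(log_p)          # subtract the max before exponentiating
    p = np.exp(log_p - m)      # largest term becomes exp(0) = 1
    return p / np.sum(p)

# Example: log-probs near -1000 would all underflow under a naive np.exp,
# but normalize stably here to roughly [0.665, 0.245, 0.090]
probs = normalize_log_probs([-1000.0, -1001.0, -1002.0])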
def backward_message(self, z, psi, pi, As, Sigmas):
    """
    Calculates the backward messages needed for the subsequent sampling of z.
    (See E. Fox thesis, page 158, algorithm 14, step 1 a) for reference.)

    Parameters
    ----------
    z : ndarray
        1D array containing the mode assignments for the nodes
    psi : ndarray
        2D array containing the states/pseudo-observations for each time step.
        The dimensionality is [state_dim, T].
    pi : ndarray
        2D array containing the probabilities of transitioning to mode j from
        the current mode k. The columns represent the current mode, and the
        rows the transition probabilities. The dimensionality is [L, L].
    As : dict
        Dictionary containing, for each mode in use, the sampled dynamical
        system matrix A
    Sigmas : dict
        Dictionary containing, for each mode in use, the sampled noise
        matrix Sigma

    Returns
    -------
    message : ndarray
        2D array containing the backward messages m_{t+1,t}(k).
        The dimensionality is [L, T].
    """
    L = self.L
    T = self.T
    dims = self.xdim
    message = np.zeros((L, T))
    message[:, T - 1] = 1

    for t in range(T - 2, -1, -1):
        for k in range(0, L):
            acc = 0
            for l in range(0, L):
                # Skip modes whose dynamics matrix is all zeros (the density
                # below would be degenerate)
                if np.linalg.norm(As[l]) == 0:
                    continue
                # TODO: check whether the mode is in use (e.g. l in
                # np.unique(z)) and use the corresponding sampled matrix,
                # otherwise fall back to the initial prior matrix for A.
                # As is filled with matrices sampled per mode, so some index
                # that "stores" the generic prior is needed for this.
                if dims == 1:
                    # In the 1D case use the univariate norm; mvnorm raises
                    # an error for scalar inputs
                    pd = norm(As[l] * psi[:, t], Sigmas[l]).pdf(psi[:, t + 1])
                else:
                    pd = mvnorm(As[l].dot(psi[:, t]), Sigmas[l]).pdf(psi[:, t + 1])
                acc = acc + message[l, t + 1] * pi[l, k] * pd
            message[k, t] = acc
        # Guard against an all-zero message (which would propagate zeros
        # backward), then normalize for numerical stability
        if np.sum(message[:, t]) == 0:
            message[:, t] = 1
        message[:, t] = message[:, t] / np.sum(message[:, t])

    return message
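# Hedged sketch: the same backward recursion with the per-mode sum written
# as a matrix-vector product, m[k, t] = sum_l pi[l, k] * pd[l] * m[l, t+1].
# This is an illustrative alternative that assumes state_dim > 1 and dense
# numpy arrays for As/Sigmas; it is not the class's actual implementation.
import numpy as np
from scipy.stats import multivariate_normal as mvnorm

def backward_messages_sketch(psi, pi, As, Sigmas):
    """Backward messages m[k, t] for an L-mode switching linear system."""
    dim, T = psi.shape
    L = pi.shape[0]
    message = np.zeros((L, T))
    message[:, T - 1] = 1.0
    for t in range(T - 2, -1, -1):
        # pd[l] = N(psi[:, t+1]; A_l psi[:, t], Sigma_l)
        pd = np.array([
            mvnorm(As[l].dot(psi[:, t]), Sigmas[l]).pdf(psi[:, t + 1])
            for l in range(L)
        ])
        # Message update as a single matrix-vector product over modes
        message[:, t] = pi.T.dot(pd * message[:, t + 1])
        total = np.sum(message[:, t])
        # Normalize; reset to uniform if the column underflowed to zero
        message[:, t] = message[:, t] / total if total > 0 else 1.0 / L
    return message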