import numpy as np

# `T` (the deepx backend namespace) is assumed to be imported at module level.


def coerce(x, shape=None):
    """Wrap raw inputs as graph nodes; `shape` is currently unused."""
    from .deterministic_tensor import DeterministicTensor
    if isinstance(x, (int, float)):
        return DeterministicTensor(T.constant(x))
    if isinstance(x, np.ndarray):
        return DeterministicTensor(T.constant(x))
    if isinstance(x, T.core.Tensor):
        return DeterministicTensor(x)
    raise TypeError("cannot coerce %r to a tensor" % (x,))
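# Usage sketch (hypothetical, assuming a deepx backend is active): every
# supported input comes back wrapped as a DeterministicTensor node.
#
#     coerce(3.0)                      # Python scalar -> constant tensor
#     coerce(np.zeros((2, 2)))         # numpy array   -> constant tensor
#     coerce(T.constant([1.0, 2.0]))   # tensor passes through, wrapped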
def vmp(graph, data, max_iter=100, tol=1e-4):
    """Variational message passing: iterate until the ELBO stops improving."""
    q, visible = {}, {}
    # Visit nodes leaves-first; observed nodes become evidence, the rest get
    # an initial variational factor.
    for node in top_sort(graph)[::-1]:
        if node in data:
            visible[node] = T.to_float(data[node])
        else:
            q[node] = initialize_node(node, {})
    ordering = list(q.keys())
    params = [q[var].get_parameters('natural') for var in ordering]

    def cond(i, elbo, prev_elbo, q):
        return T.logical_and(i < max_iter, T.abs(elbo - prev_elbo) > tol)

    def step(i, elbo, prev_elbo, q):
        prev_elbo = elbo
        q_vars = {
            var: var.__class__(param, 'natural')
            for var, param in zip(ordering, q)
        }
        q, elbo = message_passing(q_vars, visible)
        return i + 1, elbo, prev_elbo, [
            q[var].get_parameters('natural') for var in ordering
        ]

    i, elbo, prev_elbo, q = T.while_loop(cond, step,
                                         [0, float('inf'), 0.0, params])
    return {
        var: var.__class__(param, 'natural')
        for var, param in zip(ordering, q)
    }, elbo
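# The loop above is a generic fixed-point iteration: run `step` until the
# ELBO moves by less than `tol` or `max_iter` is hit. A minimal,
# self-contained sketch of the same control flow (hypothetical, not deepx):

def _fixed_point(step_fn, state, max_iter=100, tol=1e-4):
    elbo, prev_elbo = float('-inf'), float('inf')
    for _ in range(max_iter):
        if abs(elbo - prev_elbo) <= tol:
            break
        prev_elbo = elbo
        state, elbo = step_fn(state)
    return state, elbo

# Example: Newton's iteration for sqrt(2), using -|x^2 - 2| as the "ELBO".
# _fixed_point(lambda x: (0.5 * (x + 2 / x), -abs(x * x - 2)), 1.0)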
import numpy as np


def generate_data(N, D, K, sigma0=10, sigma=10, seed=None):
    """Sample N points in R^D from a K-component Gaussian mixture."""
    np.random.seed(seed)
    pi = np.random.dirichlet([100] * K)  # near-uniform mixture weights
    mu = np.random.multivariate_normal(
        mean=np.zeros(D), cov=np.eye(D) * sigma0, size=[K])
    cov = np.tile(np.eye(D)[None] * sigma, [K, 1, 1])
    X = np.zeros((N, D))
    Z = np.zeros(N, dtype=np.int32)
    for i in range(N):
        z = Z[i] = np.random.choice(K, p=pi)
        X[i] = np.random.multivariate_normal(mean=mu[z], cov=cov[z])
    return X.astype(np.float32), (mu, cov, Z)
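# Smoke test (sketch): the helper above returns the data along with the
# ground-truth parameters used to generate it.
if __name__ == '__main__':
    X, (mu, cov, Z) = generate_data(500, 2, 3, seed=0)
    assert X.shape == (500, 2) and mu.shape == (3, 2) and Z.shape == (500,)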
def posterior_dynamics(self, q_X, q_A, data_strength=1.0, max_iter=200,
                       tol=1e-3):
    if self.smooth:
        # Build the MNIW prior over dynamics; in the stationary case the
        # single natural parameter is tiled across the horizon.
        if self.time_varying:
            prior_dyn = stats.MNIW(
                self.A_variational.get_parameters('natural'), 'natural')
        else:
            natparam = self.A_variational.get_parameters('natural')
            prior_dyn = stats.MNIW([
                T.tile(natparam[0][None], [self.horizon - 1, 1, 1]),
                T.tile(natparam[1][None], [self.horizon - 1, 1, 1]),
                T.tile(natparam[2][None], [self.horizon - 1, 1, 1]),
                T.tile(natparam[3][None], [self.horizon - 1]),
            ], 'natural')
        state_prior = stats.Gaussian([T.eye(self.ds), T.zeros(self.ds)])
        aaT, a = stats.Gaussian.unpack(q_A.expected_sufficient_statistics())
        aaT, a = aaT[:, :-1], a[:, :-1]
        ds, da = self.ds, self.da
        initial_dyn_natparam = prior_dyn.get_parameters('natural')
        initial_X_natparam = stats.LDS(
            (self.sufficient_statistics(), state_prior, q_X,
             q_A.expected_value(), self.horizon),
            'internal').get_parameters('natural')

        def em(i, q_dyn_natparam, q_X_natparam, _, curr_elbo):
            q_X_ = stats.LDS(q_X_natparam, 'natural')
            ess = q_X_.expected_sufficient_statistics()
            batch_size = T.shape(ess)[0]
            # Slice out the pairwise moments E[y y^T], E[x x^T], E[y x^T]
            # and the means of consecutive states (x = x_t, y = x_{t+1}).
            yyT = ess[..., :-1, ds:2 * ds, ds:2 * ds]
            xxT = ess[..., :-1, :ds, :ds]
            yxT = ess[..., :-1, ds:2 * ds, :ds]
            x = ess[..., :-1, -1, :ds]
            y = ess[..., :-1, -1, ds:2 * ds]
            xaT = T.outer(x, a)
            yaT = T.outer(y, a)
            xaxaT = T.concatenate([
                T.concatenate([xxT, xaT], -1),
                T.concatenate([T.matrix_transpose(xaT), aaT], -1),
            ], -2)
            ess = [
                yyT,
                T.concatenate([yxT, yaT], -1),
                xaxaT,
                T.ones([batch_size, self.horizon - 1]),
            ]
            # Conjugate update: prior natural parameters plus the weighted
            # expected sufficient statistics summed over the batch.
            q_dyn_natparam = [
                T.sum(a_, [0]) * data_strength + b
                for a_, b in zip(ess, initial_dyn_natparam)
            ]
            q_dyn_ = stats.MNIW(q_dyn_natparam, 'natural')
            q_stats = q_dyn_.expected_sufficient_statistics()
            p_X = stats.LDS((q_stats, state_prior, None,
                             q_A.expected_value(), self.horizon))
            q_X_ = stats.LDS((q_stats, state_prior, q_X,
                              q_A.expected_value(), self.horizon))
            elbo = (T.sum(stats.kl_divergence(q_X_, p_X)) +
                    T.sum(stats.kl_divergence(q_dyn_, prior_dyn)))
            return (i + 1, q_dyn_.get_parameters('natural'),
                    q_X_.get_parameters('natural'), curr_elbo, elbo)

        def cond(i, _, __, prev_elbo, curr_elbo):
            with T.core.control_dependencies([T.core.print(curr_elbo)]):
                prev_elbo = T.core.identity(prev_elbo)
            return T.logical_and(
                T.abs(curr_elbo - prev_elbo) > tol, i < max_iter)

        result = T.while_loop(
            cond, em, [
                0, initial_dyn_natparam, initial_X_natparam,
                T.constant(-np.inf), T.constant(0.)
            ],
            back_prop=False)
        pd = stats.MNIW(result[1], 'natural')
        sigma, mu = pd.expected_value()
        q_X = stats.LDS(result[2], 'natural')
        return ((mu, sigma), pd.expected_sufficient_statistics()), (q_X, q_A)
    else:
        # Filtering case: form one-step statistics directly from the current
        # variational factors over states and actions.
        q_Xt = q_X.__class__([
            q_X.get_parameters('regular')[0][:, :-1],
            q_X.get_parameters('regular')[1][:, :-1],
        ])
        q_At = q_A.__class__([
            q_A.get_parameters('regular')[0][:, :-1],
            q_A.get_parameters('regular')[1][:, :-1],
        ])
        q_Xt1 = q_X.__class__([
            q_X.get_parameters('regular')[0][:, 1:],
            q_X.get_parameters('regular')[1][:, 1:],
        ])
        (XtAt_XtAtT, XtAt), (Xt1_Xt1T, Xt1) = self.get_statistics(
            q_Xt, q_At, q_Xt1)
        batch_size = T.shape(XtAt)[0]
        ess = [
            Xt1_Xt1T,
            T.einsum('nha,nhb->nhba', XtAt, Xt1),
            XtAt_XtAtT,
            T.ones([batch_size, self.horizon - 1]),
        ]
        if self.time_varying:
            posterior = stats.MNIW([
                T.sum(a, [0]) * data_strength + b for a, b in zip(
                    ess, self.A_variational.get_parameters('natural'))
            ], 'natural')
        else:
            posterior = stats.MNIW([
                T.sum(a, [0]) * data_strength + b[None] for a, b in zip(
                    ess, self.A_variational.get_parameters('natural'))
            ], 'natural')
        Q, A = posterior.expected_value()
        return (A, Q), q_X
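# The `q_dyn_natparam` update inside `em` above is the standard conjugate
# update: posterior natural parameters = prior natural parameters +
# data_strength * (expected sufficient statistics summed over the batch).
# A minimal, self-contained numpy illustration of the same rule for a 1-D
# Gaussian with known unit variance (hypothetical, not the MNIW case):

import numpy as np

def conjugate_update(prior_nat, suff_stats, data_strength=1.0):
    # Elementwise: prior plus weighted sufficient statistics.
    return [p + data_strength * s for p, s in zip(prior_nat, suff_stats)]

_x = np.random.randn(10) + 3.0           # observations near mean 3
_prior = [0.0, -0.5]                     # N(0, 1) prior on the mean
_stats = [_x.sum(), -0.5 * len(_x)]      # (sum_i x_i, -n/2)
_post = conjugate_update(_prior, _stats)
_post_mean = -_post[0] / (2 * _post[1])  # = sum(x) / (n + 1)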
# Fragment of an ellipse-plotting helper (a self-contained sketch follows below).
ellipse = 2. * np.dot(np.linalg.cholesky(cov), circle) + mean[:, None]
if line:
    line.set_data(ellipse)
    line.set_alpha(alpha)
else:
    ax.plot(ellipse[0], ellipse[1], linestyle='-', linewidth=2)

N = 1000
K = 5
D = 2
sigma = 0.5
sigma0 = 100

data = generate_data(N, D, K, sigma=sigma, sigma0=sigma0, seed=None)

p_pi = Dirichlet(T.constant(10.0 * np.ones([K], dtype=T.floatx())))
p_theta = NIW(
    list(
        map(lambda x: T.constant(np.array(x).astype(T.floatx())),
            [np.eye(D) * sigma, np.zeros(D), 1, D + 1])))
prior = (p_pi, p_theta)

np.random.seed(None)
X = T.placeholder(T.floatx(), [None, D])
batch_size = T.shape(X)[0]

q_pi = make_variable(Dirichlet(np.ones([K], dtype=T.floatx())))
q_theta = make_variable(
    NIW(
        list(
            map(lambda x: np.array(x).astype(T.floatx()),
                [np.eye(D) * sigma, np.zeros(D), 1, D + 1]))))
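# A self-contained version of the ellipse fragment above (hypothetical
# helper; in the original script `cov`, `mean`, `line`, etc. come from the
# surrounding plotting code). It draws the 2-sigma contour of a 2-D Gaussian
# by mapping the unit circle through the Cholesky factor of the covariance.
import numpy as np

def plot_ellipse(ax, mean, cov, n_points=100, alpha=1.0):
    t = np.linspace(0, 2 * np.pi, n_points)
    circle = np.vstack([np.cos(t), np.sin(t)])
    ellipse = 2. * np.dot(np.linalg.cholesky(cov), circle) + mean[:, None]
    ax.plot(ellipse[0], ellipse[1], linestyle='-', linewidth=2, alpha=alpha)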
else:
    ax.plot(ellipse[0], ellipse[1], linestyle='-', linewidth=2, alpha=alpha)

N = 1000
K = 1
D = 2
sigma = 0.5
sigma0 = 100

data = generate_data(N, D, K, sigma=sigma, sigma0=sigma0, seed=None)

p_pi = Dirichlet(T.constant(10.0 * np.ones([K], dtype=T.floatx())))
p_theta = NIW(
    list(
        map(lambda x: T.constant(np.array(x).astype(T.floatx())),
            [np.eye(D) * sigma, np.zeros(D), 1, D + 1])))
prior = (p_pi, p_theta)

np.random.seed(None)
X = T.placeholder(T.floatx(), [None, D])
batch_size = T.shape(X)[0]

with T.initialization('xavier'):
    net = Relu(5) >> Relu(5) >> GaussianStats(D)
encoded_data = Gaussian.unpack(net(T.constant(data)))[1]
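# The `>>` used above composes layers left-to-right. A minimal standalone
# sketch of the same pattern with plain Python callables (hypothetical, not
# the deepx implementation):
class _Compose:
    def __init__(self, f):
        self.f = f

    def __rshift__(self, other):
        # (a >> b)(x) applies a first, then b.
        return _Compose(lambda x: other.f(self.f(x)))

    def __call__(self, x):
        return self.f(x)

# _Compose(lambda x: x + 1) >> _Compose(lambda x: 2 * x)   # x -> 2 * (x + 1)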
import numpy as np
import seaborn as sns
from tqdm import trange
from sklearn.linear_model import LogisticRegression

from deepx import T
from deepx.nn import *
from deepx.stats import (Gaussian, Dirichlet, NIW, Categorical,
                         kl_divergence, Bernoulli)
from activations import Gaussian as GaussianLayer
from activations import GaussianStats

sns.set_style('white')

N = 1000
D = 10

# Standard normal prior over the logistic-regression weights.
p_w = Gaussian([
    T.constant(np.eye(D).astype(T.floatx()))[None],
    T.constant(np.zeros(D).astype(T.floatx()))[None]
])


def logistic(x):
    return 1 / (1 + np.exp(-x))


# def generate_data(N, D):
#     with T.session() as s:
#         w = np.random.multivariate_normal(mean=np.zeros(D), cov=np.eye(D))
#         X = np.random.normal(size=(N, D))
#         p = logistic(np.einsum('ia,a->i', X, w))
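# Note: the textbook form above overflows np.exp for large negative x
# (numpy emits a RuntimeWarning, though the result still rounds to 0).
# A hypothetical numerically stable variant for array inputs, not in the
# original script:
def stable_logistic(x):
    x = np.asarray(x, dtype=float)
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    ex = np.exp(x[~pos])          # x < 0, so exp(x) cannot overflow
    out[~pos] = ex / (1.0 + ex)
    return out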
import numpy as np

from deepx import stats, T

N, H, ds, da = 1, 2, 4, 2

# State-state transition: a random 2-D rotation embedded in a random
# 2-D subspace of the ds-dimensional state space.
A = np.zeros([H - 1, ds, ds])
for t in range(H - 1):
    theta = 0.5 * np.pi * np.random.rand()
    rot = np.array([[np.cos(theta), -np.sin(theta)],
                    [np.sin(theta), np.cos(theta)]])
    out = np.zeros((ds, ds))
    out[:2, :2] = rot
    q = np.linalg.qr(np.random.randn(ds, ds))[0]
    A[t] = q.dot(out).dot(q.T)
A = T.constant(A, dtype=T.floatx())
B = T.constant(0.1 * np.random.randn(H - 1, ds, da), dtype=T.floatx())
Q = T.matrix_diag(
    np.random.uniform(low=0.9, high=1.1,
                      size=[H - 1, ds]).astype(np.float32))

prior = stats.Gaussian([T.eye(ds), T.zeros(ds)])
p_S = stats.Gaussian([
    T.eye(ds, batch_shape=[N, H]),
    T.constant(np.random.randn(N, H, ds), dtype=T.floatx())
])
potentials = stats.Gaussian.unpack(
    p_S.get_parameters('natural')) + [p_S.log_z()]
actions = T.constant(np.random.randn(N, H, da), dtype=T.floatx())
lds = stats.LDS(((A, B, Q), prior, potentials, actions))
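# Sanity check (standalone sketch) of the transition construction above:
# conjugating the padded rotation by a random orthogonal q leaves singular
# values untouched, so each A[t] has exactly two unit singular values and
# annihilates the complementary subspace.
def _check_transition(ds=4, seed=0):
    rng = np.random.RandomState(seed)
    theta = 0.5 * np.pi * rng.rand()
    out = np.zeros((ds, ds))
    out[:2, :2] = [[np.cos(theta), -np.sin(theta)],
                   [np.sin(theta), np.cos(theta)]]
    q = np.linalg.qr(rng.randn(ds, ds))[0]
    s = np.linalg.svd(q.dot(out).dot(q.T), compute_uv=False)
    assert np.allclose(s[:2], 1.0) and np.allclose(s[2:], 0.0)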