def initialize_from_gaussian_lds(self, N_samples=100):
    """
    Initialize z, A, C, sigma_states using a Gaussian LDS
    """
    from pylds.models import DefaultLDS
    init_model = DefaultLDS(n=self.n, p=self.K)
    for data in self.data_list:
        init_model.add_data(data["x"])

    print("Initializing with Gaussian LDS")
    for smpl in range(20):
        init_model.resample_model()

    # Use the init model's parameters
    self.A = init_model.A.copy()
    self.C = init_model.C[:self.K - 1, :].copy()
    self.sigma_states = init_model.sigma_states.copy()
    self.mu_init = init_model.mu_init.copy()
    self.sigma_init = init_model.sigma_init.copy()

    # Use the init model's latent state sequences too
    for data, init_data in zip(self.data_list, init_model.states_list):
        data["z"] = init_data.stateseq.copy()

    # Now resample omega
    self.emission_distn.resample_omega(self.data_list)

def random_model(n, p, T):
    # Build a stable random LDS (n latent, p observed dimensions) on random
    # data, and return it along with its dense information-form parameters.
    data = np.random.randn(T, p)
    model = DefaultLDS(n, p)
    model.A = 0.99 * random_rotation(n, 0.01)
    model.C = np.random.randn(p, n)
    J, h = lds_to_dense_infoparams(model, data)
    model.add_data(data)
    return model, (J, h)

import warnings

import numpy as np
from statsmodels.tools import sm_exceptions

from pylds.models import DefaultLDS


def fit_lds_gibbs(seq, inputs, guessed_dim, num_update_samples):
    """Fits LDS model via Gibbs sampling. Returns fitted eigenvalues of A."""
    if inputs is None:
        model = DefaultLDS(D_obs=1, D_latent=guessed_dim, D_input=0)
    else:
        model = DefaultLDS(D_obs=1, D_latent=guessed_dim, D_input=1)
    model.add_data(seq, inputs=inputs)
    ll = np.zeros(num_update_samples)

    # Run the Gibbs sampler
    for i in range(num_update_samples):
        try:
            model.resample_model()
        except AssertionError as e:
            # Resampling failed; warn and return the eigenvalues of the
            # current dynamics matrix, sorted by decreasing magnitude.
            warnings.warn(str(e), sm_exceptions.ConvergenceWarning)
            eigs = np.linalg.eigvals(model.A)
            return eigs[np.argsort(np.abs(eigs))[::-1]]
        ll[i] = model.log_likelihood()

    # Rough estimate of convergence: judge converged if the change of maximum
    # log likelihood is less than tolerance.
    recent_steps = int(num_update_samples / 10)
    tol = 1.0
    if np.max(ll[-recent_steps:]) - np.max(ll[:-recent_steps]) > tol:
        warnings.warn(
            'Questionable convergence. Log likelihood values: ' + str(ll),
            sm_exceptions.ConvergenceWarning)

    eigs = np.linalg.eigvals(model.A)
    return eigs[np.argsort(eigs.real)[::-1]]

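# Hypothetical usage sketch (not from the original source): fit a toy 1-D
# random-walk sequence with a guessed latent dimension of 2 and inspect the
# recovered dynamics eigenvalues. The _demo names are illustrative only.
T_demo = 500
seq_demo = np.cumsum(0.1 * np.random.randn(T_demo, 1), axis=0)
eigs_demo = fit_lds_gibbs(seq_demo, inputs=None, guessed_dim=2,
                          num_update_samples=100)
print('Fitted eigenvalues of A:', eigs_demo)
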
def fit_gaussian_lds_model(Xs, Xtest, D_gauss_lds, N_samples=100):
    """Fit a raw Gaussian LDS and score multinomial predictions on Xtest.

    Assumes module-level globals from the surrounding script: K (observation
    dimension), Results (a namedtuple of results), Multinomial (from
    pybasicbayes.distributions), time, and progprint_xrange.
    """
    print("Fitting Gaussian (Raw) LDS with %d states" % D_gauss_lds)
    model = DefaultLDS(n=D_gauss_lds, p=K)

    # Center the data and add a little jitter to avoid degenerate fits
    Xs_centered = [X - np.mean(X, axis=0)[None, :] + 1e-3 * np.random.randn(*X.shape)
                   for X in Xs]
    for X in Xs_centered:
        model.add_data(X)

    # TODO: Get initial pred ll
    init_results = (0, None, np.nan, np.nan, np.nan)

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        # Monte Carlo sample to get pi density implied by Gaussian LDS
        Tpred = Xtest.shape[0]
        Npred = 1000
        preds = model.sample_predictions(Xs_centered[0], Tpred, Npred=Npred)

        # Convert predictions to a distribution by finding the
        # largest dimension for each predicted Gaussian.
        # preds is T x K x Npred, inds is T x Npred
        inds = np.argmax(preds, axis=1)
        pi = np.array([np.bincount(inds[t], minlength=K)
                       for t in range(Tpred)]) / float(Npred)
        assert np.allclose(pi.sum(axis=1), 1.0)
        pi = np.clip(pi, 1e-8, 1.0)
        pi /= pi.sum(axis=1)[:, None]

        # Compute the log likelihood under pi
        pred_ll = np.sum([Multinomial(weights=pi[t], K=K).log_likelihood(Xtest[t][None, :])
                          for t in range(Tpred)])

        return toc, None, np.nan, np.nan, pred_ll

    n_retries = 0
    max_attempts = 5
    while n_retries < max_attempts:
        try:
            times, samples, lls, test_lls, pred_lls = \
                map(np.array,
                    zip(*([init_results] +
                          [resample() for _ in progprint_xrange(N_samples)])))
            timestamps = np.cumsum(times)
            return Results(lls, test_lls, pred_lls, samples, timestamps)
        except Exception as e:
            print("Caught exception: ", str(e))
            print("Retrying")
            n_retries += 1

    raise Exception("Failed to fit the Raw Gaussian LDS model in %d attempts"
                    % max_attempts)

def random_model(n, p, d, T):
    # Like random_model(n, p, T) above, but with a d-dimensional exogenous
    # input driving both the dynamics (B) and the emissions (D).
    data = np.random.randn(T, p)
    inputs = np.random.randn(T, d)
    model = DefaultLDS(p, n, d)
    model.A = 0.99 * random_rotation(n, 0.01)
    model.B = 0.1 * np.random.randn(n, d)
    model.C = np.random.randn(p, n)
    model.D = 0.1 * np.random.randn(p, d)
    J, h = lds_to_dense_infoparams(model, data, inputs)
    model.add_data(data, inputs=inputs)
    return model, (J, h)

def fit_gaussian_lds_model(Xs, N_samples=100):
    testmodel = DefaultLDS(n=D, p=K)
    for X in Xs:
        testmodel.add_data(X)

    samples = []
    lls = []
    for smpl in progprint_xrange(N_samples):
        testmodel.resample_model()
        samples.append(testmodel.copy_sample())
        lls.append(testmodel.log_likelihood())

    lls = np.array(lls)
    return lls

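# Hypothetical usage sketch: the function above relies on module-level
# globals D (latent dimension) and K (observed dimension) in the original
# script, so illustrative values are set here.
D, K = 2, 10
Xs_demo = [np.random.randn(100, K) for _ in range(3)]  # toy sequences
lls_demo = fit_gaussian_lds_model(Xs_demo, N_samples=50)
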
from pybasicbayes.distributions import Regression, DiagonalRegression
from pybasicbayes.util.text import progprint_xrange
from pylds.models import LDS, DefaultLDS

npr.seed(0)

# Parameters
D_obs = 1
D_latent = 2
D_input = 0
T = 2000

# Simulate from an LDS with diagonal observation noise
truemodel = DefaultLDS(D_obs, D_latent, D_input,
                       sigma_obs=0.1 * np.eye(D_obs))
inputs = np.random.randn(T, D_input)
data, stateseq = truemodel.generate(T, inputs=inputs)

# Fit with an LDS with diagonal observation noise
diag_model = LDS(
    dynamics_distn=Regression(
        nu_0=D_latent + 2,
        S_0=D_latent * np.eye(D_latent),
        M_0=np.zeros((D_latent, D_latent + D_input)),
        K_0=(D_latent + D_input) * np.eye(D_latent + D_input)),
    emission_distn=DiagonalRegression(D_obs, D_latent + D_input))
diag_model.add_data(data, inputs=inputs)

# Also fit a model with a full covariance matrix
full_model = DefaultLDS(D_obs, D_latent, D_input)
full_model.add_data(data, inputs=inputs)

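# A minimal continuation sketch (assumed; the original snippet stops after
# adding data): run Gibbs on both models and track log likelihoods, following
# the update() pattern used in the other snippets, so the diagonal-noise and
# full-covariance fits can be compared.
def update(model):
    model.resample_model()
    return model.log_likelihood()

N_samples = 100
diag_lls = [update(diag_model) for _ in progprint_xrange(N_samples)]
full_lls = [update(full_model) for _ in progprint_xrange(N_samples)]
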
import matplotlib.pyplot as plt

from pybasicbayes.distributions import Regression, DiagonalRegression
from pybasicbayes.util.text import progprint_xrange
from pylds.models import DefaultLDS, MissingDataLDS

npr.seed(0)

# Model parameters
D_obs = 4
D_latent = 4
T = 1000

# Simulate from an LDS
truemodel = DefaultLDS(D_obs, D_latent)
data, stateseq = truemodel.generate(T)

# Mask off a chunk of data
mask = np.ones_like(data, dtype=bool)
chunksz = 100
for i, offset in enumerate(range(0, T, chunksz)):
    j = i % (D_obs + 1)
    if j < D_obs:
        mask[offset:min(offset + chunksz, T), j] = False
    if j == D_obs:
        mask[offset:min(offset + chunksz, T), :] = False

# Fit with another LDS
model = MissingDataLDS(
    dynamics_distn=Regression(

def createExactPongData(T, D_obs, Start, Per, Amp):
    # calcDotPosition is assumed to be defined elsewhere in the original script.
    exactData = np.empty((T, D_obs))
    exactData[0] = [Start] * D_obs
    for t in range(1, T):
        exactData[t] = calcDotPosition(t, Start, Per, Amp)
    return exactData

data = createExactPongData(T, D_obs, Start, Per, Amp)
# pp.pprint(data)

# Fit with another LDS
model = DefaultLDS(D_obs, D_latent)
model.add_data(data)

# Initialize with a few iterations of Gibbs
for _ in progprint_xrange(10):
    model.resample_model()

# Run mean field coordinate descent (not EM): each step returns the
# variational lower bound.
def update(model):
    vlb = model.meanfield_coordinate_descent_step()
    return vlb

vlbs = [update(model) for _ in progprint_xrange(50)]

num_mc_samples = 10

"""Model selection / Hypothesis testing"""
# Now calculate
#   log posterior_odds = log prior_odds + log likelihood_ratio
# where
#   log posterior_odds   = log P(H1|D,I) / P(H2|D,I)
#   log prior_odds       = log P(H1|I) / P(H2|I)
#   log likelihood_ratio = log P(D|H1,I) / P(D|H2,I)
# The variable I represents all our background information.

# Let's assume our prior belief in both hypotheses is equal: P(H1|I) = P(H2|I).
# The log_prior_odds is then log(1) = 0.
log_prior_odds = 0

# Calculate log P(D|H,I) by integrating out theta in
#   p(D,theta|H,I) = p(D|theta,H,I) p(theta|H,I)
# (data, num_samples, and logsumexp are assumed to come from the
# surrounding script.)
print('Hypothesis 1')
model = DefaultLDS(D_obs=1, D_latent=2)
log_p_D_given_H1I = []
for _ in range(num_mc_samples):
    # Draw theta from the prior and score the data under it
    model.resample_parameters()
    log_p_D_given_H1I.append(
        np.sum([model.log_likelihood(np.expand_dims(data[n], 1))
                for n in range(num_samples)]))

# In the next line, we do a log-sum-exp over our list.
# - The outer log puts the evidence on log scale
# - The sum is over the MC samples
# - The exp cancels the log in the per-sample log likelihoods
log_p_D_given_H1I = logsumexp(log_p_D_given_H1I) - np.log(num_mc_samples)

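# A minimal continuation sketch (assumed, not from the original source).
# The Monte Carlo evidence estimate above is
#   log P(D|H,I) ~= logsumexp_s( log p(D|theta_s,H,I) ) - log S
# with theta_s drawn from the prior. Hypothesis 2 would be scored the same
# way with a different model (here, hypothetically, D_latent=3), and the two
# estimates combine into the log posterior odds.
print('Hypothesis 2')
model2 = DefaultLDS(D_obs=1, D_latent=3)  # hypothetical alternative model
log_p_D_given_H2I = []
for _ in range(num_mc_samples):
    model2.resample_parameters()
    log_p_D_given_H2I.append(
        np.sum([model2.log_likelihood(np.expand_dims(data[n], 1))
                for n in range(num_samples)]))
log_p_D_given_H2I = logsumexp(log_p_D_given_H2I) - np.log(num_mc_samples)

log_posterior_odds = log_prior_odds + log_p_D_given_H1I - log_p_D_given_H2I
print('log posterior odds (H1 vs H2):', log_posterior_odds)
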
truemodel = LDS(
    dynamics_distn=AutoRegression(A=A, sigma=sigma_states),
    emission_distn=Regression(A=C, sigma=sigma_obs))
data, stateseq = truemodel.generate(2000)


###############
#  fit model  #
###############

def update(model):
    model.resample_model()
    return model.log_likelihood()

model = DefaultLDS(n=2, p=data.shape[1]).add_data(data)
lls = [update(model) for _ in progprint_xrange(100)]

plt.figure(figsize=(3, 4))
plt.plot(lls)
plt.xlabel('iteration')
plt.ylabel('log likelihood')


################
#  predicting  #
################

Npredict = 100
prediction_seed = data[:1700]
predictions = model.sample_predictions(prediction_seed,

truemodel = LDS(
    dynamics_distn=AutoRegression(A=A, sigma=sigma_states),
    emission_distn=Regression(A=C, sigma=sigma_obs))
data, stateseq = truemodel.generate(2000)


###############
#  fit model  #
###############

def update(model):
    return model.meanfield_coordinate_descent_step()

model = DefaultLDS(n=2, p=data.shape[1]).add_data(data)

# Initialize with Gibbs, then switch to mean field
for _ in progprint_xrange(100):
    model.resample_model()

vlbs = [update(model) for _ in progprint_xrange(50)]

plt.figure(figsize=(3, 4))
plt.plot(vlbs)
plt.xlabel('iteration')
plt.ylabel('variational lower bound')


################
#  predicting  #
################

C = np.array([[10., 0.]])
sigma_obs = 0.01 * np.eye(1)
# C = np.eye(2)
# sigma_obs = 0.01*np.eye(2)


###################
#  generate data  #
###################

truemodel = LDS(
    dynamics_distn=AutoRegression(A=A, sigma=sigma_states),
    emission_distn=Regression(A=C, sigma=sigma_obs))
data, stateseq = truemodel.generate(2000)


###############
#  fit model  #
###############

model = DefaultLDS(n=2, p=data.shape[1]).add_data(data)

likes = []
for _ in progprint_xrange(50):
    model.EM_step()
    likes.append(model.log_likelihood())

plt.plot(likes)
plt.show()

truemodel = LDS(
    dynamics_distn=AutoRegression(A=A, sigma=sigma_states),
    emission_distn=Regression(A=C, sigma=sigma_obs))
data, stateseq = truemodel.generate(2000)


###############
#  fit model  #
###############

def update(model):
    model.EM_step()
    return model.log_likelihood()

model = DefaultLDS(n=2, p=data.shape[1]).add_data(data)
likes = [update(model) for _ in progprint_xrange(50)]

plt.figure(figsize=(3, 4))
plt.plot(likes)
plt.xlabel('iteration')
plt.ylabel('training likelihood')


################
#  predicting  #
################

Npredict = 100
prediction_seed = data[:1700]

from pybasicbayes.distributions import Regression, DiagonalRegression
from pybasicbayes.util.text import progprint_xrange
from pylds.models import LDS, DefaultLDS

npr.seed(0)

# Parameters
D_obs = 1
D_latent = 2
D_input = 0
T = 2000

# Simulate from an LDS
truemodel = DefaultLDS(D_obs, D_latent, D_input)
inputs = np.random.randn(T, D_input)
data, stateseq = truemodel.generate(T, inputs=inputs)

# Fit with an LDS with diagonal observation noise
model = LDS(
    dynamics_distn=Regression(
        nu_0=D_latent + 2,
        S_0=D_latent * np.eye(D_latent),
        M_0=np.zeros((D_latent, D_latent + D_input)),
        K_0=(D_latent + D_input) * np.eye(D_latent + D_input)),
    emission_distn=DiagonalRegression(D_obs, D_latent + D_input))
model.add_data(data, inputs=inputs)

# Fit with mean field
def update(model):
    # Assumed continuation (the snippet was truncated here); mirrors the
    # mean field update used in the other snippets.
    return model.meanfield_coordinate_descent_step()

import matplotlib.pyplot as plt

from pybasicbayes.distributions import Regression, DiagonalRegression
from pybasicbayes.util.text import progprint_xrange
from pylds.models import DefaultLDS, LDS

npr.seed(0)

# Model parameters
D_obs = 4
D_latent = 4
T = 1000

# Simulate from an LDS
truemodel = DefaultLDS(D_obs, D_latent)
data, stateseq = truemodel.generate(T)

# Mask off a chunk of data
mask = np.ones_like(data, dtype=bool)
chunksz = 100
for i, offset in enumerate(range(0, T, chunksz)):
    j = i % (D_obs + 1)
    if j < D_obs:
        mask[offset:min(offset + chunksz, T), j] = False
    if j == D_obs:
        mask[offset:min(offset + chunksz, T), :] = False

# Fit with another LDS
model = LDS(
    dynamics_distn=Regression(nu_0=D_latent + 3,
                              S_0=D_latent * np.eye(D_latent),

import matplotlib.pyplot as plt

from pybasicbayes.util.text import progprint_xrange
from pylds.models import DefaultLDS

npr.seed(0)

# Set parameters
D_obs = 1
D_latent = 2
D_input = 0
T = 2000

# Simulate from one LDS
truemodel = DefaultLDS(D_obs, D_latent, D_input)
inputs = np.random.randn(T, D_input)
data, stateseq = truemodel.generate(T, inputs=inputs)

# Fit with another LDS
model = DefaultLDS(D_obs, D_latent, D_input)
model.add_data(data, inputs=inputs)

# Initialize with a few iterations of Gibbs
for _ in progprint_xrange(10):
    model.resample_model()

# Run EM
def update(model):
    model.EM_step()
    # Assumed continuation (the snippet was truncated here); mirrors the
    # EM update() used in the snippet above.
    return model.log_likelihood()

except:
    colors = ['b', 'r', 'y', 'g']

from pybasicbayes.util.text import progprint_xrange
from pylds.models import DefaultLDS

npr.seed(3)

# Set parameters
D_obs = 1
D_latent = 2
D_input = 0
T = 2000

# Simulate from one LDS
true_model = DefaultLDS(D_obs, D_latent, D_input, sigma_obs=np.eye(D_obs))
inputs = npr.randn(T, D_input)
data, stateseq = true_model.generate(T, inputs=inputs)

# Fit with another LDS
test_model = DefaultLDS(D_obs, D_latent, D_input)
test_model.add_data(data, inputs=inputs)

# Run the Gibbs sampler
N_samples = 100

def update(model):
    model.resample_model()
    return model.log_likelihood()

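# A minimal continuation sketch (assumed): drive the update() helper with
# the same sampling loop used in the other snippets.
lls = [update(test_model) for _ in progprint_xrange(N_samples)]
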