def _make_model(self):
    """Construct the sticky HDP-HMM used to model the observation data.

    Observations are Gaussian (ellipses in red-green intensity space)
    with a normal-inverse-Wishart prior fit empirically to the data.
    """
    data = self.model_data

    # Prior hyperparameters for each state's Gaussian:
    # - mu_0 / sigma_0: prior belief about each state's mean and covariance.
    # - nu_0: confidence in the prior, expressed as a pseudo-count of data
    #   points; it must be strictly greater than the data dimension
    #   (2 here, so e.g. 2.01 would also work). The nominal covariance is
    #   sigma_0 / nu_0 — hence the factor of 3 in sigma_0.
    # - kappa_0: ties uncertainty in the mean to uncertainty in the
    #   covariance; smaller values push other states' means further away.
    observation_prior = dict(
        mu_0=data.mean(0),
        sigma_0=3. * cov(data),
        nu_0=3.,
        kappa_0=0.5,
    )
    observation_distributions = [
        Gaussian(**observation_prior) for _ in range(10)
    ]

    # alpha and gamma bias how many states there are; 1. tells the model
    # to expect a single state (conservative). kappa is the self-transition
    # (stickiness) bias: larger values make it more expensive for a state
    # to transition to a different state.
    return WeakLimitStickyHDPHMM(
        alpha=1.,
        gamma=1.,
        init_state_distn='uniform',
        kappa=500.,
        obs_distns=observation_distributions,
    )
data, labels = truemodel.generate(T)

plt.figure()
truemodel.plot()
plt.gcf().suptitle('True model')

##################
#  set up model  #
##################

Nmax = 20

# One Gaussian emission distribution per potential state, all sharing an
# empirical NIW prior centered on the generated data.
obs_distns = [
    pyhsmm.distributions.Gaussian(
        mu_0=data.mean(0),
        sigma_0=0.5 * cov(data),
        kappa_0=0.5,
        nu_0=obs_dim + 3,
    )
    for _ in range(Nmax)
]

# Duration priors: the r_ vector is a discrete distribution uniform over
# {6,7,8}; alpha_0/beta_0 give an average geometric success probability
# of 1/(9+1).
dur_distns = [
    pyhsmm.distributions.NegativeBinomialIntegerRVariantDuration(
        np.r_[0,0,0,0,0,1.,1.,1.],
        alpha_0=9,
        beta_0=1,
    )
    for _ in range(Nmax)
]

model = pyhsmm.models.HSMMIntNegBinVariant(
    init_state_concentration=10.,
    alpha=6.,
    gamma=6.,
    obs_distns=obs_distns,
    dur_distns=dur_distns,
)
model.add_data(data, left_censoring=True)
def pca(data, num_components=2): U, s, Vh = np.linalg.svd(cov(data)) return Vh.T[:, :num_components]
data, labels = truemodel.generate(T)

plt.figure()
truemodel.plot()
plt.gcf().suptitle('True model')

##################
#  set up model  #
##################

Nmax = 20

# Gaussian emissions with an empirical NIW prior built from the data.
obs_distns = [
    pyhsmm.distributions.Gaussian(
        mu_0=data.mean(0),
        sigma_0=0.5 * cov(data),
        kappa_0=0.5,
        nu_0=obs_dim + 3,
    )
    for _ in range(Nmax)
]

# Negative-binomial durations: discrete distribution uniform over {6,7,8};
# average geometric success probability 1/(9+1).
dur_distns = [
    pyhsmm.distributions.NegativeBinomialIntegerRVariantDuration(
        np.r_[0,0,0,0,0,1.,1.,1.],
        alpha_0=9,
        beta_0=1,
    )
    for _ in range(Nmax)
]

model = pyhsmm.models.HSMMIntNegBinVariant(
    init_state_concentration=10.,
    alpha=6.,
    gamma=6.,
    obs_distns=obs_distns,
    dur_distns=dur_distns,
)
model.add_data(data, left_censoring=True)
###############
#  load data  #
###############

data = np.loadtxt("example-data.txt")
T, obs_dim = data.shape

##################
#  set up model  #
##################

Nmax = 20

# Gaussian emissions with an empirical NIW prior centered on the data.
obs_distns = [
    pyhsmm.distributions.Gaussian(
        mu_0=data.mean(0),
        sigma_0=0.5 * cov(data),
        kappa_0=0.5,
        nu_0=obs_dim + 3,
    )
    for _ in range(Nmax)
]

# Negative-binomial duration distributions (one per potential state).
dur_distns = [
    pyhsmm.distributions.NegativeBinomialDuration(7 * 100, 1.0 / 100, 50 * 10, 50 * 1)
    for _ in range(Nmax)
]

model = pyhsmm.models.HSMMGeoApproximation(
    init_state_concentration=Nmax,  # doesn't matter for one chain
    alpha=6.0,
    gamma=6.0,
    obs_distns=obs_distns,
    dur_distns=dur_distns,
    # NOTE: blows up if trunc isn't long enough w.r.t. the
    # NegativeBinomial duration parameters.
    trunc=150,
)