def fit(self): data = np.squeeze(np.array(self._demonstrations[0])) #np.loadtxt(os.path.join(os.path.dirname(__file__),'example-data.txt'))[:T] Nmax = 25 # and some hyperparameters obs_dim = data.shape[1] print data.shape obs_hypparams = {'mu_0':np.zeros(obs_dim), 'sigma_0':np.eye(obs_dim), 'kappa_0':0.25, 'nu_0':obs_dim+2} dur_hypparams = {'alpha_0':2*30, 'beta_0':2} obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)] dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)] posteriormodel = pyhsmm.models.WeakLimitHDPHSMM( alpha=6.,gamma=6., # these can matter; see concentration-resampling.py init_state_concentration=6., # pretty inconsequential obs_distns=obs_distns, dur_distns=dur_distns) for d in self._demonstrations: posteriormodel.add_data(np.squeeze(np.array(d)),trunc=60) # duration truncation speeds things up when it's possible for idx in progprint_xrange(50): posteriormodel.resample_model() new_segments = [] for i in range(0, len(self._demonstrations)): new_segments.append(self.findTransitions(posteriormodel.states_list[i].stateseq)) self.segmentation = new_segments self.model = posteriormodel
def test_examples(self): np.seterr(divide='ignore') # these warnings are usually harmless for this code # load_basic data T = 1000 data = np.loadtxt(os.path.join(os.path.dirname(__file__),'example-data.txt'))[:T] # posterior inference Nmax = 25 obs_dim = data.shape[1] obs_hypparams = {'mu_0':np.zeros(obs_dim), 'sigma_0':np.eye(obs_dim), 'kappa_0':0.25, 'nu_0':obs_dim+2} dur_hypparams = {'alpha_0':2*30, 'beta_0':2} obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)] dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)] hsmm = pyhsmm.models.WeakLimitHDPHSMM( alpha=6.,gamma=6., # these can matter; see concentration-resampling.py init_state_concentration=6., # pretty inconsequential obs_distns=obs_distns, dur_distns=dur_distns) hsmm.add_data(data,trunc=60) # duration truncation speeds things up when it's possible for idx in progprint_xrange(150): hsmm.resample_model() hsmm.plot() plt.show()
def train_sticky_hdp_hmm(data_sets, nmax, niter, nsamples):
    """Train a weak-limit sticky HDP-HMM by Gibbs sampling, returning snapshots.

    :param data_sets: a list of ndarrays, each a (T, obs_dim) observation sequence.
    :param nmax: weak-limit truncation level (max number of hidden states).
    :param niter: total number of Gibbs sweeps.
    :param nsamples: number of model snapshots to return.
    :return: list of deep-copied model snapshots; index ``nsamples - 1`` is
        overwritten with the final (fully trained) model.
    """
    # Snapshot every `interval` sweeps. Floor division keeps this an int under
    # Python 3 (true division would make `idx % interval` a float-modulus test),
    # and clamping to 1 avoids ZeroDivisionError when nsamples > niter.
    interval = max(1, niter // nsamples)
    obs_dim = data_sets[0].shape[-1]

    # NIW prior on per-state Gaussian emissions.
    obs_hypparams = {'mu_0': np.zeros(obs_dim),
                     'sigma_0': np.eye(obs_dim),
                     'kappa_0': 0.25,
                     'nu_0': obs_dim + 2}

    obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(nmax)]

    # kappa is the sticky self-transition bias.
    posteriormodel = pyhsmm.models.WeakLimitStickyHDPHMM(kappa=50., alpha=6., gamma=6.,
                                                         init_state_concentration=1.,
                                                         obs_distns=obs_distns)
    for d in data_sets:
        posteriormodel.add_data(d)

    models = []
    for idx in progprint_xrange(niter):
        posteriormodel.resample_model()
        if idx % interval == 0:
            models.append(copy.deepcopy(posteriormodel))

    # Guarantee the last returned sample reflects the fully trained model.
    models[nsamples - 1] = posteriormodel
    return models
def hmm_run(data, Nmax=6, step=100, kappa=12.5, alpha=3., gamma=3.):
    """Fit a weak-limit sticky HDP-HMM to `data` with `step` Gibbs sweeps.

    :param data: (T,) or (T, obs_dim) observation array.
    :param Nmax: weak-limit truncation level.
    :param step: number of Gibbs sweeps.
    :param kappa: sticky self-transition bias.
    :param alpha: transition-matrix Dirichlet-process concentration.
    :param gamma: top-level Dirichlet-process concentration.
    :return: the trained model.
    """
    # Set the weak limit truncation level
    # and some hyperparameters
    obs_dim = 1
    try:
        obs_dim = data.shape[1]
    except (AttributeError, IndexError):
        # data is 1-D (no second axis) or lacks .shape; keep obs_dim = 1.
        # (Narrowed from a bare `except:` which also swallowed e.g. KeyboardInterrupt.)
        pass

    # NIW prior on per-state Gaussian emissions.
    obs_hypparams = {
        'mu_0': np.zeros(obs_dim),
        'sigma_0': np.eye(obs_dim),
        'kappa_0': 10,
        'nu_0': obs_dim + 2
    }
    obs_distns = [
        pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)
    ]
    model = pyhsmm.models.WeakLimitStickyHDPHMM(kappa=kappa,
                                                alpha=alpha,
                                                gamma=gamma,
                                                init_state_concentration=1.,
                                                obs_distns=obs_distns)
    model.add_data(data)

    for idx in progprint_xrange(step):
        model.resample_model()

    return model
def fit_model(data, N_iters, args=None):
    """Fit a PoissonHDPHMM to `data`, recording per-sweep diagnostics.

    :param data: observation sequence passed to ``model.add_data``.
    :param N_iters: number of Gibbs sweeps.
    :param args: optional dict of keyword overrides for the model constructor.
    :return: (lls, Ks) — arrays of log-likelihood and used-state count per sweep.
    """
    # Default to None instead of a mutable `{}` default argument (best practice).
    # Baseline hyperparameters come from module globals (N, K, alpha, gamma, ...).
    default_args = dict(N=N,
                        K_max=K,
                        alpha=alpha,
                        gamma=gamma,
                        alpha_obs=alpha_obs,
                        beta_obs=beta_obs,
                        init_state_concentration=1.0)
    if args is not None:
        default_args.update(args)
    # Now fit the model with a model using all the data
    model = PoissonHDPHMM(**default_args)
    model.add_data(data)

    def _evaluate(model):
        # Post-sweep diagnostics: joint log-likelihood and occupied-state count.
        ll = model.log_likelihood()
        return ll, K_used(model)

    def _step(model):
        # One Gibbs sweep followed by evaluation.
        model.resample_model()
        return _evaluate(model)

    results = [_step(model) for _ in progprint_xrange(N_iters)]
    lls = np.array([r[0] for r in results])
    Ks = np.array([r[1] for r in results])
    return lls, Ks
def estimate_hidden_states(data, Nb_states, Nb_samples, verbose=0, pp=None, title_text=None):
    """Estimate a hidden-state sequence for `data` with a changepoint HDP-HSMM.

    :param data: observation array; its min/max set the range of the prior means.
    :param Nb_states: number of candidate hidden states (truncation level).
    :param Nb_samples: number of training iterations.
    :param verbose: 0 = silent; 1 = progress printing + result; 2 = progress +
        drawn result. Any other value now behaves like 0 (previously it left
        `iterator` undefined and raised NameError).
    :param pp: optional figure sink forwarded to show_result.
    :param title_text: optional title forwarded to show_result.
    :return: the sampled state sequence of the first added dataset.
    """
    ### define uninformed priors
    # Prior means spread evenly across the observed data range.
    prior_means = np.linspace(data.min(), data.max(), Nb_states)
    # Fixed-variance scalar-Gaussian mixture emissions, one mixture per state.
    gaussMixture = [distributions.ScalarGaussianFixedvarMixture(
            mu_0_list=prior_means,
            tausq_0_list=[1.] * Nb_states,
            mixture_weights_0=[1. / Nb_states] * Nb_states,
            sigmasq=1.) for _ in range(Nb_states)]
    # Poisson + negative-binomial mixture for state durations.
    pnbMixture = [distributions.MixtureDurationDistribution(
            components=[distributions.PoissonDuration(alpha_0=10., beta_0=2.),
                        distributions.NegativeBinomialFixedRDuration(r=1., alpha_0=1., beta_0=1.)],
            alpha_0=Nb_states) for _ in range(Nb_states)]

    ### construct model
    posterior_HDPHSMM = pyhsmm.models.WeakLimitHDPHSMMPossibleChangepoints(
            init_state_distn='uniform',
            alpha=20., gamma=20.,
            obs_distns=gaussMixture,
            dur_distns=pnbMixture)

    ### train model
    if verbose == 1 or verbose == 2:
        iterator = progprint_xrange(Nb_samples)
    else:
        # Default path (including verbose == 0): silent iteration. Previously an
        # unrecognized verbose level fell through without defining `iterator`.
        iterator = range(Nb_samples)
    train_model(posterior_HDPHSMM, data, iterator)

    ### show result
    if verbose == 1:
        show_result(posterior_HDPHSMM, data, pp=pp, title_text=title_text)
    elif verbose == 2:
        show_result(posterior_HDPHSMM, data, draw=True, pp=pp, title_text=title_text)

    return posterior_HDPHSMM.stateseqs[0]
def fit(self):
    """Fit an autoregressive HDP-HSMM (integer neg-binomial durations) to the demonstrations."""
    # p: observation dimension, Nmax: truncation level — both from the first demo's size tuple.
    p = self._demonstration_sizes[0][1]
    Nmax = self._demonstration_sizes[0][0]

    affine = True
    nlags = self.lag
    # MNIW-style prior over per-state vector-autoregression parameters.
    obs_distns=[di.AutoRegression(
        nu_0=self.nu,
        S_0=np.eye(p),
        M_0=np.zeros((p,2*p+affine)),
        K_0=np.eye(2*p+affine),
        affine=affine) for state in range(Nmax)]

    # NOTE(review): np.ones(10.) passes a float length; newer numpy requires an int — confirm.
    dur_distns=[NegativeBinomialIntegerR2Duration(
        r_discrete_distn=np.ones(10.),alpha_0=1.,beta_0=1.) for state in range(Nmax)]

    model = m.ARWeakLimitHDPHSMMIntNegBin(
        alpha=self.alpha,gamma=self.gamma,init_state_concentration=self.init_state_concentration,
        obs_distns=obs_distns,
        dur_distns=dur_distns,
        )

    for d in self._demonstrations:
        model.add_data(d,trunc=60)  # trunc caps considered durations for speed
    #model.resample_model()

    # 20 Gibbs sweeps over parameters and state sequences.
    for itr in progprint_xrange(20):
        model.resample_model()

    # Convert each sampled state sequence into segment transition points.
    new_segments = []
    for i in range(0, len(self._demonstrations)):
        #print model.states_list[i].stateseq
        new_segments.append(self.findTransitions(model.states_list[i].stateseq))

    self.segmentation = new_segments
    self.model = model
def _fit_hmm(hmm, algo_name): if algo_name=="Baum-Welch": print('Gibbs sampling for initialization') for idx in progprint_xrange(25): hmm.resample_model() ## in models line 440, we se that this is resampling the parameters and resampling the states print('fit EM model') likes = hmm.EM_fit() else: raise NotImplementedError return hmm
def fit(self, data, num_iterations=1000, verbose=True):
    """Attach `data` to the wrapped HMM and draw `num_iterations` posterior samples.

    Stores the data, its row/column counts, and the sampled model copies on the
    instance. When `verbose`, iteration progress is printed.
    """
    self.data = data
    self.num_datapoints = data.shape[0]
    self.data_dimensions = data.shape[1]
    self.hmm_model.add_data(data)
    # Progress-printing iterator when verbose, a plain index array otherwise.
    iterator = progprint_xrange(num_iterations) if verbose else np.arange(num_iterations)
    samples = []
    for _ in iterator:
        samples.append(self.hmm_model.resample_and_copy())
    self.model_samples = samples
def fit(self): data = np.squeeze( np.array(self._demonstrations[0]) ) #np.loadtxt(os.path.join(os.path.dirname(__file__),'example-data.txt'))[:T] Nmax = 25 # and some hyperparameters obs_dim = data.shape[1] print data.shape obs_hypparams = { 'mu_0': np.zeros(obs_dim), 'sigma_0': np.eye(obs_dim), 'kappa_0': 0.25, 'nu_0': obs_dim + 2 } dur_hypparams = {'alpha_0': 2 * 30, 'beta_0': 2} obs_distns = [ pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax) ] dur_distns = [ pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax) ] posteriormodel = pyhsmm.models.WeakLimitHDPHSMM( alpha=6., gamma=6., # these can matter; see concentration-resampling.py init_state_concentration=6., # pretty inconsequential obs_distns=obs_distns, dur_distns=dur_distns) for d in self._demonstrations: posteriormodel.add_data( np.squeeze(np.array(d)), trunc=60 ) # duration truncation speeds things up when it's possible for idx in progprint_xrange(50): posteriormodel.resample_model() new_segments = [] for i in range(0, len(self._demonstrations)): new_segments.append( self.findTransitions(posteriormodel.states_list[i].stateseq)) self.segmentation = new_segments self.model = posteriormodel
def fit(self):
    """Fit an autoregressive HDP-HSMM (integer neg-binomial durations) to the demonstrations."""
    # p: observation dimension, Nmax: truncation level — both from the first demo's size tuple.
    p = self._demonstration_sizes[0][1]
    Nmax = self._demonstration_sizes[0][0]

    affine = True
    nlags = self.lag
    # MNIW-style prior over per-state vector-autoregression parameters.
    obs_distns = [
        di.AutoRegression(nu_0=self.nu,
                          S_0=np.eye(p),
                          M_0=np.zeros((p, 2 * p + affine)),
                          K_0=np.eye(2 * p + affine),
                          affine=affine) for state in range(Nmax)
    ]
    # NOTE(review): np.ones(10.) passes a float length; newer numpy requires an int — confirm.
    dur_distns = [
        NegativeBinomialIntegerR2Duration(r_discrete_distn=np.ones(10.),
                                          alpha_0=1.,
                                          beta_0=1.) for state in range(Nmax)
    ]

    model = m.ARWeakLimitHDPHSMMIntNegBin(
        alpha=self.alpha,
        gamma=self.gamma,
        init_state_concentration=self.init_state_concentration,
        obs_distns=obs_distns,
        dur_distns=dur_distns,
    )

    for d in self._demonstrations:
        model.add_data(d, trunc=60)  # trunc caps considered durations for speed
    #model.resample_model()

    # 20 Gibbs sweeps over parameters and state sequences.
    for itr in progprint_xrange(20):
        model.resample_model()

    # Convert each sampled state sequence into segment transition points.
    new_segments = []
    for i in range(0, len(self._demonstrations)):
        #print model.states_list[i].stateseq
        new_segments.append(
            self.findTransitions(model.states_list[i].stateseq))

    self.segmentation = new_segments
    self.model = model
def __init__(self, nparray):
    """Build a weak-limit HDP-HSMM and immediately train it on `nparray`.

    NOTE(review): training (150 Gibbs sweeps) runs inside __init__, so
    construction is expensive.
    """
    # nparray input
    self.nparray = nparray
    # get observation dimension
    self.obs_dim = nparray.shape[1]
    # define emission parameters (NIW prior on per-state Gaussians)
    self.obs_hypparams = {'mu_0': np.zeros(self.obs_dim),
                          'sigma_0': np.eye(self.obs_dim),
                          'kappa_0': 0.25,
                          'nu_0': self.obs_dim+2}
    # define duration parameters (gamma prior on Poisson durations, prior mean ~30)
    self.dur_hypparams = {'alpha_0':2*30,
                          'beta_0':2}
    # define max hidden states to uncover (weak-limit truncation level)
    self.Nmax = 10
    # init emission distributions
    self.obs_distns = [pyhsmm.distributions.Gaussian(**self.obs_hypparams)
                       for state in range(self.Nmax)]
    # init duration distributions
    self.dur_distns = [pyhsmm.distributions.PoissonDuration(
        **self.dur_hypparams) for state in range(self.Nmax)]
    # init hdp-hsmm
    self.posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(alpha=6.,gamma=6.,
                                                         init_state_concentration=6.,
                                                         obs_distns=self.obs_distns,
                                                         dur_distns=self.dur_distns)
    # add data; trunc=60 caps the considered segment durations for speed
    self.posteriormodel.add_data(self.nparray, trunc=60)
    # train the model (150 Gibbs sweeps)
    for idx in progprint_xrange(150):
        self.posteriormodel.resample_model()
    # results: one state sequence per added dataset
    self.states = self.posteriormodel.stateseqs
def fit(self, data, lengths):
    """Fit a weak-limit HDP-HSMM to `data` by Gibbs sampling.

    :param data: (T, obs_dim) observation array.
    :param lengths: per-sequence lengths; currently unused, kept for
        sklearn-style ``fit(data, lengths)`` signature compatibility.
    :return: self, with the trained model stored on ``self.model``.
    """
    obs_dim = data.shape[1]
    Nmax = self.hmm_hidden_state_amount       # weak-limit truncation level
    dur_length = self.max_duration_length     # duration truncation
    maxIter = self.hmm_max_train_iteration
    # (removed unused local `data_length = lengths`)

    # NIW prior on per-state Gaussian emissions.
    obs_hypparams = {
        'mu_0': np.zeros(obs_dim),
        'sigma_0': np.eye(obs_dim),
        'kappa_0': 0.25,
        'nu_0': obs_dim + 2
    }
    # Gamma prior on Poisson durations (prior mean ~30 steps).
    dur_hypparams = {'alpha_0': 2 * 30, 'beta_0': 2}

    obs_distns = [
        pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)
    ]
    dur_distns = [
        pyhsmm.distributions.PoissonDuration(**dur_hypparams)
        for state in range(Nmax)
    ]

    posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
        alpha=6.,
        gamma=6.,  # these can matter; see concentration-resampling.py
        init_state_concentration=6.,  # pretty inconsequential
        obs_distns=obs_distns,
        dur_distns=dur_distns)

    posteriormodel.add_data(
        data, trunc=dur_length
    )  # duration truncation speeds things up when it's possible

    for idx in progprint_xrange(maxIter):
        posteriormodel.resample_model()
        print('-resampling-%d/%d' % (idx, maxIter))

    self.model = posteriormodel
    return self
def hsmm_segmenter(factors, width=MEDIAN_WIDTH):
    """Segment `factors` with a weak-limit HDP-HSMM, then relabel segments via k-means.

    NOTE(review): relies on module globals/helpers not visible here (`Nmax`,
    `MEDIAN_WIDTH`, `pre`, `textutil`, `find_boundaries`, `segment_labeling`).
    Returns (boundaries array, per-segment labels).
    """
    # Normalize each frame; the NIW prior is data-driven (empirical mean/covariance).
    obs = pre.normalize(factors, axis=1)
    obs_dim = obs.shape[1]
    obs_len = obs.shape[0]
    obs_hypparams = {
        'mu_0': np.mean(obs, axis=0),
        'sigma_0': np.cov(obs, rowvar=0),
        'kappa_0': 0.25,
        'nu_0': obs_dim + 2}
    dur_hypparams = {'alpha_0': 48, 'beta_0': 2}

    obs_distns = [distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]
    dur_distns = [distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]

    # Concentration parameters are resampled under gamma priors rather than fixed.
    posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
        alpha_a_0=1., alpha_b_0=1./4,
        gamma_a_0=1., gamma_b_0=1./4,
        init_state_concentration=1.,  # pretty inconsequential
        obs_distns=obs_distns,
        dur_distns=dur_distns)
    # NOTE(review): the raw `factors` (not normalized `obs`) are fit — confirm intended.
    posteriormodel.add_data(factors, trunc=int(obs_len / 3))

    for idx in textutil.progprint_xrange(150):
        posteriormodel.resample_model()

    labels = posteriormodel.stateseqs
    # Median-filter the state sequence into boundaries / segment labels.
    boundaries, labels = find_boundaries(labels, width)
    best_n_types = len(np.unique(labels))
    # Cannot have more segment types than segments.
    if len(boundaries) < best_n_types + 1:
        best_n_types = len(boundaries) - 1
    best_labels = segment_labeling(factors, boundaries, c_method='kmeans', k=best_n_types)
    best_boundaries = np.array(boundaries)

    return best_boundaries, best_labels
    # Tail of the per-state duration-distribution list comprehension (head above).
    for state in range(library_size)]

model = LibraryHSMMIntNegBinVariant(init_state_concentration=10.,
                                    alpha=6.,
                                    gamma=6.,
                                    obs_distns=obs_distns,
                                    dur_distns=dur_distns)

# left_censoring: the first segment may have started before the observation window.
for data in training_datas:
    model.add_data(data, left_censoring=True)
    # model.add_data_parallel(data,left_censoring=True)

##################
#  infer things  #
##################

train_likes = []
test_likes = []
# 5 Gibbs sweeps, tracking training log-likelihood after each.
for i in progprint_xrange(5):
    model.resample_model()
    # model.resample_model_parallel()
    train_likes.append(model.log_likelihood())
    # test_likes.append(model.log_likelihood(test_data,left_censoring=True))

# Continue sampling on an unfrozen copy of the model.
newmodel = model.unfreeze()
for i in progprint_xrange(5):
    newmodel.resample_model()
    # Per-state duration prior: neg-binomial with support restricted to r >= 4.
    NegativeBinomialIntegerRVariantDuration(np.r_[0., 0, 0, 1, 1, 1, 1, 1],
                                            alpha_0=5.,
                                            beta_0=5.) for state in range(Nmax)
]

model = LibraryHSMMIntNegBinVariant(init_state_concentration=10.,
                                    alpha=6.,
                                    gamma=6.,
                                    obs_distns=obs_distns,
                                    dur_distns=dur_distns)

# left_censoring: first segment may predate the observation window.
for data in datas:
    model.add_data_parallel(data, left_censoring=True)

##################
#  infer things  #
##################

# 25 parallel Gibbs sweeps.
for i in progprint_xrange(25):
    model.resample_model_parallel()

# plt.figure()
# truemodel.plot()
# plt.gcf().suptitle('truth')

# plt.figure()
# model.plot()
# plt.gcf().suptitle('inferred')

# plt.show()
        # Tail of the NIW observation hyperparameters (head above this chunk).
        mu_0=np.zeros(obs_dim),
        sigma_0=np.eye(obs_dim),
        kappa_0=0.25,
        nu_0=obs_dim+2)

obs_distns = \
        [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]

# Sticky HDP-HMM: kappa biases self-transitions.
model = pyhsmm.models.WeakLimitStickyHDPHMM(
        kappa=50.,alpha=6.,gamma=6.,init_state_concentration=1.,
        obs_distns=obs_distns)
model.add_data(data)

# run inference!
fig = model.make_figure()
model.plot(fig=fig,draw=False)
# Live-updating plot: one redraw per Gibbs sweep.
for _ in progprint_xrange(250):
    model.resample_model()
    model.plot(fig=fig,update=True)

# from moviepy.video.io.bindings import mplfig_to_npimage
# from moviepy.editor import VideoClip

# fig = model.make_figure()
# model.plot(fig=fig,draw=False)

# def make_frame_mpl(t):
#     model.resample_model()
#     model.plot(fig=fig,update=True,draw=False)
#     return mplfig_to_npimage(fig)
    plt.close()
    s = model.states_list[0]
"""

### HDP-HMM without the sticky bias
obs_distns = [
    pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)
]
posteriormodel = pyhsmm.models.WeakLimitHDPHMM(alpha=6.,
                                               gamma=6.,
                                               init_state_concentration=1.,
                                               obs_distns=obs_distns)
posteriormodel.add_data(data)

for idx in progprint_xrange(ITERATIONS):
    posteriormodel.resample_model()

# Save the fitted-model figure for this experiment.
posteriormodel.plot()
plt.gcf().suptitle(
    'HDP-HMM sampled model after {} iterations'.format(ITERATIONS))
plt.savefig('plots/2d-data/hdp-hmm.png')
plt.close()

# Some more hypparams
obs_hypparams = {
    'mu_0': np.zeros(obs_dim),
    'sigma_0': np.eye(obs_dim),
    'kappa_0': 0.3,
    'nu_0': obs_dim + 5
# Per-state emissions: frozen mixtures over a shared component library.
obs_distns = [
    library_models.FrozenMixtureDistribution(
        components=component_library,
        a_0=1.0,b_0=1./5,weights=weights, # for initialization only
        ) for weights in init_weights]

hmm = library_models.LibraryHMM(
        init_state_concentration=10.,
        alpha_a_0=1.0,alpha_b_0=1./10,
        gamma_a_0=1,gamma_b_0=1,
        obs_distns=obs_distns)

hmm.add_data(training_data)

##########################
#  Gather model samples  #
##########################

# Burn-in sweeps, then keep a single posterior sample.
for itr in progprint_xrange(num_iter):
    hmm.resample_model()

hmms = [hmm.resample_and_copy() for itr in progprint_xrange(1)]

##########
#  Save  #
##########

# NOTE(review): text-mode 'w' with protocol=-1 implies Python 2 (binary pickle).
with open('/scratch/hmm_results.pickle','w') as outfile:
    cPickle.dump(hmms,outfile,protocol=-1)
    # Tail of get_distribution's dispatch (head above this chunk).
    elif distribution_name=='Dur nbinom':
        return pyhsmm.basic.distributions.NegativeBinomialFixedRDuration(**params)
    elif distribution_name=='Dur poisson':
        return pyhsmm.basic.distributions.PoissonDuration(**params)

L = 2 # Number of times we repeat the distribution for each mode

# Build per-device observation/duration distribution lists, L copies per mode.
distns = {}
for device in devices_list:
    distns[device] = {'Obs': [],'Dur': []}
    for mode, mode_value in hypparamss[device].items():
        for dist_name in mode_value.keys():
            for _ in range(L):
                # 'Obs ...' / 'Dur ...' names route to the matching list.
                distns[device][dist_name.split(' ')[0]].append(get_distribution(dist_name,hypparamss[device][mode][dist_name]))

### construct posterior model
# One HSMM chain per device, combined additively by the Factorial model.
posteriormodel = models.Factorial([models.FactorialComponentHSMM(
        init_state_concentration=2.,
        alpha=1.,gamma=4.,
        obs_distns=distns[device]['Obs'],
        dur_distns=distns[device]['Dur'],
        trunc=200) for device in devices_list])

posteriormodel.add_data(data=powers['use'].values)

# Each outer sweep runs `nsubiter` inner resampling iterations.
nsubiter=25
for itr in progprint_xrange(20):
    posteriormodel.resample_model(min_extra_noise=0.1,max_extra_noise=100.**2,niter=nsubiter)
]

model = LibraryHSMMIntNegBinVariant(init_state_concentration=10.,
                                    alpha=6.,
                                    gamma=6.,
                                    obs_distns=obs_distns,
                                    dur_distns=dur_distns)

# left_censoring: first segment may predate the observation window.
for data in training_datas:
    model.add_data(data, left_censoring=True)

##################
#  infer things  #
##################

# Posterior sample sets of increasing size (1, 10, 100 sweeps).
samples1 = [model.resample_and_copy() for i in progprint_xrange(1)]
samples2 = [model.resample_and_copy() for i in progprint_xrange(10)]
samples3 = [model.resample_and_copy() for i in progprint_xrange(100)]
# samples4 = [model.resample_and_copy() for i in progprint_xrange(1000)]

# NOTE(review): cPickle + text-mode 'w' implies Python 2.
import cPickle
with open('samples1', 'w') as outfile:
    cPickle.dump(samples1, outfile, protocol=-1)
with open('samples2', 'w') as outfile:
    cPickle.dump(samples2, outfile, protocol=-1)
with open('samples3', 'w') as outfile:
    cPickle.dump(samples3, outfile, protocol=-1)
# with open('samples4','w') as outfile:
    # Tail of a model constructor call (head above this chunk).
    gamma_a_0=0.5,
    gamma_b_0=3.0,
    init_state_concentration=6.0,
    # MNIW prior over per-state autoregression parameters, scaled by the
    # empirical mean/covariance (dmu, dcov).
    obs_distns=[
        d.MNIW(len(dmu) + 2,
               5 * np.diag(np.diag(dcov)),
               np.zeros((len(dmu), 2 * len(dmu))),
               10 * np.eye(2 * len(dmu))) for state in range(Nmax)
    ],
    # dur_distns=[pyhsmm.basic.distributions.GeometricDuration(100,2000) for state in range(Nmax)],
    # dur_distns=[pyhsmm.basic.distributions.PoissonDuration(3*20,3) for state in range(Nmax)],
    dur_distns=[
        pyhsmm.basic.distributions.NegativeBinomialDuration(10 * 10.0,
                                                            1.0 / 10.0,
                                                            3 * 10.0,
                                                            1 * 10.0)
        for state in range(Nmax)
    ],
)

# Data was distributed beforehand; add_data_parallel takes the track index.
for i, t in enumerate(tracks):
    model.add_data_parallel(i)

########################
#  try some inference  #
########################

plt.figure()
# Redraw the current fit every 5 sweeps.
for itr in progprint_xrange(200):
    if itr % 5 == 0:
        plt.gcf().clf()
        model.plot()
        plt.ion()
        plt.draw()
        plt.ioff()
    model.resample_model_parallel()
def main(result_dir=None):
    """Train a DAHSMM word/letter model on ./DATA/*.txt and write results.

    NOTE(review): Python 2 code (print statements). Relies on module-level
    helpers not visible here (get_result_dir, multi_dump_object, save_fig_title,
    save_parameters, Result, SAVE_PARAMS, parallel).
    """
    if result_dir == None:
        result_dir = get_result_dir(__file__)
        os.mkdir(result_dir)
    param_path = os.path.join(result_dir, 'parameter.json')
    fig_title_path = os.path.join(result_dir, 'fig_title.json')

    #initialize model params#
    # O(T*L_max*W_max^2*d_max^3)
    ITER_N = 10
    LETTER_N = 7
    WORD_N = 7
    DATA_N = 60
    obs_dim = 3
    model_hypparams = {'state_dim': WORD_N, 'alpha': 10.0, 'gamma': 10.0}
    word_model_params = {'letter_type': LETTER_N, 'rho': 10}
    # NIW prior on per-letter Gaussian emissions.
    obs_hypparams = {
        'mu_0': np.zeros(obs_dim),
        'sigma_0': np.eye(obs_dim),
        'kappa_0': 0.01,
        'nu_0': obs_dim + 5
    }
    dur_hypparams = {'alpha_0': 50.0, 'beta_0': 10.0}
    length_dist = pyhsmm.distributions.PoissonDuration(alpha_0=30, beta_0=10, lmbda=3)

    #setting#
    obs_dists = [Gaussian(**obs_hypparams) for state in range(LETTER_N)]
    dur_dists = [
        pyhsmm.distributions.PoissonDuration(**dur_hypparams)
        for state in range(LETTER_N)
    ]
    letter_hsmm = LLHSMM(init_state_concentration=10,
                         alpha=10.0,
                         gamma=10.0,
                         dur_distns=dur_dists,
                         obs_distns=obs_dists)
    model = DAHSMM(model_hypparams, letter_hsmm, length_dist, obs_dists, dur_dists)

    #dump_object in multi engines#
    #"""
    print "--------------------------------------dump process start--------------------------------------"
    count = multiprocessing.Value('i', 0)
    datatxt_names = glob.glob('./DATA/*.txt')
    datatxt_names.sort()
    for f in datatxt_names:
        input_mat = np.loadtxt(f)
        f = f.replace("./DATA/", "")
        f = f.replace(".txt", "")
        # One worker per data file pickles a per-file states object into ./TMP.
        pr = multiprocessing.Process(target=multi_dump_object,
                                     args=(input_mat, model, f, count))
        pr.start()
        time.sleep(0.1)
    # NOTE(review): busy-waits forever unless workers push count past 55.
    while (1):
        if count.value > 55:
            time.sleep(1)
            print "--------------------------------------dump process completed!!--------------------------------------"
            break
    #"""

    #add_data in one engine#
    """
    filename = []
    for f in glob.glob('./DATA/*.txt'):
        model.add_data(np.loadtxt(f))
        f = f.replace("./DATA/","")
        filename.append(f.replace(".txt",""))
    """

    #add_data in multi engines#
    print "--------------------------------------add_data process start--------------------------------------"
    filename = []
    datadmp_names = glob.glob('./TMP/*.dump')
    datadmp_names.sort()
    for f in datadmp_names:
        print f, " loading..."
        fp = open(f)
        f = f.replace("./TMP/", "")
        f = f.replace(".dump", "")
        filename.append(f)
        # Attach the pickled per-file states object to the model.
        obj = pickle.load(fp)
        model.states_list.append(obj)
        if model.parallel:
            parallel.add_data(model.states_list[-1].data)
        fp.close()
    print "--------------------------------------add_data process completed!!--------------------------------------"

    #save params&charm#
    save_fig_title(fig_title_path, SAVE_PARAMS, locals())
    save_parameters(param_path, SAVE_PARAMS, locals())
    obs_hypparams['sigma_0'] = np.eye(obs_dim)
    obs_hypparams['mu_0'] = np.zeros(obs_dim)

    #estimation&result_write#
    print "--------------------------------------estimation process start--------------------------------------"
    result = Result(result_dir, DATA_N)
    loglikelihood = []
    for idx in progprint_xrange(ITER_N, perline=10):
        model.resample_model()
        loglikelihood.append(result.save_loglikelihood(model))
        result.save(model)
    result.write_loglikelihood(loglikelihood)
    print "--------------------------------------estimation process completed!!--------------------------------------"
    # Per-superstate duration distributions (tail of an assignment above).
    [pyhsmm.distributions.NegativeBinomialIntegerR2Duration(
        **dur_hypparams) for superstate in range(Nmaxsuper)]

# HDP-HSMM whose superstates each contain a sub-HMM.
model = models.WeakLimitHDPHSMMSubHMMs(
        init_state_concentration=6.,
        sub_init_state_concentration=6.,
        alpha=6.,gamma=6.,
        sub_alpha=6.,sub_gamma=6.,
        obs_distnss=obs_distnss,
        dur_distns=dur_distns)

# Initialize with a provided state sequence, then refresh the parameters.
model.add_data(data,stateseq=_)

model.resample_parameters()
model.resample_parameters()
model.resample_parameters()

###############
#  inference  #
###############

for itr in progprint_xrange(5):
    model.resample_model()

plt.figure()
model.plot()
plt.gcf().suptitle('fit')

s = model.states_list[0]

plt.show()
# 'J_0':np.ones(obs_dim) * 0.001, #sq_0 #changes the hidden state detection (the lower the better) #0.001 # 'alpha_0':np.ones(obs_dim) * 0.1, #(make the number of hidden states worse higher the better) # 'beta_0':np.ones(obs_dim) * 1} dur_hypparams = {'alpha_0':2*30*500, 'beta_0':3} obs_distns = [distributions.Gaussian(**obs_hypparams) for state in range(Nmax)] dur_distns = [distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)] datas = online_df.loc[online_df['indicator']==3, ['p_del','p_dup']].values posteriormodel = pyhsmm.models.WeakLimitHDPHSMM( # alpha=6.,gamma=6., # better to sample over these; see concentration-resampling.py 10,1 alpha=1.,gamma=1./4, init_state_concentration=600., # pretty inconsequential obs_distns=obs_distns, dur_distns=dur_distns) posteriormodel.add_data(datas,trunc=80) for idx in progprint_xrange(40): # 100->50 posteriormodel.resample_model()#num_procs=1) exp_state = np.empty(len(online_df), dtype=int) indicators = online_df['indicator'].values exp_state[indicators==3] = posteriormodel.stateseqs[0] online_df['exp_state']=exp_state online_df.to_csv('/zfssz2/ST_MCHRI/BIGDATA/PROJECT/NIPT_CNV/f_cnv_out/online/NA12878/out1.csv', sep='\t')
np.r_[0,0,0,0,0,1.,1.,1.], # discrete distribution uniform over {6,7,8} 9,1, # average geometric success probability 1/(9+1) ) for state in range(Nmax)] model = pyhsmm.models.HSMMIntNegBin(init_state_concentration=10., alpha=6., gamma=6., obs_distns=obs_distns, dur_distns=dur_distns) model.add_data(data) ############## # resample # ############## for itr in progprint_xrange(10): model.resample_model() ################ # viterbi EM # ################ for itr in progprint_xrange(50): model.Viterbi_EM_step() ########## # plot # ########## plt.figure() model.plot()
obs_distns = [ pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(N) ] # Build the HMM model that will represent the fitmodel fitmodel = pyhsmm.models.HMM( alpha=50., gamma=50., init_state_concentration=50., # these are only used for initialization obs_distns=obs_distns) fitmodel.add_data(data) print 'Gibbs sampling for initialization' for idx in progprint_xrange(25): fitmodel.resample_model() plt.figure() fitmodel.plot() plt.gcf().suptitle('Gibbs-sampled initialization') print 'EM' fitmodel.EM_fit() plt.figure() fitmodel.plot() plt.gcf().suptitle('EM fit') plt.show()
Kmax = 10 # number of latent discrete states D_latent = 2 # latent linear dynamics' dimension D_obs = 2 # data dimension Cs = [np.eye(D_obs) for _ in range(Kmax)] # Shared emission matrices sigma_obss = [0.05 * np.eye(D_obs) for _ in range(Kmax)] # Emission noise covariances model = DefaultSLDS( K=Kmax, D_obs=D_obs, D_latent=D_latent, Cs=Cs, sigma_obss=sigma_obss) model.add_data(data) model.resample_states() for _ in progprint_xrange(10): model.resample_model() model.states_list[0]._init_mf_from_gibbs() #################### # run mean field # #################### vlbs = [] for _ in progprint_xrange(50): vlbs.append(model.meanfield_coordinate_descent_step()) plt.figure() plt.plot(vlbs) plt.xlabel("Iteration")
            # Tail of the per-state dynamics prior (head above this chunk).
            K_0=D_latent * np.eye(D_latent + D_input),
        ) for _ in range(Kmax)
    ],
    emission_distns=DiagonalRegression(
        D_obs, D_latent + D_input,
        alpha_0=2.0, beta_0=1.0,
    ),
    alpha=3.,
    init_state_distn='uniform')

# Constant-one inputs act as a bias/affine term.
model.add_data(data, inputs=np.ones((T, D_input)))
model.resample_states()

# Zero Gibbs sweeps: go straight to variational inference.
for _ in progprint_xrange(0):
    model.resample_model()

model.states_list[0]._init_mf_from_gibbs()

####################
#  run mean field  #
####################

vlbs = []
for _ in progprint_xrange(N_iter):
    model.VBEM_step()
    vlbs.append(model.VBEM_ELBO())
    # The ELBO should be monotonically non-decreasing (up to numerical slack).
    if len(vlbs) > 1:
        assert vlbs[-1] > vlbs[-2] - 1e-8

plt.figure()
# Build (start, end) changepoint windows from the true model's durations.
temp = np.concatenate(((0,),truemodel.states_list[0].durations.cumsum()))
changepoints = zip(temp[:-1],temp[1:])
# NOTE(review): item assignment on zip() requires Python 2 (list) semantics.
changepoints[-1] = (changepoints[-1][0],T) # because last duration might be censored

#########################
#  posterior inference  #
#########################

Nmax = 25  # weak-limit truncation level

obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
dur_distns = [pyhsmm.distributions.GeometricDuration(**dur_hypparams) for state in range(Nmax)]

# Geometric-duration HSMM constrained to the candidate changepoints.
posteriormodel = pyhsmm.models.GeoHSMMPossibleChangepoints(
        alpha=6.,
        init_state_concentration=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)
posteriormodel.add_data(data,changepoints=changepoints)

for idx in progprint_xrange(50):
    posteriormodel.resample_model()

plt.figure()
posteriormodel.plot()
plt.show()
        # Tail of the per-state AutoRegression prior (head above this chunk).
        nu_0=3,
        S_0=np.eye(2),
        M_0=np.zeros((2,4+affine)),
        K_0=np.eye(4+affine),
        affine=affine) for state in range(Nmax)]

# NOTE(review): np.ones(10.) passes a float length; newer numpy requires an int — confirm.
dur_distns=[NegativeBinomialIntegerR2Duration(
    r_discrete_distn=np.ones(10.),alpha_0=1.,beta_0=1.) for state in range(Nmax)]

model = m.ARWeakLimitHDPHSMMIntNegBin(
    alpha=4.,gamma=4.,init_state_concentration=10.,
    obs_distns=obs_distns,
    dur_distns=dur_distns,
    )

model.add_data(data)

###############
#  inference  #
###############

for itr in progprint_xrange(100):
    model.resample_model()

plt.figure()
model.plot()

# Scatter the data colored by inferred state.
plt.figure()
colors = ['b','r','y','k','g']
stateseq = model.states_list[0].stateseq
for i,s in enumerate(np.unique(stateseq)):
    plt.plot(data[s==stateseq,0],data[s==stateseq,1],colors[i % len(colors)] + 'o')
# NIW prior on per-state Gaussian emissions.
obs_hypparams = {'mu_0':np.zeros(obs_dim),
                 'sigma_0':np.eye(obs_dim),
                 'kappa_0':0.25,
                 'nu_0':obs_dim+2}

obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(N)]

# Build the HMM model that will represent the fitmodel
fitmodel = pyhsmm.models.HMM(
        alpha=50.,init_state_concentration=50., # these are only used for initialization
        obs_distns=obs_distns)
fitmodel.add_data(data)

# Gibbs sampling seeds EM with a reasonable starting point.
print('Gibbs sampling for initialization')
for idx in progprint_xrange(25):
    fitmodel.resample_model()

plt.figure()
fitmodel.plot()
plt.gcf().suptitle('Gibbs-sampled initialization')

print('EM')
likes = fitmodel.EM_fit()

plt.figure()
fitmodel.plot()
plt.gcf().suptitle('EM fit')

plt.figure()
test_data, _ = truemodel.generate(1000) ################ # fit models # ################ Nmax = 10 ### GMM obs_distns = [pyhsmm.distributions.Categorical(alpha_0=0.5,K=N) for state in xrange(Nmax)] gmm = pyhsmm.basic.models.Mixture(alpha_0=N,components=obs_distns) for training_data in training_datas: gmm.add_data(training_data) for itr in progprint_xrange(resample_iter): gmm.resample_model() # gmm.delayed_log_likelihood = lambda test_data,prefix,start,end: gmm.log_likelihood(test_data[start:end+1]) gmm.predict_log_likelihood = lambda test_data,prefix: gmm._log_likelihoods(test_data[prefix:]) # gmm.predict_log_likelihood_fixedhorizons = lambda test_data,max_horizon: [gmm._log_likelihoods(test_data[t:]) for t in range(max_horizon)] ### HMM obs_distns = [pyhsmm.distributions.Categorical(alpha_0=0.5,K=N) for state in xrange(Nmax)] hmm = pyhsmm.models.StickyHMMEigen(alpha=10,gamma=10,init_state_concentration=10,kappa=30, obs_distns=obs_distns) for training_data in training_datas: hmm.add_data(training_data) for itr in progprint_xrange(resample_iter):
                # Tail of the per-state observation-distribution comprehension.
                weights=row) for row in init_weights]

################
#  build HSMM  #
################

# Neg-binomial durations with support restricted to r >= 4.
dur_distns = [NegativeBinomialIntegerRVariantDuration(np.r_[0.,0,0,1,1,1,1,1],alpha_0=5.,beta_0=5.)
        for state in range(library_size)]

model = LibraryHSMMIntNegBinVariant(
        init_state_concentration=10.,
        alpha=6.,gamma=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)

model.add_data(data)

#####################
#  sample and save  #
#####################

# One posterior model copy per Gibbs sweep.
models = [model.resample_and_copy() for itr in progprint_xrange(100)]

# NOTE(review): cPickle + text-mode 'w' implies Python 2.
with open('models.pickle','w') as outfile:
    cPickle.dump(models,outfile,protocol=-1)

with open('model.pickle','w') as outfile:
    cPickle.dump(models[0],outfile,protocol=-1)
Nmax = 20  # weak-limit truncation level
model = m.ARHMM(
        nlags=2,
        alpha=4.,gamma=4.,init_state_concentration=10.,
        obs_distns=[d.MNIW(dof=3,S=np.eye(2),M=np.zeros((2,4)),K=np.eye(4)) for state in range(Nmax)],
        )

model.add_data(data)

######################
#  do DAT INFERENCE  #
######################

# Gibbs sampling seeds EM with a reasonable starting point.
print 'Gibbs sampling initialization'
for itr in progprint_xrange(5):
    model.resample_model()

print 'EM'
for itr in progprint_xrange(50):
    model.EM_step()

plt.figure()
model.plot()

# Scatter the data colored by inferred state.
plt.figure()
colors = ['b','r','y','k','g']
stateseq = model.states_list[0].stateseq
for i,s in enumerate(np.unique(stateseq)):
    plt.plot(data[s==stateseq,0],data[s==stateseq,1],colors[i % len(colors)] + 'o')
            # Tail of the per-component mixture constructor (head above this chunk).
            alpha_0=components_per_gmm,
            weights=row) for row in init_weights]

##############
#  build MM  #
##############

model = LibraryMM(
        alpha_0=6.,
        components=meta_components)

model.add_data(data)

##################
#  infer things  #
##################

# 50 Gibbs sweeps.
for i in progprint_xrange(50):
    model.resample_model()

# Compare the ground-truth and inferred mixtures side by side.
plt.figure()
truemodel.plot()
plt.gcf().suptitle('truth')

plt.figure()
model.plot()
plt.gcf().suptitle('inferred')

plt.show()
    plt.figure()
    model.plot()
    plt.gcf().suptitle('subHSMM sampled model after {} iterations'.format(ITERATIONS))
    plt.savefig('plots/' + testcase + '/subhmm.png')
    plt.close()
    s = model.states_list[0]
"""

### HDP-HMM without the sticky bias
obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]
posteriormodel = pyhsmm.models.WeakLimitHDPHMM(alpha=6.,gamma=6.,init_state_concentration=1.,
        obs_distns=obs_distns)
posteriormodel.add_data(data)

for idx in progprint_xrange(ITERATIONS):
    posteriormodel.resample_model()

# Save the fitted-model figure for this test case.
posteriormodel.plot()
plt.gcf().suptitle('HDP-HMM sampled model after {} iterations'.format(ITERATIONS))
plt.savefig('plots/' + testcase + '/hdp-hmm.png')
plt.close()

# Some more hypparams
obs_hypparams = {'mu_0':np.zeros(obs_dim),
                 'sigma_0':np.eye(obs_dim),
                 'kappa_0':0.3,
                 'nu_0':obs_dim+5}
dur_hypparams = {'alpha_0':2,
                 'beta_0':2}
# Per-state Gaussian emissions and Poisson durations; Nmax, obs_hypparams and
# dur_hypparams come from earlier in the file.
obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]

posteriormodel = pyhsmm.models.HSMM(
        alpha=6.,gamma=6.,  # these can matter; better to sample over them (concentration-resampling.py)
        init_state_concentration=6.,  # pretty inconsequential
        obs_distns=obs_distns,
        dur_distns=dur_distns,
        trunc=90)  # duration truncation speeds things up when it's possible
                   # xun: trunc was 60 in demo

# Three observation sequences share the same model parameters.
posteriormodel.add_data(data0)
posteriormodel.add_data(data1)
posteriormodel.add_data(data2)

models = []
# NOTE(review): an earlier comment claimed 31 iterations (down from the
# demo's 150), but the loop actually runs 11, so only idx == 9 satisfies
# (idx+1) % 10 == 0 and exactly one snapshot is kept.
for idx in progprint_xrange(11):
    posteriormodel.resample_model()
    if (idx+1) % 10 == 0:
        models.append(copy.deepcopy(posteriormodel))

# Plot each saved snapshot (reusing one figure), optionally saving to disk.
fig = plt.figure()
for idx, model in enumerate(models):
    plt.clf()
    model.plot()
    plt.gcf().suptitle('HDP-HSMM sampled after %d iterations' % (10*(idx+1)))
    if SAVE_FIGURES:
        plt.savefig('iter_%.3d.png' % (10*(idx+1)))
plt.show()
################## Nmax = 10 affine = True nlags = 2 model = m.ARWeakLimitStickyHDPHMM( alpha=4., gamma=4., kappa=50., init_state_distn='uniform', obs_distns=[ d.AutoRegression(nu_0=2.5, S_0=2.5 * np.eye(2), M_0=np.zeros((2, 2 * nlags + affine)), K_0=10 * np.eye(2 * nlags + affine), affine=affine) for state in range(Nmax) ], ) model.add_data(data) ############### # inference # ############### fig = model.make_figure() model.plot(fig=fig, draw=False) for _ in progprint_xrange(300): model.resample_model() model.plot(fig=fig, update=True)
D_obs = 2 # data dimension Cs = [np.eye(D_obs) for _ in range(Kmax)] # Shared emission matrices sigma_obss = [0.05 * np.eye(D_obs) for _ in range(Kmax)] # Emission noise covariances model = DefaultSLDS(K=Kmax, D_obs=D_obs, D_latent=D_latent, Cs=Cs, sigma_obss=sigma_obss) model.add_data(data) model.resample_states() for _ in progprint_xrange(10): model.resample_model() model.states_list[0]._init_mf_from_gibbs() #################### # run mean field # #################### vlbs = [] for _ in progprint_xrange(50): vlbs.append(model.meanfield_coordinate_descent_step()) plt.figure() plt.plot(vlbs) plt.xlabel("Iteration") plt.ylabel("VLB")
A=np.eye(D),sigma=0.1*np.eye(D),  # TODO remove special case
        nu_0=5,S_0=np.eye(D),M_0=np.zeros((D,P)),K_0=np.eye(P))
    for _ in xrange(Nmax)]  # (continuation of dynamics_distns built earlier)
init_dynamics_distns = [
    Gaussian(nu_0=5,sigma_0=np.eye(P),mu_0=np.zeros(P),kappa_0=1.)
    for _ in xrange(Nmax)]

model = WeakLimitStickyHDPHMMSLDS(
    dynamics_distns=dynamics_distns,
    emission_distns=emission_distns,
    init_dynamics_distns=init_dynamics_distns,
    kappa=50.,alpha=5.,gamma=5.,init_state_concentration=1.)

##################
#  run sampling  #
##################

def resample():
    # One Gibbs sweep; returns a copy of the first sequence's state labels
    # so later sweeps don't mutate the saved sample.
    model.resample_model()
    return model.stateseqs[0].copy()

model.add_data(data)
samples = [resample() for _ in progprint_xrange(1000)]

# Sampled label sequences stacked above 10 repeated rows of the true labels.
plt.matshow(np.vstack(samples+[np.tile(labels,(10,1))]))
plt.show()
np.ones((10, )), alpha_0=alpha_0, beta_0=beta_0) for state in range(n_states) ] hsmm = library_models.LibraryHSMMIntNegBinVariant( init_state_concentration=10., alpha_a_0=1.0, alpha_b_0=1. / 10, gamma_a_0=1, gamma_b_0=1, # alpha=2, gamma=20.0, obs_distns=obs_distns, dur_distns=dur_distns) hsmm.add_data(training_data) ########################## # Gather model samples # ########################## for itr in progprint_xrange(num_iter): hsmm.resample_model() hsmms = [hsmm.resample_and_copy() for itr in progprint_xrange(1)] ########## # Save # ########## with open('/scratch/hsmm_results.pickle', 'w') as outfile: cPickle.dump(hsmms, outfile, protocol=-1)
################## Nmax = 10 affine = True nlags = 2 model = m.ARWeakLimitStickyHDPHMM( alpha=4.,gamma=4.,kappa=50., init_state_distn='uniform', obs_distns=[ d.AutoRegression( nu_0=2.5, S_0=2.5*np.eye(2), M_0=np.zeros((2,2*nlags+affine)), K_0=10*np.eye(2*nlags+affine), affine=affine) for state in range(Nmax)], ) model.add_data(data) ############### # inference # ############### fig = model.make_figure() model.plot(fig=fig,draw=False) for _ in progprint_xrange(300): model.resample_model() model.plot(fig=fig,update=True)
NegativeBinomialIntegerRVariantDuration(np.r_[0., 0, 0, 0, 0, 0, 1, 1, 1, 1],
                                        alpha_0=5., beta_0=5.)
    for state in range(library_size)
]  # (continuation of dur_distns built earlier in the file)

# Train an HSMM on all sequences; left_censoring handles sequences that may
# begin mid-segment.
hsmm = LibraryHSMMIntNegBinVariant(init_state_concentration=10.,
                                   alpha=6., gamma=2.,
                                   obs_distns=obs_distns,
                                   dur_distns=dur_distns)
for data in training_datas:
    hsmm.add_data(data, left_censoring=True)

for itr in progprint_xrange(resample_iter):
    hsmm.resample_model()

### degrade into HMM, use the same learned syllables!
# The HMM reuses the HSMM's learned observation distributions (fixed), so
# only the transition structure is re-learned.
hmm = LibraryHMMFixedObs(init_state_concentration=10.,
                         alpha=6., gamma=2.,
                         obs_distns=hsmm.obs_distns)
for data in training_datas:
    hmm.add_data(data)

for itr in progprint_xrange(resample_iter):
    hmm.resample_model()

### degrade into GMM, use the same learned syllables!
for state in range(library_size)]  # (continuation of a distribution list built earlier)

model = LibraryHSMMIntNegBinVariant(
        init_state_concentration=10.,
        alpha=6.,gamma=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)

for data in training_datas:
    model.add_data(data,left_censoring=True)
    # model.add_data_parallel(data,left_censoring=True)

##################
#  infer things  #
##################

# Track training log likelihood across Gibbs sweeps.
train_likes = []
test_likes = []
for i in progprint_xrange(5):
    model.resample_model()
    # model.resample_model_parallel()
    train_likes.append(model.log_likelihood())
    # test_likes.append(model.log_likelihood(test_data,left_censoring=True))

# Continue sampling on an unfrozen copy of the model.
newmodel = model.unfreeze()

for i in progprint_xrange(5):
    newmodel.resample_model()
np.r_[0,0,0,0,0,1.,1.,1.], # discrete distribution uniform over {6,7,8} alpha_0=9,beta_0=1, # average geometric success probability 1/(9+1) ) for state in range(Nmax)] model = pyhsmm.models.HSMMIntNegBinVariant( init_state_concentration=10., alpha=6.,gamma=6., obs_distns=obs_distns, dur_distns=dur_distns) model.add_data(data,left_censoring=True) ############## # resample # ############## for itr in progprint_xrange(10): model.resample_model() ################ # viterbi EM # ################ model.Viterbi_EM_fit() ########## # plot # ########## plt.figure() model.plot()
component.add_data(data=observations)  # (continuation of a loop building the component chains)
components.append(component)

# Data structure to collect results
results = []#pd.DataFrame(columns=['Log-Likelihood', 'Accuracy', 'Precision', 'Recall', 'F1'])

# Collect stats into the dataframe
stats = collect_stats(components, real_states, observations, testing, weights,
                      np.ones((1, testing.shape[1])), Model)
results.append(list(stats))

# This is Gibbs sampling
for itr in progprint_xrange(ITER):
    # In each resample, we are going to fix all the chains except one, then resample
    # that chain given all the other fixed ones. After doing this, a single
    # resample of the factorial model is done

    # Resample the variances.
    # states has one row per component chain plus a final all-ones bias row.
    states = np.matrix(np.zeros((COMP+1, observations.shape[0])))
    states[-1, :] = 1
    for i, component in enumerate(components):
        states[i, :] = component.states_list[0].stateseq

    # Now compute the means
    means = np.matrix(weights)*states

    # Squared summed error
    sse = np.power(observations - means.T, 2).sum(axis=0)
nu_0=obs_dim + 2)  # (continuation of obs_hypparams built earlier in the file)

obs_distns = \
    [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]

# Sticky HDP-HMM: kappa biases self-transitions toward longer state dwells.
model = pyhsmm.models.WeakLimitStickyHDPHMM(kappa=50., alpha=6., gamma=6.,
                                            init_state_concentration=1.,
                                            obs_distns=obs_distns)
model.add_data(data)

# run inference!
# Live-updating plot: one Gibbs sweep per frame for 250 sweeps.
fig = model.make_figure()
model.plot(fig=fig, draw=False)

for _ in progprint_xrange(250):
    model.resample_model()
    model.plot(fig=fig, update=True)

# from moviepy.video.io.bindings import mplfig_to_npimage
# from moviepy.editor import VideoClip
# fig = model.make_figure()
# model.plot(fig=fig,draw=False)
# def make_frame_mpl(t):
#     model.resample_model()
#     model.plot(fig=fig,update=True,draw=False)
#     return mplfig_to_npimage(fig)
# animation = VideoClip(make_frame_mpl, duration=10)
def find_states(features_file):
    """Run sticky HDP-HMM speaker diarization on one meeting's feature file.

    Loads CSV features and the matching ground-truth labels (by basename from
    `labels_dir`), fits a WeakLimitStickyHDPHMM by Gibbs sampling, pickles the
    inferred state object to `output_dir`, plots the best label alignment, and
    returns the diarization error rate as a float.

    Relies on module-level names not visible in this chunk: Nmax,
    num_iterations, labels_dir, output_dir, plot_dir, hdphmm_utils, and the
    sampling helpers gamma/beta (presumably numpy.random — TODO confirm).
    """
    meeting_base = features_file.split('/')[-1]
    print(meeting_base)

    data = np.genfromtxt(features_file, delimiter=',')
    # data = np.load(features_file)
    labels = pd.read_csv(os.path.join(labels_dir, meeting_base))
    true_seq = labels['combined']
    # features_file = open(features_file, 'rb')
    # data = pickle.load(features_file)

    ##########################
    #     Sticky-HDP-HMM     #
    ##########################

    # and some hyperparameters
    obs_dim = data.shape[1]
    obs_hypparams = {'mu_0':np.zeros(obs_dim),
                    'sigma_0':np.eye(obs_dim),
                    'kappa_0':0.25,
                    'nu_0':obs_dim+2}

    # create a bunch of multivariate gaussians
    obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]

    # parameters for priors taken from Fox 2012
    gamma_draw = gamma(12,2)
    alpha_plus_kappa_draw = gamma(6,1)
    sigma_draw = gamma(1,0.5)
    rho_draw = beta(500,5)

    # can deterministically retrieve kappa and alpha from draws for alpha+kappa and rho
    kappa = rho_draw * alpha_plus_kappa_draw
    alpha = (1-rho_draw) * alpha_plus_kappa_draw

    print('kappa: {}, alpha: {}, gamma: {}'.format(kappa, alpha, gamma_draw))
    # ipdb.set_trace()

    # NOTE(review): obs_hypparams/obs_distns are rebuilt here with different
    # hyperparameters, shadowing the ones above; the first obs_distns is unused.
    obs_hypparams = {'mu_0':np.zeros(obs_dim),
            'sigma_0':np.eye(obs_dim),
            'kappa_0':0.3,
            'nu_0':obs_dim+5}
    dur_hypparams = {'alpha_0':2*30,
                     'beta_0':2,
                     'lmbda':2.5 }

    obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
    dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]
    # ipdb.set_trace()

    posteriormodel = pyhsmm.models.WeakLimitStickyHDPHMM(
            # NOTE: instead of passing in alpha_0 and gamma_0, we pass in parameters
            # for priors over those concentration parameters
            kappa=kappa,alpha=alpha,gamma=gamma_draw,
            init_state_concentration=6.,
            obs_distns=obs_distns)
    # ipdb.set_trace()

    # posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
    #         alpha=alpha,gamma=gamma_draw,init_state_concentration=1.,
    #         obs_distns=obs_distns,
    #         dur_distns=dur_distns)

    # data = np.zeros(data.shape)
    # posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
    #         # NOTE: instead of passing in alpha_0 and gamma_0, we pass in parameters
    #         # for priors over those concentration parameters
    #         alpha_a_0=1.,alpha_b_0=1./4,
    #         gamma_a_0=1.,gamma_b_0=1./4,
    #         init_state_concentration=6.,
    #         obs_distns=obs_distns,
    #         dur_distns=dur_distns)

    posteriormodel.add_data(data)

    # for idx in progprint_xrange(100):
    #     posteriormodel.resample_model()
    # plt.figure()
    # posteriormodel.plot()
    # plt.gcf().suptitle('Sampled after 100 iterations')
    # plt.figure()
    # t = np.linspace(0.01,30,1000)
    # plt.plot(t,scipy.stats.gamma.pdf(t,1.,scale=4.)) # NOTE: numpy/scipy scale is inverted compared to my scale
    # plt.title('Prior on concentration parameters')
    # plt.show()

    # posteriormodel.add_data(data)
    # # ipdb.set_trace()
    num_cpu = 0  # num_procs=0 -> no parallel resampling
    # # num_iterations = 1
    all_trans_matrices = []
    for idx in progprint_xrange(num_iterations):
        posteriormodel.resample_model(num_procs=num_cpu)
        # Snapshot the transition matrix after each sweep.
        trans_matrix = np.array([row.weights for row in posteriormodel.trans_distn._row_distns])
        all_trans_matrices.append(trans_matrix)

    # final_trans_matrix = np.mean(np.array(all_trans_matrices), axis=0) # average transition probs
    # # ipdb.set_trace()
    # for i in range(len(final_trans_matrix)):
    #     posteriormodel.trans_distn._row_distns[i].weights = final_trans_matrix[i]
    # trans_matrix = np.array([row.weights for row in posteriormodel.trans_distn._row_distns])
    # print trans_matrix
    # posteriormodel.resample_model(num_procs=num_cpu)

    # dump state sequence information
    hmm_states = posteriormodel.states_list[0]
    # ipdb.set_trace()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    with open(os.path.join(output_dir, meeting_base + '.pickle'), 'wb') as outf:
        cPickle.dump(hmm_states, outf)

    num_states = len(set(hmm_states.stateseq_norep))
    # NOTE(review): the /10.0 presumably converts frames to seconds (10 fps) — confirm.
    average_duration = np.average(hmm_states.durations) / 10.0
    print('Num States: {}'.format(num_states))
    print('Average Duration: {}'.format('{0:.2f}'.format(average_duration)))

    diarization_error, best_seq, _ = hdphmm_utils.find_error_rate(hmm_states.stateseq, true_seq)
    diarization_error = '{0:.3f}'.format(diarization_error)
    print('DER: {}'.format(diarization_error))
    hdphmm_utils.plot_pred_labels(meeting_base, best_seq, labels_dir, plot_dir, diarization_error)

    # EM PLOTTING
    # plt.figure()
    # posteriormodel.plots()
    # plt.gcf().suptitle('Gibbs-sampled initialization')
    # print 'EM'
    # likes = posteriormodel.EM_fit()
    # plt.figure()
    # posteriormodel.plot()
    # plt.gcf().suptitle('EM fit')
    # plt.figure()
    # plt.plot(likes)
    # plt.gcf().suptitle('log likelihoods during EM')
    # plt.show()

    # DONE
    # posteriormodel.plot()
    # plt.gcf().suptitle('Sticky HDP-HMM sampled model: {}\n \
    #         Num States: {}, Avg Duration: {}s, Num Iterations: {}'\
    #         .format(filebase, num_states,
    #         '{0:.2f}'.format(average_duration), num_iterations))
    # plt.savefig(os.path.join(output_dir, 'plots', filebase + '_' + str(Nmax) + '.png'))
    # plt.show()
    # plt.cla()
    # plt.clf()

    print('')
    return float(diarization_error)
################ dur_distns = [NegativeBinomialIntegerRVariantDuration(np.r_[0.,0,0,1,1,1,1,1],alpha_0=5.,beta_0=5.) for state in range(Nmax)] model = LibraryHSMMIntNegBinVariant( init_state_concentration=10., alpha=6.,gamma=6., obs_distns=obs_distns, dur_distns=dur_distns) for data in datas: model.add_data_parallel(data,left_censoring=True) ################## # infer things # ################## for i in progprint_xrange(25): model.resample_model_parallel() # plt.figure() # truemodel.plot() # plt.gcf().suptitle('truth') # plt.figure() # model.plot() # plt.gcf().suptitle('inferred') # plt.show()
######################### Nmax = 25 obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)] dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)] posteriormodel = pyhsmm.models.HSMM( # NOTE: instead of passing in alpha_0 and gamma_0, we pass in parameters # for priors over those concentration parameters alpha_a_0=1.,alpha_b_0=1./4, gamma_a_0=1.,gamma_b_0=1./4, init_state_concentration=6., obs_distns=obs_distns, dur_distns=dur_distns,trunc=70) posteriormodel.add_data(data) for idx in progprint_xrange(100): posteriormodel.resample_model() plt.figure() posteriormodel.plot() plt.gcf().suptitle('Sampled after 100 iterations') plt.figure() t = np.linspace(0.01,30,1000) plt.plot(t,stats.gamma.pdf(t,1.,scale=4.)) # NOTE: numpy/scipy scale is inverted compared to my scale plt.title('Prior on concentration parameters') plt.show()
for state in range(library_size)] model = LibraryHSMMIntNegBinVariant( init_state_concentration=10., alpha=6.,gamma=6., obs_distns=obs_distns, dur_distns=dur_distns) for data in training_datas: model.add_data(data,left_censoring=True) ################## # infer things # ################## samples1 = [model.resample_and_copy() for i in progprint_xrange(1)] samples2 = [model.resample_and_copy() for i in progprint_xrange(10)] samples3 = [model.resample_and_copy() for i in progprint_xrange(100)] # samples4 = [model.resample_and_copy() for i in progprint_xrange(1000)] import cPickle with open('samples1','w') as outfile: cPickle.dump(samples1,outfile,protocol=-1) with open('samples2','w') as outfile: cPickle.dump(samples2,outfile,protocol=-1) with open('samples3','w') as outfile: cPickle.dump(samples3,outfile,protocol=-1) # with open('samples4','w') as outfile: