示例#1
0
	def fit(self):
		"""Fit a weak-limit HDP-HSMM to the stored demonstrations.

		Builds Gaussian observation and Poisson duration distributions,
		adds every demonstration to the model, runs 50 Gibbs sweeps, and
		stores per-demonstration segment boundaries in ``self.segmentation``
		and the trained model in ``self.model``.
		"""
		# The first demonstration is only used to determine the observation dim.
		data = np.squeeze(np.array(self._demonstrations[0]))
		Nmax = 25  # weak-limit truncation level (max number of states)

		obs_dim = data.shape[1]
		print(data.shape)  # parenthesized so this also parses under Python 3
		obs_hypparams = {'mu_0': np.zeros(obs_dim),
						 'sigma_0': np.eye(obs_dim),
						 'kappa_0': 0.25,
						 'nu_0': obs_dim + 2}
		dur_hypparams = {'alpha_0': 2 * 30,
						 'beta_0': 2}

		obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
		dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]

		posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
			alpha=6., gamma=6.,  # these can matter; see concentration-resampling.py
			init_state_concentration=6.,  # pretty inconsequential
			obs_distns=obs_distns,
			dur_distns=dur_distns)

		for d in self._demonstrations:
			# Duration truncation speeds things up when it's possible.
			posteriormodel.add_data(np.squeeze(np.array(d)), trunc=60)

		for idx in progprint_xrange(50):
			posteriormodel.resample_model()

		new_segments = []
		for i in range(0, len(self._demonstrations)):
			new_segments.append(self.findTransitions(posteriormodel.states_list[i].stateseq))

		self.segmentation = new_segments
		self.model = posteriormodel
示例#2
0
    def test_examples(self):
        """Smoke-test the basic HDP-HSMM example end to end.

        Loads the bundled example data, fits a weak-limit HDP-HSMM with
        Gaussian emissions and Poisson durations for 150 Gibbs sweeps,
        then plots the result.
        """
        # These divide-by-zero warnings are usually harmless for this code.
        np.seterr(divide='ignore')

        # Load the first T frames of the example data set.
        T = 1000
        example_path = os.path.join(os.path.dirname(__file__), 'example-data.txt')
        data = np.loadtxt(example_path)[:T]

        # Weak-limit truncation level and hyperparameters.
        Nmax = 25
        obs_dim = data.shape[1]
        obs_hypparams = dict(mu_0=np.zeros(obs_dim),
                             sigma_0=np.eye(obs_dim),
                             kappa_0=0.25,
                             nu_0=obs_dim + 2)
        dur_hypparams = dict(alpha_0=2 * 30, beta_0=2)

        obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams)
                      for _ in range(Nmax)]
        dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams)
                      for _ in range(Nmax)]

        hsmm = pyhsmm.models.WeakLimitHDPHSMM(
            alpha=6., gamma=6.,  # these can matter; see concentration-resampling.py
            init_state_concentration=6.,  # pretty inconsequential
            obs_distns=obs_distns,
            dur_distns=dur_distns)
        # Duration truncation speeds things up when it's possible.
        hsmm.add_data(data, trunc=60)

        for _ in progprint_xrange(150):
            hsmm.resample_model()

        hsmm.plot()
        plt.show()
def train_sticky_hdp_hmm(data_sets, nmax, niter, nsamples):
    """Train a weak-limit sticky HDP-HMM on several data sets.

    :param data_sets: a list of ndarrays, each of shape (T_i, obs_dim).
    :param nmax: weak-limit truncation level (max number of states).
    :param niter: total number of Gibbs sweeps to run.
    :param nsamples: number of model snapshots to keep.
    :return: list of model copies taken at evenly spaced iterations; the
        entry at index ``nsamples - 1`` is the final trained model.
    """
    # Integer division: under Python 3 true division a float interval would
    # make ``idx % interval == 0`` hold only at idx == 0, so fewer than
    # ``nsamples`` snapshots get collected and models[nsamples - 1] raises.
    interval = max(1, niter // nsamples)
    obs_dim = data_sets[0].shape[-1]
    obs_hypparams = {'mu_0': np.zeros(obs_dim),
                     'sigma_0': np.eye(obs_dim),
                     'kappa_0': 0.25,
                     'nu_0': obs_dim + 2}

    ### weak-limit sticky HDP-HMM with Gaussian emissions

    obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(nmax)]
    posteriormodel = pyhsmm.models.WeakLimitStickyHDPHMM(kappa=50., alpha=6., gamma=6., init_state_concentration=1.,
                                                         obs_distns=obs_distns)
    for d in data_sets:
        posteriormodel.add_data(d)

    models = []
    for idx in progprint_xrange(niter):
        posteriormodel.resample_model()
        if idx % interval == 0:
            models.append(copy.deepcopy(posteriormodel))
    # Always make the last kept snapshot the fully trained model.
    models[nsamples - 1] = posteriormodel
    return models
示例#4
0
def hmm_run(data, Nmax=6, step=100, kappa=12.5, alpha=3., gamma=3.):
    """Fit a weak-limit sticky HDP-HMM to ``data``.

    :param data: observations, shape (T,) or (T, obs_dim).
    :param Nmax: weak-limit truncation level (max number of states).
    :param step: number of Gibbs sweeps to run.
    :param kappa: sticky self-transition bias.
    :param alpha: transition concentration.
    :param gamma: top-level concentration.
    :return: the trained model.
    """
    # 1-D input has no second axis; fall back to a scalar observation dim.
    # Narrow exceptions instead of a bare ``except:`` that hides real bugs.
    try:
        obs_dim = data.shape[1]
    except (AttributeError, IndexError):
        obs_dim = 1

    obs_hypparams = {
        'mu_0': np.zeros(obs_dim),
        'sigma_0': np.eye(obs_dim),
        'kappa_0': 10,
        'nu_0': obs_dim + 2
    }

    # ``range`` instead of the Python-2-only ``xrange``.
    obs_distns = [
        pyhsmm.distributions.Gaussian(**obs_hypparams)
        for state in range(Nmax)
    ]
    model = pyhsmm.models.WeakLimitStickyHDPHMM(kappa=kappa,
                                                alpha=alpha,
                                                gamma=gamma,
                                                init_state_concentration=1.,
                                                obs_distns=obs_distns)
    model.add_data(data)
    for idx in progprint_xrange(step):
        model.resample_model()

    return model
示例#5
0
def fit_model(data, N_iters, args=None):
    """Fit a PoissonHDPHMM to ``data`` and track fit statistics.

    :param data: observation sequence passed to ``model.add_data``.
    :param N_iters: number of Gibbs sweeps to run.
    :param args: optional dict of PoissonHDPHMM constructor overrides.
    :return: ``(lls, Ks)`` arrays with the per-iteration log likelihood
        and number of used states.
    """
    # ``args=None`` instead of a mutable default ``{}``, which would be
    # shared (and mutatable) across calls.
    default_args = dict(N=N,
                        K_max=K,
                        alpha=alpha,
                        gamma=gamma,
                        alpha_obs=alpha_obs,
                        beta_obs=beta_obs,
                        init_state_concentration=1.0)
    if args:
        default_args.update(args)
    model = PoissonHDPHMM(**default_args)
    model.add_data(data)

    def _evaluate(model):
        # Current log likelihood and number of occupied states.
        ll = model.log_likelihood()
        return ll, K_used(model)

    def _step(model):
        model.resample_model()
        return _evaluate(model)

    results = [_step(model) for _ in progprint_xrange(N_iters)]
    lls = np.array([r[0] for r in results])
    Ks = np.array([r[1] for r in results])
    return lls, Ks
示例#6
0
def estimate_hidden_states(data, Nb_states, Nb_samples, verbose=0, pp=None, title_text=None):
    """Infer a hidden-state sequence for ``data`` with an HDP-HSMM.

    :param data: 1-D observation sequence.
    :param Nb_states: number of candidate hidden states.
    :param Nb_samples: number of Gibbs sweeps.
    :param verbose: 0 = silent, 1 = progress + result plot,
        2 = progress + interactive result plot.
    :param pp: forwarded to ``show_result``.
    :param title_text: forwarded to ``show_result``.
    :return: the sampled state sequence of the (single) data sequence.
    """
    ### define uninformed priors: means spread evenly over the data range
    prior_means = np.linspace(data.min(), data.max(), Nb_states)
    gaussMixture = [distributions.ScalarGaussianFixedvarMixture(
        mu_0_list=prior_means,
        tausq_0_list=[1.] * Nb_states,
        mixture_weights_0=[1. / Nb_states] * Nb_states,
        sigmasq=1.) for _ in range(Nb_states)]
    # Duration prior: mixture of a Poisson and a negative-binomial component.
    pnbMixture = [distributions.MixtureDurationDistribution(
        components=[distributions.PoissonDuration(alpha_0=10., beta_0=2.),
                    distributions.NegativeBinomialFixedRDuration(r=1., alpha_0=1., beta_0=1.)],
        alpha_0=Nb_states) for _ in range(Nb_states)]

    ### construct model
    posterior_HDPHSMM = pyhsmm.models.WeakLimitHDPHSMMPossibleChangepoints(
        init_state_distn='uniform',
        alpha=20., gamma=20.,
        obs_distns=gaussMixture,
        dur_distns=pnbMixture)

    ### train model (previously any verbose value outside {0, 1, 2} left
    ### ``iterator`` undefined and raised NameError)
    if verbose in (1, 2):
        iterator = progprint_xrange(Nb_samples)
    else:
        iterator = range(Nb_samples)
    train_model(posterior_HDPHSMM, data, iterator)

    ### show result
    if verbose == 1:
        show_result(posterior_HDPHSMM, data, pp=pp, title_text=title_text)
    elif verbose == 2:
        show_result(posterior_HDPHSMM, data, draw=True, pp=pp, title_text=title_text)

    return posterior_HDPHSMM.stateseqs[0]
示例#7
0
	def fit(self):
		"""Fit an AR weak-limit HDP-HSMM (integer neg-bin durations) to the demonstrations.

		Stores per-demonstration segment boundaries in ``self.segmentation``
		and the trained model in ``self.model``.
		"""
		p = self._demonstration_sizes[0][1]  # observation dimension

		Nmax = self._demonstration_sizes[0][0]  # weak-limit truncation level
		affine = True
		obs_distns = [di.AutoRegression(
					nu_0=self.nu, S_0=np.eye(p), M_0=np.zeros((p, 2*p+affine)),
					K_0=np.eye(2*p+affine), affine=affine) for state in range(Nmax)]

		# np.ones(10), not np.ones(10.): a float array size is rejected by numpy.
		dur_distns = [NegativeBinomialIntegerR2Duration(
					r_discrete_distn=np.ones(10), alpha_0=1., beta_0=1.) for state in range(Nmax)]

		model = m.ARWeakLimitHDPHSMMIntNegBin(
			alpha=self.alpha, gamma=self.gamma,
			init_state_concentration=self.init_state_concentration,
			obs_distns=obs_distns,
			dur_distns=dur_distns,
		)

		for d in self._demonstrations:
			# Duration truncation speeds things up when it's possible.
			model.add_data(d, trunc=60)

		for itr in progprint_xrange(20):
			model.resample_model()

		new_segments = []
		for i in range(0, len(self._demonstrations)):
			new_segments.append(self.findTransitions(model.states_list[i].stateseq))

		self.segmentation = new_segments
		self.model = model
def _fit_hmm(hmm, algo_name):
	"""Initialize ``hmm`` with Gibbs sampling, then refine it with EM.

	Only the "Baum-Welch" algorithm is supported; any other name raises
	NotImplementedError.
	"""
	if algo_name != "Baum-Welch":
		raise NotImplementedError
	print('Gibbs sampling for initialization')
	# Each resample_model() sweep redraws both the model parameters and the
	# hidden state sequences (see pyhsmm models).
	for _ in progprint_xrange(25):
		hmm.resample_model()
	print('fit EM model')
	hmm.EM_fit()
	return hmm
    def fit(self, data, num_iterations=1000, verbose=True):
        """Fit the wrapped HMM by Gibbs sampling, keeping one copy per sweep.

        Records the data and its dimensions on ``self``, adds the data to
        ``self.hmm_model``, and stores ``num_iterations`` resampled model
        copies in ``self.model_samples``.
        """
        self.data = data
        self.num_datapoints = data.shape[0]
        self.data_dimensions = data.shape[1]

        self.hmm_model.add_data(data)

        # Progress printing only when requested.
        if verbose:
            iterations = progprint_xrange(num_iterations)
        else:
            iterations = np.arange(num_iterations)

        samples = []
        for _ in iterations:
            samples.append(self.hmm_model.resample_and_copy())
        self.model_samples = samples
    def fit(self):
        """Fit a weak-limit HDP-HSMM to the stored demonstrations.

        Gaussian emissions, Poisson durations, 50 Gibbs sweeps; stores the
        per-demonstration segment boundaries in ``self.segmentation`` and
        the trained model in ``self.model``.
        """
        # The first demonstration is only used to determine the observation dim.
        data = np.squeeze(np.array(self._demonstrations[0]))
        Nmax = 25  # weak-limit truncation level (max number of states)

        obs_dim = data.shape[1]
        print(data.shape)  # parenthesized so this also parses under Python 3
        obs_hypparams = {
            'mu_0': np.zeros(obs_dim),
            'sigma_0': np.eye(obs_dim),
            'kappa_0': 0.25,
            'nu_0': obs_dim + 2
        }
        dur_hypparams = {'alpha_0': 2 * 30, 'beta_0': 2}

        obs_distns = [
            pyhsmm.distributions.Gaussian(**obs_hypparams)
            for state in range(Nmax)
        ]
        dur_distns = [
            pyhsmm.distributions.PoissonDuration(**dur_hypparams)
            for state in range(Nmax)
        ]

        posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
            alpha=6.,
            gamma=6.,  # these can matter; see concentration-resampling.py
            init_state_concentration=6.,  # pretty inconsequential
            obs_distns=obs_distns,
            dur_distns=dur_distns)

        for d in self._demonstrations:
            # Duration truncation speeds things up when it's possible.
            posteriormodel.add_data(np.squeeze(np.array(d)), trunc=60)

        for idx in progprint_xrange(50):
            posteriormodel.resample_model()

        new_segments = []
        for i in range(0, len(self._demonstrations)):
            new_segments.append(
                self.findTransitions(posteriormodel.states_list[i].stateseq))

        self.segmentation = new_segments
        self.model = posteriormodel
    def fit(self):
        """Fit an AR weak-limit HDP-HSMM (integer neg-bin durations) to the demonstrations.

        Stores per-demonstration segment boundaries in ``self.segmentation``
        and the trained model in ``self.model``.
        """
        p = self._demonstration_sizes[0][1]  # observation dimension
        Nmax = self._demonstration_sizes[0][0]  # weak-limit truncation level
        affine = True
        obs_distns = [
            di.AutoRegression(nu_0=self.nu,
                              S_0=np.eye(p),
                              M_0=np.zeros((p, 2 * p + affine)),
                              K_0=np.eye(2 * p + affine),
                              affine=affine) for state in range(Nmax)
        ]

        # np.ones(10), not np.ones(10.): a float array size is rejected by numpy.
        dur_distns = [
            NegativeBinomialIntegerR2Duration(r_discrete_distn=np.ones(10),
                                              alpha_0=1.,
                                              beta_0=1.)
            for state in range(Nmax)
        ]

        model = m.ARWeakLimitHDPHSMMIntNegBin(
            alpha=self.alpha,
            gamma=self.gamma,
            init_state_concentration=self.init_state_concentration,
            obs_distns=obs_distns,
            dur_distns=dur_distns,
        )

        for d in self._demonstrations:
            # Duration truncation speeds things up when it's possible.
            model.add_data(d, trunc=60)

        for itr in progprint_xrange(20):
            model.resample_model()

        new_segments = []
        for i in range(0, len(self._demonstrations)):
            new_segments.append(
                self.findTransitions(model.states_list[i].stateseq))

        self.segmentation = new_segments
        self.model = model
    def __init__(self, nparray):
        """Train an HDP-HSMM on ``nparray`` and keep the inferred state sequences.

        The model is fit immediately in the constructor: 150 Gibbs sweeps on
        the single provided sequence, after which ``self.states`` holds the
        sampled hidden-state sequences.
        """
        self.nparray = nparray
        # Observation dimensionality taken from the input's second axis.
        self.obs_dim = nparray.shape[1]

        # Gaussian emission hyperparameters.
        self.obs_hypparams = dict(mu_0=np.zeros(self.obs_dim),
                                  sigma_0=np.eye(self.obs_dim),
                                  kappa_0=0.25,
                                  nu_0=self.obs_dim + 2)
        # Poisson duration hyperparameters.
        self.dur_hypparams = dict(alpha_0=2 * 30, beta_0=2)

        # Weak-limit truncation: at most this many hidden states.
        self.Nmax = 10

        # Per-state emission and duration distributions.
        self.obs_distns = [pyhsmm.distributions.Gaussian(**self.obs_hypparams)
                           for _ in range(self.Nmax)]
        self.dur_distns = [pyhsmm.distributions.PoissonDuration(**self.dur_hypparams)
                           for _ in range(self.Nmax)]

        # Weak-limit HDP-HSMM posterior model.
        self.posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
            alpha=6., gamma=6.,
            init_state_concentration=6.,
            obs_distns=self.obs_distns,
            dur_distns=self.dur_distns)

        # Duration truncation speeds inference up when it's possible.
        self.posteriormodel.add_data(self.nparray, trunc=60)

        # Gibbs sampling.
        for _ in progprint_xrange(150):
            self.posteriormodel.resample_model()

        self.states = self.posteriormodel.stateseqs
    def fit(self, data, lengths):
        """Fit a weak-limit HDP-HSMM to ``data``.

        :param data: observations, shape (T, obs_dim).
        :param lengths: sequence lengths; currently unused (all of ``data``
            is added as one sequence) — kept for sklearn-style
            ``fit(data, lengths)`` API compatibility.
        :return: self, with the trained model stored in ``self.model``.
        """
        obs_dim = data.shape[1]
        Nmax = self.hmm_hidden_state_amount  # weak-limit truncation level
        dur_length = self.max_duration_length
        maxIter = self.hmm_max_train_iteration

        obs_hypparams = {
            'mu_0': np.zeros(obs_dim),
            'sigma_0': np.eye(obs_dim),
            'kappa_0': 0.25,
            'nu_0': obs_dim + 2
        }
        dur_hypparams = {'alpha_0': 2 * 30, 'beta_0': 2}
        obs_distns = [
            pyhsmm.distributions.Gaussian(**obs_hypparams)
            for state in range(Nmax)
        ]

        dur_distns = [
            pyhsmm.distributions.PoissonDuration(**dur_hypparams)
            for state in range(Nmax)
        ]

        posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
            alpha=6.,
            gamma=6.,  # these can matter; see concentration-resampling.py
            init_state_concentration=6.,  # pretty inconsequential
            obs_distns=obs_distns,
            dur_distns=dur_distns)

        # Duration truncation speeds things up when it's possible.
        posteriormodel.add_data(data, trunc=dur_length)
        for idx in progprint_xrange(maxIter):
            posteriormodel.resample_model()
            print('-resampling-%d/%d' % (idx, maxIter))

        self.model = posteriormodel

        return self
示例#14
0
def hsmm_segmenter(factors, width=MEDIAN_WIDTH):
    """Segment ``factors`` (time x feature) with an HDP-HSMM.

    :param factors: feature matrix, shape (T, obs_dim).
    :param width: median-filter width forwarded to ``find_boundaries``.
    :return: ``(best_boundaries, best_labels)`` — boundary frame indices and
        k-means-relabeled segment labels. ``Nmax`` is a module-level constant.
    """
    obs = pre.normalize(factors, axis=1)
    obs_dim = obs.shape[1]
    obs_len = obs.shape[0]
    # Empirical Gaussian hyperparameters from the normalized features.
    obs_hypparams = {
        'mu_0': np.mean(obs, axis=0),
        'sigma_0': np.cov(obs, rowvar=0),
        'kappa_0': 0.25,
        'nu_0': obs_dim + 2}

    dur_hypparams = {'alpha_0': 48,
                     'beta_0': 2}

    # ``range`` throughout (the original mixed it with Python-2-only ``xrange``).
    obs_distns = [distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
    dur_distns = [distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]

    posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
        alpha_a_0=1., alpha_b_0=1./4,
        gamma_a_0=1., gamma_b_0=1./4,
        init_state_concentration=1.,  # pretty inconsequential
        obs_distns=obs_distns,
        dur_distns=dur_distns)

    # NOTE(review): the raw ``factors`` are added even though the emission
    # hyperparameters were computed from the normalized ``obs`` — confirm
    # this is intentional.
    posteriormodel.add_data(factors, trunc=int(obs_len / 3))

    for idx in textutil.progprint_xrange(150):
        posteriormodel.resample_model()

    labels = posteriormodel.stateseqs
    boundaries, labels = find_boundaries(labels, width)
    best_n_types = len(np.unique(labels))

    # Cannot have more segment types than boundary intervals.
    if len(boundaries) < best_n_types + 1:
        best_n_types = len(boundaries) - 1

    best_labels = segment_labeling(factors, boundaries, c_method='kmeans', k=best_n_types)
    best_boundaries = np.array(boundaries)

    return best_boundaries, best_labels
    for state in range(library_size)
]

# Library HSMM with integer negative-binomial durations, built from the
# observation/duration distributions defined above.
model = LibraryHSMMIntNegBinVariant(init_state_concentration=10.,
                                    alpha=6.,
                                    gamma=6.,
                                    obs_distns=obs_distns,
                                    dur_distns=dur_distns)

# left_censoring=True: sequences may begin mid-segment.
for data in training_datas:
    model.add_data(data, left_censoring=True)
    # model.add_data_parallel(data,left_censoring=True)

##################
#  infer things  #
##################

train_likes = []
test_likes = []

# A few Gibbs sweeps, tracking the training log likelihood after each one.
for i in progprint_xrange(5):
    model.resample_model()
    # model.resample_model_parallel()
    train_likes.append(model.log_likelihood())
    # test_likes.append(model.log_likelihood(test_data,left_censoring=True))

# NOTE(review): unfreeze() suggests the library observation distributions
# were held fixed above and are resampled from here on — confirm against
# LibraryHSMMIntNegBinVariant.
newmodel = model.unfreeze()

for i in progprint_xrange(5):
    newmodel.resample_model()
示例#16
0
    NegativeBinomialIntegerRVariantDuration(np.r_[0., 0, 0, 1, 1, 1, 1, 1],
                                            alpha_0=5.,
                                            beta_0=5.) for state in range(Nmax)
]

# Library HSMM with integer negative-binomial durations, built from the
# observation/duration distributions defined above.
model = LibraryHSMMIntNegBinVariant(init_state_concentration=10.,
                                    alpha=6.,
                                    gamma=6.,
                                    obs_distns=obs_distns,
                                    dur_distns=dur_distns)

# left_censoring=True: sequences may begin mid-segment; data is distributed
# to parallel workers here.
for data in datas:
    model.add_data_parallel(data, left_censoring=True)

##################
#  infer things  #
##################

# 25 parallel Gibbs sweeps.
for i in progprint_xrange(25):
    model.resample_model_parallel()

# plt.figure()
# truemodel.plot()
# plt.gcf().suptitle('truth')

# plt.figure()
# model.plot()
# plt.gcf().suptitle('inferred')

# plt.show()
示例#17
0
File: hmm.py  Project: mattjj/next.ml
    mu_0=np.zeros(obs_dim), sigma_0=np.eye(obs_dim),
    kappa_0=0.25, nu_0=obs_dim+2)

# Per-state Gaussian emissions up to the weak-limit truncation level Nmax.
# NOTE(review): ``xrange`` is Python-2 only — use ``range`` when porting to 3.
obs_distns = \
    [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]
# Sticky HDP-HMM: kappa biases states toward self-transitions.
model = pyhsmm.models.WeakLimitStickyHDPHMM(
    kappa=50.,alpha=6.,gamma=6.,init_state_concentration=1.,
    obs_distns=obs_distns)
model.add_data(data)


# run inference, redrawing the figure after every Gibbs sweep
fig = model.make_figure()
model.plot(fig=fig,draw=False)

for _ in progprint_xrange(250):
    model.resample_model()
    model.plot(fig=fig,update=True)



# from moviepy.video.io.bindings import mplfig_to_npimage
# from moviepy.editor import VideoClip

# fig = model.make_figure()
# model.plot(fig=fig,draw=False)

# def make_frame_mpl(t):
#     model.resample_model()
#     model.plot(fig=fig,update=True,draw=False)
#     return mplfig_to_npimage(fig)
示例#18
0
plt.close()
s = model.states_list[0] 
"""

### HDP-HMM without the sticky bias

# Per-state Gaussian emissions up to the truncation level Nmax.
# NOTE(review): ``xrange`` is Python-2 only — use ``range`` when porting to 3.
obs_distns = [
    pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)
]
posteriormodel = pyhsmm.models.WeakLimitHDPHMM(alpha=6.,
                                               gamma=6.,
                                               init_state_concentration=1.,
                                               obs_distns=obs_distns)
posteriormodel.add_data(data)

# Gibbs sampling.
for idx in progprint_xrange(ITERATIONS):
    posteriormodel.resample_model()

# Plot and save the sampled model.
posteriormodel.plot()
plt.gcf().suptitle(
    'HDP-HMM sampled model after {} iterations'.format(ITERATIONS))
plt.savefig('plots/2d-data/hdp-hmm.png')
plt.close()

# Some more hypparams

obs_hypparams = {
    'mu_0': np.zeros(obs_dim),
    'sigma_0': np.eye(obs_dim),
    'kappa_0': 0.3,
    'nu_0': obs_dim + 5
示例#19
0
# One frozen mixture over the shared component library per state; the given
# weights are only used for initialization.
obs_distns = [
        library_models.FrozenMixtureDistribution(
            components=component_library,
            a_0=1.0,b_0=1./5,weights=weights, # for initialization only
        ) for weights in init_weights]

hmm = library_models.LibraryHMM(
        init_state_concentration=10.,
        alpha_a_0=1.0,alpha_b_0=1./10,
        gamma_a_0=1,gamma_b_0=1,
        obs_distns=obs_distns)
hmm.add_data(training_data)

##########################
#  Gather model samples  #
##########################

# Burn-in sweeps, then keep one resampled copy.
for itr in progprint_xrange(num_iter):
    hmm.resample_model()

hmms = [hmm.resample_and_copy() for itr in progprint_xrange(1)]

##########
#  Save  #
##########

# Python-2 cPickle; protocol=-1 selects the highest available protocol.
# NOTE(review): a Python-3 port needs 'wb' mode here.
with open('/scratch/hmm_results.pickle','w') as outfile:
    cPickle.dump(hmms,outfile,protocol=-1)

示例#20
0
    elif distribution_name=='Dur nbinom':
        return pyhsmm.basic.distributions.NegativeBinomialFixedRDuration(**params)
    elif distribution_name=='Dur poisson':
        return pyhsmm.basic.distributions.PoissonDuration(**params)

L = 2 # Number of times we repeat the distribution for each mode

# Collect per-device observation ('Obs') and duration ('Dur') distributions,
# repeating each mode's distribution L times; the key is taken from the
# first word of the distribution name (e.g. 'Dur poisson' -> 'Dur').
distns = {}
for device in devices_list:
    distns[device] = {'Obs': [],'Dur': []}
    for mode, mode_value in hypparamss[device].items():
        for dist_name in mode_value.keys():
            for _ in range(L):
                distns[device][dist_name.split(' ')[0]].append(get_distribution(dist_name,hypparamss[device][mode][dist_name]))

### construct posterior model: one factorial HSMM component per device
posteriormodel = models.Factorial([models.FactorialComponentHSMM(
        init_state_concentration=2.,
        alpha=1.,gamma=4.,
        obs_distns=distns[device]['Obs'],
        dur_distns=distns[device]['Dur'],
        trunc=200)
    for device in devices_list])

# Observed signal is the aggregate power draw.
posteriormodel.add_data(data=powers['use'].values)

# 20 outer Gibbs sweeps, each with nsubiter inner iterations and bounded
# extra observation noise.
nsubiter=25
for itr in progprint_xrange(20):
    posteriormodel.resample_model(min_extra_noise=0.1,max_extra_noise=100.**2,niter=nsubiter)
        
示例#21
0
]

# Library HSMM with integer negative-binomial durations, built from the
# observation/duration distributions defined above.
model = LibraryHSMMIntNegBinVariant(init_state_concentration=10.,
                                    alpha=6.,
                                    gamma=6.,
                                    obs_distns=obs_distns,
                                    dur_distns=dur_distns)

# left_censoring=True: sequences may begin mid-segment.
for data in training_datas:
    model.add_data(data, left_censoring=True)

##################
#  infer things  #
##################

# Posterior samples after increasing numbers of Gibbs sweeps.
samples1 = [model.resample_and_copy() for i in progprint_xrange(1)]
samples2 = [model.resample_and_copy() for i in progprint_xrange(10)]
samples3 = [model.resample_and_copy() for i in progprint_xrange(100)]
# samples4 = [model.resample_and_copy() for i in progprint_xrange(1000)]

# Python-2 cPickle; protocol=-1 selects the highest available protocol.
# NOTE(review): a Python-3 port needs 'wb' mode for these files.
import cPickle
with open('samples1', 'w') as outfile:
    cPickle.dump(samples1, outfile, protocol=-1)

with open('samples2', 'w') as outfile:
    cPickle.dump(samples2, outfile, protocol=-1)

with open('samples3', 'w') as outfile:
    cPickle.dump(samples3, outfile, protocol=-1)

# with open('samples4','w') as outfile:
示例#22
0
    gamma_a_0=0.5,
    gamma_b_0=3.0,
    init_state_concentration=6.0,
    obs_distns=[
        d.MNIW(len(dmu) + 2, 5 * np.diag(np.diag(dcov)), np.zeros((len(dmu), 2 * len(dmu))), 10 * np.eye(2 * len(dmu)))
        for state in range(Nmax)
    ],
    # dur_distns=[pyhsmm.basic.distributions.GeometricDuration(100,2000) for state in range(Nmax)],
    # dur_distns=[pyhsmm.basic.distributions.PoissonDuration(3*20,3) for state in range(Nmax)],
    dur_distns=[
        pyhsmm.basic.distributions.NegativeBinomialDuration(10 * 10.0, 1.0 / 10.0, 3 * 10.0, 1 * 10.0)
        for state in range(Nmax)
    ],
)

# Register each track with the parallel workers by index.
for i, t in enumerate(tracks):
    model.add_data_parallel(i)

########################
#  try some inference  #
########################
plt.figure()
# 200 parallel Gibbs sweeps, redrawing the model plot every 5 iterations.
for itr in progprint_xrange(200):
    if itr % 5 == 0:
        plt.gcf().clf()
        model.plot()
        plt.ion()
        plt.draw()
        plt.ioff()
    model.resample_model_parallel()
示例#23
0
def main(result_dir=None):
    """Run the full DAHSMM training pipeline.

    Dumps per-sequence state objects in worker processes, loads them back
    into the model, runs ``ITER_N`` resampling sweeps, and writes the
    parameters, figure titles and log likelihoods under ``result_dir``.

    :param result_dir: output directory (created here, so it must not
        already exist); defaults to ``get_result_dir(__file__)``.
    """
    if result_dir is None:  # identity comparison for None, not ==
        result_dir = get_result_dir(__file__)
    os.mkdir(result_dir)
    param_path = os.path.join(result_dir, 'parameter.json')
    fig_title_path = os.path.join(result_dir, 'fig_title.json')

    # -- initialize model params --
    # O(T*L_max*W_max^2*d_max^3)
    ITER_N = 10
    LETTER_N = 7
    WORD_N = 7
    DATA_N = 60
    obs_dim = 3
    model_hypparams = {'state_dim': WORD_N, 'alpha': 10.0, 'gamma': 10.0}
    # Kept even though unused below: saved via locals() by save_parameters.
    word_model_params = {'letter_type': LETTER_N, 'rho': 10}
    obs_hypparams = {
        'mu_0': np.zeros(obs_dim),
        'sigma_0': np.eye(obs_dim),
        'kappa_0': 0.01,
        'nu_0': obs_dim + 5
    }
    dur_hypparams = {'alpha_0': 50.0, 'beta_0': 10.0}
    length_dist = pyhsmm.distributions.PoissonDuration(alpha_0=30,
                                                       beta_0=10,
                                                       lmbda=3)
    # Letter-level HSMM and the word-level model built on top of it.
    obs_dists = [Gaussian(**obs_hypparams) for state in range(LETTER_N)]
    dur_dists = [
        pyhsmm.distributions.PoissonDuration(**dur_hypparams)
        for state in range(LETTER_N)
    ]
    letter_hsmm = LLHSMM(init_state_concentration=10,
                         alpha=10.0,
                         gamma=10.0,
                         dur_distns=dur_dists,
                         obs_distns=obs_dists)
    model = DAHSMM(model_hypparams, letter_hsmm, length_dist, obs_dists,
                   dur_dists)

    # -- dump per-file state objects in worker processes --
    print("--------------------------------------dump process start--------------------------------------")
    count = multiprocessing.Value('i', 0)
    datatxt_names = glob.glob('./DATA/*.txt')
    datatxt_names.sort()
    for f in datatxt_names:
        input_mat = np.loadtxt(f)
        f = f.replace("./DATA/", "")
        f = f.replace(".txt", "")
        pr = multiprocessing.Process(target=multi_dump_object,
                                     args=(input_mat, model, f, count))
        pr.start()
        time.sleep(0.1)
    # Wait until enough workers have reported completion.
    # NOTE(review): the threshold 55 is presumably tied to DATA_N = 60 — confirm.
    while True:
        if count.value > 55:
            time.sleep(1)
            print("--------------------------------------dump process completed!!--------------------------------------")
            break
        time.sleep(0.1)  # avoid a 100% CPU busy-wait while workers run

    # -- load dumped state objects and attach them to the model --
    print("--------------------------------------add_data process start--------------------------------------")
    filename = []
    datadmp_names = glob.glob('./TMP/*.dump')
    datadmp_names.sort()
    for f in datadmp_names:
        print(f, " loading...")
        # Context manager closes the handle even on error; binary mode is
        # required for pickle data.
        with open(f, 'rb') as fp:
            obj = pickle.load(fp)
        f = f.replace("./TMP/", "")
        f = f.replace(".dump", "")
        filename.append(f)
        model.states_list.append(obj)
        if model.parallel:
            parallel.add_data(model.states_list[-1].data)
    print("--------------------------------------add_data process completed!!--------------------------------------")

    # -- save params & figure titles (both read this function's locals()) --
    save_fig_title(fig_title_path, SAVE_PARAMS, locals())
    save_parameters(param_path, SAVE_PARAMS, locals())
    # NOTE(review): re-set right after saving — save_parameters appears to
    # mutate these entries while serializing; confirm.
    obs_hypparams['sigma_0'] = np.eye(obs_dim)
    obs_hypparams['mu_0'] = np.zeros(obs_dim)

    # -- estimation & result writing --
    print("--------------------------------------estimation process start--------------------------------------")
    result = Result(result_dir, DATA_N)
    loglikelihood = []
    for idx in progprint_xrange(ITER_N, perline=10):
        model.resample_model()
        loglikelihood.append(result.save_loglikelihood(model))
        result.save(model)
    result.write_loglikelihood(loglikelihood)
    print("--------------------------------------estimation process completed!!--------------------------------------")
示例#24
0
        [pyhsmm.distributions.NegativeBinomialIntegerR2Duration(
            **dur_hypparams) for superstate in range(Nmaxsuper)]

# Weak-limit HDP-HSMM whose per-superstate emissions are themselves HMMs.
model = models.WeakLimitHDPHSMMSubHMMs(
    init_state_concentration=6., sub_init_state_concentration=6.,
    alpha=6., gamma=6., sub_alpha=6., sub_gamma=6.,
    obs_distnss=obs_distnss, dur_distns=dur_distns)

# Attach the observations with a fixed super-state sequence, then warm up
# the parameters with three conditional resampling sweeps.
model.add_data(data, stateseq=_)
for itr in range(3):
    model.resample_parameters()

###############
#  inference  #
###############
for itr in progprint_xrange(5):
    model.resample_model()

# Plot the fit and keep a handle on the first sequence's states object.
plt.figure()
model.plot()
plt.gcf().suptitle('fit')

s = model.states_list[0]

plt.show()

示例#25
0
#             'J_0':np.ones(obs_dim) * 0.001, #sq_0 #changes the hidden state detection (the lower the better) #0.001
#             'alpha_0':np.ones(obs_dim) * 0.1, #(make the number of hidden states worse higher the better)
#             'beta_0':np.ones(obs_dim) * 1}

# Gamma prior on the Poisson duration rate: mean alpha_0/beta_0 = 10000
# time steps, i.e. very long expected segment durations.
dur_hypparams = {'alpha_0':2*30*500,
                 'beta_0':3}

obs_distns = [distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
dur_distns = [distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]

# Model only the rows flagged indicator == 3, using the two copy-number
# probability columns as the observation vector.
datas = online_df.loc[online_df['indicator']==3, ['p_del','p_dup']].values

posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
        # alpha=6.,gamma=6., # better to sample over these; see concentration-resampling.py 10,1
        alpha=1.,gamma=1./4,
        init_state_concentration=600., # pretty inconsequential
        obs_distns=obs_distns,
        dur_distns=dur_distns)
posteriormodel.add_data(datas,trunc=80) # duration truncation speeds inference
for idx in progprint_xrange(40): # 100->50
    posteriormodel.resample_model()#num_procs=1)

# FIX: np.empty() left the rows with indicator != 3 holding uninitialized
# memory, which was then written out to the CSV. Fill with a -1 sentinel so
# unmodeled rows are explicit.
exp_state = np.full(len(online_df), -1, dtype=int)
indicators = online_df['indicator'].values
exp_state[indicators==3] = posteriormodel.stateseqs[0]
online_df['exp_state']=exp_state
online_df.to_csv('/zfssz2/ST_MCHRI/BIGDATA/PROJECT/NIPT_CNV/f_cnv_out/online/NA12878/out1.csv', sep='\t')



示例#26
0
            np.r_[0,0,0,0,0,1.,1.,1.], # discrete distribution uniform over {6,7,8}
            9,1, # average geometric success probability 1/(9+1)
            ) for state in range(Nmax)]

# HSMM with integer negative-binomial durations.
_model_kwargs = dict(
    init_state_concentration=10.,
    alpha=6., gamma=6.,
    obs_distns=obs_distns,
    dur_distns=dur_distns,
)
model = pyhsmm.models.HSMMIntNegBin(**_model_kwargs)
model.add_data(data)

##############
#  resample  #
##############

# Burn in with Gibbs sweeps.
for itr in progprint_xrange(10):
    model.resample_model()

################
#  viterbi EM  #
################

# Then refine with hard-assignment (Viterbi) EM steps.
for itr in progprint_xrange(50):
    model.Viterbi_EM_step()

##########
#  plot  #
##########

plt.figure()
model.plot()
示例#27
0
文件: hmm-EM.py 项目: tanichu/NPB_DAA
# One Gaussian emission distribution per HMM state.
obs_distns = [
    pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(N)
]

# Build the HMM model that will represent the fitmodel
fitmodel = pyhsmm.models.HMM(
    alpha=50.,
    gamma=50.,
    init_state_concentration=50.,  # these are only used for initialization
    obs_distns=obs_distns)
fitmodel.add_data(data)

print 'Gibbs sampling for initialization'

# A short Gibbs run gives EM a reasonable starting point.
for idx in progprint_xrange(25):
    fitmodel.resample_model()

plt.figure()
fitmodel.plot()
plt.gcf().suptitle('Gibbs-sampled initialization')

print 'EM'

# Refine the Gibbs-initialized model to a point estimate with EM.
fitmodel.EM_fit()

plt.figure()
fitmodel.plot()
plt.gcf().suptitle('EM fit')
plt.show()
示例#28
0
# Model size: Kmax discrete states switching a D_latent-dimensional linear
# dynamical system observed in D_obs dimensions.
Kmax = 10
D_latent = 2
D_obs = 2

# Every discrete state shares an identity emission matrix and isotropic
# emission noise.
Cs = [np.eye(D_obs) for _ in range(Kmax)]
sigma_obss = [0.05 * np.eye(D_obs) for _ in range(Kmax)]

model = DefaultSLDS(K=Kmax,
                    D_obs=D_obs,
                    D_latent=D_latent,
                    Cs=Cs,
                    sigma_obss=sigma_obss)

model.add_data(data)
model.resample_states()

# Short Gibbs burn-in, then seed the mean-field factors from the current
# Gibbs state.
for _ in progprint_xrange(10):
    model.resample_model()
model.states_list[0]._init_mf_from_gibbs()


####################
#  run mean field  #
####################

# Track the variational lower bound over coordinate-descent steps.
vlbs = []
for _ in progprint_xrange(50):
    vlbs.append(model.meanfield_coordinate_descent_step())

plt.figure()
plt.plot(vlbs)
plt.xlabel("Iteration")
示例#29
0
文件: vbem.py 项目: zqwei/pyslds
                        K_0=D_latent * np.eye(D_latent + D_input),
                    ) for _ in range(Kmax)
                ],
                emission_distns=DiagonalRegression(
                    D_obs,
                    D_latent + D_input,
                    alpha_0=2.0,
                    beta_0=1.0,
                ),
                alpha=3.,
                init_state_distn='uniform')

# Attach data with a constant (bias) input column for the input-driven SLDS.
model.add_data(data, inputs=np.ones((T, D_input)))
model.resample_states()

# NOTE(review): progprint_xrange(0) performs no Gibbs sweeps at all --
# presumably deliberate so VBEM starts from the freshly resampled states;
# confirm.
for _ in progprint_xrange(0):
    model.resample_model()
model.states_list[0]._init_mf_from_gibbs()

####################
#  run mean field  #
####################

vlbs = []
for _ in progprint_xrange(N_iter):
    model.VBEM_step()
    vlbs.append(model.VBEM_ELBO())
    # VBEM must never decrease the ELBO (up to numerical tolerance).
    if len(vlbs) > 1:
        assert vlbs[-1] > vlbs[-2] - 1e-8

plt.figure()
示例#30
0
文件: hsmm-geo.py 项目: 52nlp/pyhsmm
# Recover segment boundaries from the true model's sampled durations; the
# cumulative sums give candidate changepoint index pairs into the sequence.
temp = np.concatenate(((0,),truemodel.states_list[0].durations.cumsum()))
changepoints = zip(temp[:-1],temp[1:])
changepoints[-1] = (changepoints[-1][0],T) # because last duration might be censored
# NOTE(review): indexing into zip(...) requires Python 2 (Python 3 returns
# an iterator).



#########################
#  posterior inference  #
#########################

Nmax = 25  # weak-limit truncation level

obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
dur_distns = [pyhsmm.distributions.GeometricDuration(**dur_hypparams) for state in range(Nmax)]

# Geometric-duration HSMM restricted to the candidate changepoints above.
posteriormodel = pyhsmm.models.GeoHSMMPossibleChangepoints(
        alpha=6.,
        init_state_concentration=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)
posteriormodel.add_data(data,changepoints=changepoints)

for idx in progprint_xrange(50):
    posteriormodel.resample_model()

plt.figure()
posteriormodel.plot()

plt.show()
示例#31
0
    nu_0=3, S_0=np.eye(2), M_0=np.zeros((2,4+affine)),
    K_0=np.eye(4+affine), affine=affine) for state in range(Nmax)]

# Duration distributions: negative-binomial (integer r) with a uniform
# discrete prior over the 10 candidate r values.
# FIX: np.ones(10.) passed a float as the shape argument, which modern
# NumPy rejects with a TypeError; the shape must be the integer 10.
dur_distns=[NegativeBinomialIntegerR2Duration(
    r_discrete_distn=np.ones(10),alpha_0=1.,beta_0=1.) for state in range(Nmax)]

# AR-HSMM with integer negative-binomial durations in the weak-limit HDP.
model = m.ARWeakLimitHDPHSMMIntNegBin(
        alpha=4.,gamma=4.,init_state_concentration=10.,
        obs_distns=obs_distns,
        dur_distns=dur_distns,
        )

model.add_data(data)

###############
#  inference  #
###############

for itr in progprint_xrange(100):
    model.resample_model()

plt.figure()
model.plot()

# Scatter the data colored by inferred state; colors recycle when more
# than five states are used.
plt.figure()
colors = ['b','r','y','k','g']
stateseq = model.states_list[0].stateseq
for i,s in enumerate(np.unique(stateseq)):
    plt.plot(data[s==stateseq,0],data[s==stateseq,1],colors[i % len(colors)] + 'o')

示例#32
0
文件: hmm-EM.py 项目: chiaolun/pyhsmm
# NIW hyperparameters for the Gaussian emissions.
obs_hypparams = {'mu_0':np.zeros(obs_dim),
                'sigma_0':np.eye(obs_dim),
                'kappa_0':0.25,
                'nu_0':obs_dim+2}

obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(N)]

# Build the HMM model that will represent the fitmodel
fitmodel = pyhsmm.models.HMM(
        alpha=50.,init_state_concentration=50., # these are only used for initialization
        obs_distns=obs_distns)
fitmodel.add_data(data)

print('Gibbs sampling for initialization')

# Short Gibbs run to give EM a sensible starting point.
for idx in progprint_xrange(25):
    fitmodel.resample_model()

plt.figure()
fitmodel.plot()
plt.gcf().suptitle('Gibbs-sampled initialization')

print('EM')

likes = fitmodel.EM_fit()

plt.figure()
fitmodel.plot()
plt.gcf().suptitle('EM fit')

plt.figure()
test_data, _ = truemodel.generate(1000)

################
#  fit models  #
################

Nmax = 10

### GMM

# Categorical emissions; mixture truncated at Nmax components.
obs_distns = [pyhsmm.distributions.Categorical(alpha_0=0.5,K=N) for state in xrange(Nmax)]
gmm = pyhsmm.basic.models.Mixture(alpha_0=N,components=obs_distns)

for training_data in training_datas:
    gmm.add_data(training_data)
for itr in progprint_xrange(resample_iter):
    gmm.resample_model()

# gmm.delayed_log_likelihood = lambda test_data,prefix,start,end: gmm.log_likelihood(test_data[start:end+1])
# Predictive log-likelihoods of the suffix following an observed prefix.
gmm.predict_log_likelihood = lambda test_data,prefix: gmm._log_likelihoods(test_data[prefix:])
# gmm.predict_log_likelihood_fixedhorizons = lambda test_data,max_horizon: [gmm._log_likelihoods(test_data[t:]) for t in range(max_horizon)]

### HMM

obs_distns = [pyhsmm.distributions.Categorical(alpha_0=0.5,K=N) for state in xrange(Nmax)]
hmm = pyhsmm.models.StickyHMMEigen(alpha=10,gamma=10,init_state_concentration=10,kappa=30,
                obs_distns=obs_distns)

for training_data in training_datas:
    hmm.add_data(training_data)
for itr in progprint_xrange(resample_iter):
示例#34
0
    weights=row)
    for row in init_weights]

################
#  build HSMM  #
################

# Duration prior: negative-binomial variant; the leading zeros in
# r_discrete_distn exclude the smallest candidate r values from the support.
dur_distns = [NegativeBinomialIntegerRVariantDuration(np.r_[0.,0,0,1,1,1,1,1],alpha_0=5.,beta_0=5.)
        for state in range(library_size)]

model = LibraryHSMMIntNegBinVariant(
        init_state_concentration=10.,
        alpha=6.,gamma=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)

model.add_data(data)

#####################
#  sample and save  #
#####################

# One Gibbs sweep per saved model copy.
models = [model.resample_and_copy() for itr in progprint_xrange(100)]

# FIX: pickle protocol -1 is a binary protocol, so these files must be
# opened in binary mode ('wb'); text mode corrupts the stream on Windows
# and fails outright under Python 3.
with open('models.pickle','wb') as outfile:
    cPickle.dump(models,outfile,protocol=-1)

with open('model.pickle','wb') as outfile:
    cPickle.dump(models[0],outfile,protocol=-1)

示例#35
0
Nmax = 20  # weak-limit truncation on the number of AR-HMM states
model = m.ARHMM(
        nlags=2,
        alpha=4.,gamma=4.,init_state_concentration=10.,
        obs_distns=[d.MNIW(dof=3,S=np.eye(2),M=np.zeros((2,4)),K=np.eye(4)) for state in range(Nmax)],
        )

model.add_data(data)

######################
#  do DAT INFERENCE  #
######################

# Gibbs burn-in followed by EM refinement.
print 'Gibbs sampling initialization'
for itr in progprint_xrange(5):
    model.resample_model()

print 'EM'
for itr in progprint_xrange(50):
    model.EM_step()

plt.figure()
model.plot()

# Scatter the data colored by inferred state (colors recycle past five).
plt.figure()
colors = ['b','r','y','k','g']
stateseq = model.states_list[0].stateseq
for i,s in enumerate(np.unique(stateseq)):
    plt.plot(data[s==stateseq,0],data[s==stateseq,1],colors[i % len(colors)] + 'o')
    alpha_0=components_per_gmm,
    weights=row)
    for row in init_weights]

##############
#  build MM  #
##############

# Library mixture model over the pre-built meta components.
model = LibraryMM(alpha_0=6., components=meta_components)
model.add_data(data)

##################
#  infer things  #
##################

for i in progprint_xrange(50):
    model.resample_model()

# Show the ground truth next to the inferred mixture.
for fig_model, fig_title in ((truemodel, 'truth'), (model, 'inferred')):
    plt.figure()
    fig_model.plot()
    plt.gcf().suptitle(fig_title)

plt.show()
示例#37
0
plt.figure()
model.plot()
plt.gcf().suptitle('subHSMM sampled model after {} iterations'.format(ITERATIONS))
plt.savefig('plots/' + testcase + '/subhmm.png')
plt.close()
s = model.states_list[0] 
"""

### HDP-HMM without the sticky bias

obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]
posteriormodel = pyhsmm.models.WeakLimitHDPHMM(alpha=6.,gamma=6.,init_state_concentration=1.,
                                   obs_distns=obs_distns)
posteriormodel.add_data(data)

for idx in progprint_xrange(ITERATIONS):
    posteriormodel.resample_model()

# Save the fitted model plot under the current test case directory.
posteriormodel.plot()
plt.gcf().suptitle('HDP-HMM sampled model after {} iterations'.format(ITERATIONS))
plt.savefig('plots/' + testcase + '/hdp-hmm.png')
plt.close()

# Some more hypparams

# Slightly tighter NIW prior and a fast-switching Poisson duration prior
# (mean alpha_0/beta_0 = 1) for the models built below.
obs_hypparams = {'mu_0':np.zeros(obs_dim),
                'sigma_0':np.eye(obs_dim),
                'kappa_0':0.3,
                'nu_0':obs_dim+5}
dur_hypparams = {'alpha_0':2,
                 'beta_0':2}
示例#38
0
# One Gaussian emission and one Poisson duration distribution per state.
obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]

posteriormodel = pyhsmm.models.HSMM(
        alpha=6.,gamma=6., # these can matter; better to sample over them (concentration-resampling.py)
        init_state_concentration=6., # pretty inconsequential
        obs_distns=obs_distns,
        dur_distns=dur_distns,
        trunc=90) # duration truncation speeds things up when it's possible 
        # xun: trunc was 60 in demo
        
posteriormodel.add_data(data0)
posteriormodel.add_data(data1)
posteriormodel.add_data(data2)

# Keep a deep-copied snapshot of the model every 10 Gibbs iterations.
models = []
for idx in progprint_xrange(11): # xun: we use 31 instead of 150
    posteriormodel.resample_model()
    if (idx+1) % 10 == 0:
        models.append(copy.deepcopy(posteriormodel))

# Plot (and optionally save) each snapshot in turn.
fig = plt.figure()
for idx, model in enumerate(models):
    plt.clf()
    model.plot()
    plt.gcf().suptitle('HDP-HSMM sampled after %d iterations' % (10*(idx+1)))
    if SAVE_FIGURES:
        plt.savefig('iter_%.3d.png' % (10*(idx+1)))

plt.show()
示例#39
0
##################

Nmax = 10      # weak-limit truncation on the number of states
affine = True  # include an affine (bias) column in each AR observation model
nlags = 2      # autoregressive lags
model = m.ARWeakLimitStickyHDPHMM(
    alpha=4.,
    gamma=4.,
    kappa=50.,  # sticky bias encourages state persistence
    init_state_distn='uniform',
    obs_distns=[
        d.AutoRegression(nu_0=2.5,
                         S_0=2.5 * np.eye(2),
                         M_0=np.zeros((2, 2 * nlags + affine)),
                         K_0=10 * np.eye(2 * nlags + affine),
                         affine=affine) for state in range(Nmax)
    ],
)

model.add_data(data)

###############
#  inference  #
###############

# Redraw the figure after every Gibbs sweep for a live view of inference.
fig = model.make_figure()
model.plot(fig=fig, draw=False)
for _ in progprint_xrange(300):
    model.resample_model()
    model.plot(fig=fig, update=True)
示例#40
0
文件: meanfield.py 项目: zqwei/pyslds
D_obs = 2  # data dimension

# Every discrete state shares the identity emission matrix and isotropic
# emission noise.
Cs = [np.eye(D_obs) for _ in range(Kmax)]
sigma_obss = [0.05 * np.eye(D_obs) for _ in range(Kmax)]

model = DefaultSLDS(
    K=Kmax, D_obs=D_obs, D_latent=D_latent,
    Cs=Cs, sigma_obss=sigma_obss)

model.add_data(data)
model.resample_states()

# Short Gibbs burn-in, then initialize mean field from the Gibbs state.
for _ in progprint_xrange(10):
    model.resample_model()
model.states_list[0]._init_mf_from_gibbs()

####################
#  run mean field  #
####################

# Record the variational lower bound after each coordinate-descent step.
vlbs = []
for _ in progprint_xrange(50):
    vlbs.append(model.meanfield_coordinate_descent_step())

plt.figure()
plt.plot(vlbs)
plt.xlabel("Iteration")
plt.ylabel("VLB")
示例#41
0
        A=np.eye(D),sigma=0.1*np.eye(D), # TODO remove special case
        nu_0=5,S_0=np.eye(D),M_0=np.zeros((D,P)),K_0=np.eye(P))
    for _ in xrange(Nmax)]

# Gaussian priors on each state's initial latent dynamics.
init_dynamics_distns = [
    Gaussian(nu_0=5,sigma_0=np.eye(P),mu_0=np.zeros(P),kappa_0=1.)
    for _ in xrange(Nmax)]

# Sticky HDP-HMM switching linear dynamical system.
model = WeakLimitStickyHDPHMMSLDS(
    dynamics_distns=dynamics_distns,
    emission_distns=emission_distns,
    init_dynamics_distns=init_dynamics_distns,
    kappa=50.,alpha=5.,gamma=5.,init_state_concentration=1.)


##################
#  run sampling  #
##################

def resample():
    """Run one Gibbs sweep and return a copy of the first state sequence."""
    model.resample_model()
    return model.stateseqs[0].copy()


model.add_data(data)
samples = [resample() for _ in progprint_xrange(1000)]

# Stack all sampled state sequences above ten repeated rows of true labels.
plt.matshow(np.vstack(samples+[np.tile(labels,(10,1))]))

plt.show()
        np.ones((10, )), alpha_0=alpha_0, beta_0=beta_0)
    for state in range(n_states)
]

# Library HSMM with priors over both concentration parameters.
hsmm = library_models.LibraryHSMMIntNegBinVariant(
    init_state_concentration=10.,
    alpha_a_0=1.0,
    alpha_b_0=1. / 10,
    gamma_a_0=1,
    gamma_b_0=1,
    # alpha=2, gamma=20.0,
    obs_distns=obs_distns,
    dur_distns=dur_distns)
hsmm.add_data(training_data)

##########################
#  Gather model samples  #
##########################

# Burn-in sweeps, then a single posterior sample copy.
for itr in progprint_xrange(num_iter):
    hsmm.resample_model()

hsmms = [hsmm.resample_and_copy() for itr in progprint_xrange(1)]

##########
#  Save  #
##########

# FIX: binary pickle (protocol=-1) requires the file opened in 'wb';
# 'w' corrupts the stream on Windows and fails under Python 3.
with open('/scratch/hsmm_results.pickle', 'wb') as outfile:
    cPickle.dump(hsmms, outfile, protocol=-1)
示例#43
0
文件: arhmm.py 项目: mattjj/next.ml
##################

Nmax = 10      # weak-limit truncation on the number of states
affine = True  # include an affine (bias) column in each AR observation model
nlags = 2      # autoregressive lags
model = m.ARWeakLimitStickyHDPHMM(
    alpha=4.,gamma=4.,kappa=50.,  # kappa: sticky state-persistence bias
    init_state_distn='uniform',
    obs_distns=[
        d.AutoRegression(
            nu_0=2.5,
            S_0=2.5*np.eye(2),
            M_0=np.zeros((2,2*nlags+affine)),
            K_0=10*np.eye(2*nlags+affine),
            affine=affine)
        for state in range(Nmax)],
)

model.add_data(data)

###############
#  inference  #
###############

# Redraw the figure after every Gibbs sweep for a live view of inference.
fig = model.make_figure()
model.plot(fig=fig,draw=False)
for _ in progprint_xrange(300):
    model.resample_model()
    model.plot(fig=fig,update=True)

    NegativeBinomialIntegerRVariantDuration(np.r_[0., 0, 0, 0, 0, 0, 1, 1, 1,
                                                  1],
                                            alpha_0=5.,
                                            beta_0=5.)
    for state in range(library_size)
]

# Train the library HSMM on every sequence, treating each sequence start
# as left-censored.
hsmm = LibraryHSMMIntNegBinVariant(
    init_state_concentration=10., alpha=6., gamma=2.,
    obs_distns=obs_distns, dur_distns=dur_distns)
for data in training_datas:
    hsmm.add_data(data, left_censoring=True)

for itr in progprint_xrange(resample_iter):
    hsmm.resample_model()

### degrade into HMM, use the same learned syllables!

# Build an HMM that reuses the HSMM's learned observation distributions.
hmm = LibraryHMMFixedObs(
    init_state_concentration=10., alpha=6., gamma=2.,
    obs_distns=hsmm.obs_distns)
for data in training_datas:
    hmm.add_data(data)

for itr in progprint_xrange(resample_iter):
    hmm.resample_model()

### degrade into GMM, use the same learned syllables!
示例#45
0
        for state in range(library_size)]

model = LibraryHSMMIntNegBinVariant(
        init_state_concentration=10.,
        alpha=6.,gamma=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)

# Attach every training sequence with a left-censored start.
for data in training_datas:
    model.add_data(data,left_censoring=True)
    # model.add_data_parallel(data,left_censoring=True)

##################
#  infer things  #
##################

# Track training log-likelihood per Gibbs sweep.
train_likes = []
test_likes = []

for i in progprint_xrange(5):
    model.resample_model()
    # model.resample_model_parallel()
    train_likes.append(model.log_likelihood())
    # test_likes.append(model.log_likelihood(test_data,left_censoring=True))

# NOTE(review): unfreeze() presumably returns a thawed copy of this
# library model -- confirm against the library model implementation.
newmodel = model.unfreeze()

for i in progprint_xrange(5):
    newmodel.resample_model()

示例#46
0
            np.r_[0,0,0,0,0,1.,1.,1.], # discrete distribution uniform over {6,7,8}
            alpha_0=9,beta_0=1, # average geometric success probability 1/(9+1)
            ) for state in range(Nmax)]

# HSMM with the integer-negative-binomial-variant duration model.
model  = pyhsmm.models.HSMMIntNegBinVariant(
        init_state_concentration=10.,
        alpha=6.,gamma=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)
model.add_data(data,left_censoring=True)

##############
#  resample  #
##############

for itr in progprint_xrange(10):
    model.resample_model()

################
#  viterbi EM  #
################

# Switch to hard-assignment EM after the Gibbs burn-in.
model.Viterbi_EM_fit()

##########
#  plot  #
##########

plt.figure()
model.plot()
    component.add_data(data=observations)
    components.append(component)



# Accumulate per-iteration result rows (a plain list stands in for the
# DataFrame sketched in the commented-out columns).
results = []#pd.DataFrame(columns=['Log-Likelihood', 'Accuracy', 'Precision', 'Recall', 'F1'])

# Baseline statistics before any Gibbs sweeps; collect_stats presumably
# scores the current component states against the ground truth -- confirm.
stats = collect_stats(components, real_states, observations, testing, weights, np.ones((1, testing.shape[1])), Model)
results.append(list(stats))


# This is Gibbs sampling
for itr in progprint_xrange(ITER):
    # In each resamle, we are going to fix all the chains except one, then resample
    # that chain given all the other fixed ones. After doing this, a single
    # resample of the factorial model is done

    # Resample the variances
    states = np.matrix(np.zeros((COMP+1, observations.shape[0])))
    states[-1, :] = 1

    for i, component in enumerate(components):
        states[i, :] = component.states_list[0].stateseq

    # Now compute the means
    means = np.matrix(weights)*states
    # Squared summed error
    sse = np.power(observations - means.T, 2).sum(axis=0)
示例#48
0
文件: hmm.py 项目: mattjj/next.ml
                     nu_0=obs_dim + 2)

# One NIW-prior Gaussian emission per state (weak-limit truncation).
obs_distns = \
    [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]
model = pyhsmm.models.WeakLimitStickyHDPHMM(kappa=50.,
                                            alpha=6.,
                                            gamma=6.,
                                            init_state_concentration=1.,
                                            obs_distns=obs_distns)
model.add_data(data)

# run inference!
# Live-updating plot: redraw after every Gibbs sweep.
fig = model.make_figure()
model.plot(fig=fig, draw=False)

for _ in progprint_xrange(250):
    model.resample_model()
    model.plot(fig=fig, update=True)

# from moviepy.video.io.bindings import mplfig_to_npimage
# from moviepy.editor import VideoClip

# fig = model.make_figure()
# model.plot(fig=fig,draw=False)

# def make_frame_mpl(t):
#     model.resample_model()
#     model.plot(fig=fig,update=True,draw=False)
#     return mplfig_to_npimage(fig)

# animation = VideoClip(make_frame_mpl, duration=10)
示例#49
0
def find_states(features_file):
    """Fit a sticky HDP-HMM to one meeting's feature file for diarization.

    Loads the CSV feature matrix at ``features_file``, runs Gibbs sampling
    on a WeakLimitStickyHDPHMM, pickles the resulting state-sequence object
    under ``output_dir``, and plots/prints the diarization error rate
    against the ground-truth labels found in ``labels_dir``.

    Relies on module-level globals: labels_dir, Nmax, num_iterations,
    output_dir, plot_dir, and the gamma/beta samplers (presumably
    numpy.random.gamma / numpy.random.beta -- confirm imports).

    Returns the diarization error rate as a float.
    """
    meeting_base = features_file.split('/')[-1]
    print(meeting_base)

    data = np.genfromtxt(features_file, delimiter=',')
    # data = np.load(features_file)

    # Ground-truth speaker labels for this meeting.
    labels = pd.read_csv(os.path.join(labels_dir, meeting_base))
    true_seq = labels['combined']

    # features_file = open(features_file, 'rb')
    # data = pickle.load(features_file)

    ##########################
    #     Sticky-HDP-HMM     #
    ##########################

    # and some hyperparameters
    obs_dim = data.shape[1]
    obs_hypparams = {'mu_0':np.zeros(obs_dim),
                    'sigma_0':np.eye(obs_dim),
                    'kappa_0':0.25,
                    'nu_0':obs_dim+2}

    # create a bunch of multivariate gaussians
    # NOTE(review): dead code -- both obs_hypparams and obs_distns are
    # rebuilt below before being used by the model.
    obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in xrange(Nmax)]

    # parameters for priors taken from Fox 2012
    gamma_draw = gamma(12,2)
    alpha_plus_kappa_draw = gamma(6,1)
    sigma_draw = gamma(1,0.5)
    rho_draw = beta(500,5)

    # can deterministically retrieve kappa and alpha from draws for alpha+kappa and rho
    kappa = rho_draw * alpha_plus_kappa_draw
    alpha = (1-rho_draw) * alpha_plus_kappa_draw

    print('kappa: {}, alpha: {}, gamma: {}'.format(kappa, alpha, gamma_draw))
    # ipdb.set_trace()

    # Hyperparameters actually used by the model below.
    obs_hypparams = {'mu_0':np.zeros(obs_dim),
                    'sigma_0':np.eye(obs_dim),
                    'kappa_0':0.3,
                    'nu_0':obs_dim+5}
    dur_hypparams = {'alpha_0':2*30,
                     'beta_0':2,
                     'lmbda':2.5
                     }

    obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
    # dur_distns are only used by the commented-out HSMM variants below.
    dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]

    # ipdb.set_trace()

    posteriormodel = pyhsmm.models.WeakLimitStickyHDPHMM(
            # NOTE: instead of passing in alpha_0 and gamma_0, we pass in parameters
            # for priors over those concentration parameters
            kappa=kappa,alpha=alpha,gamma=gamma_draw,
            init_state_concentration=6.,
            obs_distns=obs_distns)

    # ipdb.set_trace()
    # posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
    #         alpha=alpha,gamma=gamma_draw,init_state_concentration=1.,
    #         obs_distns=obs_distns,
    #         dur_distns=dur_distns)

    # data = np.zeros(data.shape)

    # posteriormodel = pyhsmm.models.WeakLimitHDPHSMM(
    #     # NOTE: instead of passing in alpha_0 and gamma_0, we pass in parameters
    #     # for priors over those concentration parameters
    #     alpha_a_0=1.,alpha_b_0=1./4,
    #     gamma_a_0=1.,gamma_b_0=1./4,
    #     init_state_concentration=6.,
    #     obs_distns=obs_distns,
    #     dur_distns=dur_distns)
    posteriormodel.add_data(data)

    # for idx in progprint_xrange(100):
    #     posteriormodel.resample_model()

    # plt.figure()
    # posteriormodel.plot()
    # plt.gcf().suptitle('Sampled after 100 iterations')

    # plt.figure()
    # t = np.linspace(0.01,30,1000)
    # plt.plot(t,scipy.stats.gamma.pdf(t,1.,scale=4.)) # NOTE: numpy/scipy scale is inverted compared to my scale
    # plt.title('Prior on concentration parameters')

    # plt.show()


    # posteriormodel.add_data(data)
    # # ipdb.set_trace()

    # num_procs=0 runs the sampler serially.
    num_cpu = 0
    # # num_iterations = 1
    # Keep the transition matrix from every sweep for later inspection.
    all_trans_matrices = []
    for idx in progprint_xrange(num_iterations):
        posteriormodel.resample_model(num_procs=num_cpu)
        trans_matrix = np.array([row.weights for row in posteriormodel.trans_distn._row_distns])
        all_trans_matrices.append(trans_matrix)

    # final_trans_matrix = np.mean(np.array(all_trans_matrices), axis=0) # average transition probs
    # # ipdb.set_trace()
    # for i in range(len(final_trans_matrix)):
    #     posteriormodel.trans_distn._row_distns[i].weights = final_trans_matrix[i]

    # trans_matrix = np.array([row.weights for row in posteriormodel.trans_distn._row_distns])
    # print trans_matrix

    # posteriormodel.resample_model(num_procs=num_cpu)

    # dump state sequence information
    hmm_states = posteriormodel.states_list[0]

    # ipdb.set_trace()
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    with open(os.path.join(output_dir, meeting_base + '.pickle'), 'wb') as outf:
        cPickle.dump(hmm_states, outf)

    # Summary statistics of the final sampled state sequence.
    num_states = len(set(hmm_states.stateseq_norep))
    average_duration = np.average(hmm_states.durations) / 10.0
    print('Num States: {}'.format(num_states))
    print('Average Duration: {}'.format('{0:.2f}'.format(average_duration)))
    diarization_error, best_seq, _ = hdphmm_utils.find_error_rate(hmm_states.stateseq, true_seq)
    diarization_error = '{0:.3f}'.format(diarization_error)
    print('DER: {}'.format(diarization_error))

    hdphmm_utils.plot_pred_labels(meeting_base, best_seq, labels_dir, plot_dir, diarization_error)


    # EM PLOTTING
    # plt.figure()
    # posteriormodel.plots()
    # plt.gcf().suptitle('Gibbs-sampled initialization')

    # print 'EM'

    # likes = posteriormodel.EM_fit()

    # plt.figure()
    # posteriormodel.plot()
    # plt.gcf().suptitle('EM fit')

    # plt.figure()
    # plt.plot(likes)
    # plt.gcf().suptitle('log likelihoods during EM')

    # plt.show()
    # DONE


    # posteriormodel.plot()
    # plt.gcf().suptitle('Sticky HDP-HMM sampled model: {}\n \
    #                     Num States: {}, Avg Duration: {}s, Num Iterations: {}'\
    #                     .format(filebase, num_states, 
    #                         '{0:.2f}'.format(average_duration), num_iterations))
    # plt.savefig(os.path.join(output_dir, 'plots', filebase + '_' + str(Nmax) + '.png'))
    # plt.show()
    # plt.cla()
    # plt.clf()
    print('')

    return float(diarization_error)
################

# Duration prior: negative-binomial variant; the leading zeros in
# r_discrete_distn exclude the smallest candidate r values from the support.
dur_distns = [NegativeBinomialIntegerRVariantDuration(np.r_[0.,0,0,1,1,1,1,1],alpha_0=5.,beta_0=5.)
        for state in range(Nmax)]

model = LibraryHSMMIntNegBinVariant(
        init_state_concentration=10.,
        alpha=6.,gamma=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)

# Distribute each sequence to the parallel workers (left-censored starts).
for data in datas:
    model.add_data_parallel(data,left_censoring=True)

##################
#  infer things  #
##################

for i in progprint_xrange(25):
    model.resample_model_parallel()

# plt.figure()
# truemodel.plot()
# plt.gcf().suptitle('truth')

# plt.figure()
# model.plot()
# plt.gcf().suptitle('inferred')

# plt.show()
示例#51
0
#########################

Nmax = 25  # weak-limit truncation level

obs_distns = [pyhsmm.distributions.Gaussian(**obs_hypparams) for state in range(Nmax)]
dur_distns = [pyhsmm.distributions.PoissonDuration(**dur_hypparams) for state in range(Nmax)]

posteriormodel = pyhsmm.models.HSMM(
        # NOTE: instead of passing in alpha_0 and gamma_0, we pass in parameters
        # for priors over those concentration parameters
        alpha_a_0=1.,alpha_b_0=1./4,
        gamma_a_0=1.,gamma_b_0=1./4,
        init_state_concentration=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns,trunc=70)
posteriormodel.add_data(data)

for idx in progprint_xrange(100):
    posteriormodel.resample_model()

plt.figure()
posteriormodel.plot()
plt.gcf().suptitle('Sampled after 100 iterations')

# Visualize the Gamma(1, 1/4) prior placed on the concentration parameters.
plt.figure()
t = np.linspace(0.01,30,1000)
plt.plot(t,stats.gamma.pdf(t,1.,scale=4.)) # NOTE: numpy/scipy scale is inverted compared to my scale
plt.title('Prior on concentration parameters')

plt.show()
        for state in range(library_size)]

model = LibraryHSMMIntNegBinVariant(
        init_state_concentration=10.,
        alpha=6.,gamma=6.,
        obs_distns=obs_distns,
        dur_distns=dur_distns)

# Attach every training sequence with a left-censored start.
for data in training_datas:
    model.add_data(data,left_censoring=True)

##################
#  infer things  #
##################

# Collect posterior samples after 1, 10 and 100 further Gibbs sweeps.
samples1 = [model.resample_and_copy() for i in progprint_xrange(1)]
samples2 = [model.resample_and_copy() for i in progprint_xrange(10)]
samples3 = [model.resample_and_copy() for i in progprint_xrange(100)]
# samples4 = [model.resample_and_copy() for i in progprint_xrange(1000)]

import cPickle
# FIX: pickle protocol -1 is a binary protocol, so these files must be
# opened in binary mode ('wb'); text mode corrupts the stream on Windows
# and fails outright under Python 3.
with open('samples1','wb') as outfile:
    cPickle.dump(samples1,outfile,protocol=-1)

with open('samples2','wb') as outfile:
    cPickle.dump(samples2,outfile,protocol=-1)

with open('samples3','wb') as outfile:
    cPickle.dump(samples3,outfile,protocol=-1)

# with open('samples4','w') as outfile: