def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
                                      posteriors, fwdlattice, bwdlattice,
                                      params):
    """Accumulate per-state GMM sufficient statistics for the M-step.

    Extends the base-class accumulation with, for each hidden state, the
    state-weighted mixture posteriors, means and covariances gathered
    from ``obs``.

    Raises
    ------
    ValueError
        If the per-state GMM M-step produces NaN covariances.
    """
    super(GMMHMM, self)._accumulate_sufficient_statistics(
        stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice,
        params)
    for state, g in enumerate(self.gmms_):
        _, lgmm_posteriors = g.score_samples(obs)
        # Weight each mixture responsibility by the HMM state posterior;
        # eps guards log(0).  BUG FIX: np.float was removed in NumPy
        # 1.24 -- use the builtin float.
        lgmm_posteriors += np.log(posteriors[:, state][:, np.newaxis]
                                  + np.finfo(float).eps)
        gmm_posteriors = np.exp(lgmm_posteriors)
        # Throwaway GMM seeded with identity covariances so the M-step
        # statistics are independent of g's current covariances.
        tmp_gmm = GMM(g.n_components, covariance_type=g.covariance_type)
        n_features = g.means_.shape[1]
        tmp_gmm._set_covars(
            distribute_covar_matrix_to_match_covariance_type(
                np.eye(n_features), g.covariance_type, g.n_components))
        norm = tmp_gmm._do_mstep(obs, gmm_posteriors, params)

        if np.any(np.isnan(tmp_gmm.covars_)):
            # BUG FIX: attach a message so the failing state is visible.
            raise ValueError(
                'GMM M-step produced NaN covariances for state %d'
                % state)

        stats['norm'][state] += norm
        if 'm' in params:
            stats['means'][state] += tmp_gmm.means_ * norm[:, np.newaxis]
        if 'c' in params:
            if tmp_gmm.covariance_type == 'tied':
                stats['covars'][state] += tmp_gmm.covars_ * norm.sum()
            else:
                # Reshape norm to (n_components, 1, ...) so it
                # broadcasts over the trailing covariance axes.
                # BUG FIX: shapes must be integers; np.ones defaults to
                # float64, which modern NumPy rejects for .shape.
                cvnorm = np.copy(norm)
                shape = np.ones(tmp_gmm.covars_.ndim, dtype=int)
                shape[0] = np.shape(tmp_gmm.covars_)[0]
                cvnorm.shape = shape
                stats['covars'][state] += tmp_gmm.covars_ * cvnorm
def _accumulate_sufficient_statistics(self, stats, obs, framelogprob,
                                      posteriors, fwdlattice, bwdlattice,
                                      params):
    """Accumulate per-state GMM sufficient statistics for one EM pass.

    For each hidden state, weights the state's mixture responsibilities
    by the HMM state posterior, runs a fresh GMM M-step on ``obs``, and
    adds the resulting norm/means/covars into ``stats``.
    """
    super(GMMHMM, self)._accumulate_sufficient_statistics(
        stats, obs, framelogprob, posteriors, fwdlattice, bwdlattice,
        params)
    for state, g in enumerate(self.gmms_):
        # Per-sample log responsibilities of each mixture component.
        _, lgmm_posteriors = g.score_samples(obs)
        # Weight by the HMM state posterior; eps avoids log(0).
        # NOTE(review): np.float was removed in NumPy 1.24 -- this line
        # breaks on modern NumPy and needs the builtin float.
        lgmm_posteriors += np.log(posteriors[:, state][:, np.newaxis]
                                  + np.finfo(np.float).eps)
        gmm_posteriors = np.exp(lgmm_posteriors)
        # Fresh GMM seeded with identity covariances so the M-step
        # statistics do not depend on g's current covariances.
        tmp_gmm = GMM(g.n_components, covariance_type=g.covariance_type)
        n_features = g.means_.shape[1]
        tmp_gmm._set_covars(
            distribute_covar_matrix_to_match_covariance_type(
                np.eye(n_features), g.covariance_type, g.n_components))
        norm = tmp_gmm._do_mstep(obs, gmm_posteriors, params)

        # NOTE(review): raised without a message -- callers get no hint
        # of which state failed.
        if np.any(np.isnan(tmp_gmm.covars_)):
            raise ValueError

        stats['norm'][state] += norm
        if 'm' in params:
            stats['means'][state] += tmp_gmm.means_ * norm[:, np.newaxis]
        if 'c' in params:
            if tmp_gmm.covariance_type == 'tied':
                stats['covars'][state] += tmp_gmm.covars_ * norm.sum()
            else:
                # Reshape norm so it broadcasts over the trailing
                # covariance axes.
                cvnorm = np.copy(norm)
                shape = np.ones(tmp_gmm.covars_.ndim)
                shape[0] = np.shape(tmp_gmm.covars_)[0]
                cvnorm.shape = shape
                stats['covars'][state] += tmp_gmm.covars_ * cvnorm
def _init(self, X, lengths=None):
    """Initialize ``means_`` and ``_covars_`` prior to EM fitting.

    Means come from MiniBatchKMeans on X; covariances from the pooled
    covariance of X regularized by ``min_covar``, distributed to match
    ``covariance_type``.  (Several abandoned initialization strategies
    that lived here as commented-out code have been removed.)
    """
    super(GaussianHMM, self)._init(X, lengths=lengths)
    _, n_features = X.shape
    if hasattr(self, 'n_features') and self.n_features != n_features:
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (n_features, self.n_features))
    self.n_features = n_features
    if 'm' in self.init_params or not hasattr(self, "means_"):
        # MiniBatchKMeans: far cheaper than full KMeans on large X, and
        # EM refines the centers anyway.
        kmeans = cluster.MiniBatchKMeans(n_clusters=self.n_components,
                                         init='k-means++', max_iter=15,
                                         n_init=3)
        with warnings.catch_warnings():
            # Silence sklearn convergence/batch-size chatter.
            warnings.simplefilter("ignore")
            kmeans.fit(X)
        self.means_ = kmeans.cluster_centers_
    if 'c' in self.init_params or not hasattr(self, "covars_"):
        cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
        if not cv.shape:
            # np.cov of 1-D data is 0-d; promote to a 1x1 matrix.
            cv.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            cv, self.covariance_type, self.n_components).copy()
def _init(self, sequences):
    """Initialize the state, prior to fitting (hot starting).

    Sets means (k-means), covariances, the transition matrix, and the
    per-state linear-dynamics parameters ``As_``/``bs_``/``Qs_``,
    honoring the flags in ``self.init_params``.
    """
    sequences = [ensure_type(s, dtype=np.float32, ndim=2, name='s')
                 for s in sequences]
    self._impl._sequences = sequences

    # Hot-start from a stacked prefix of the data to keep k-means cheap.
    small_dataset = np.vstack(
        sequences[0:min(len(sequences), self.n_hotstart_sequences)])

    if 'm' in self.init_params:
        with warnings.catch_warnings():
            # Silence sklearn chatter during the throwaway clustering.
            warnings.simplefilter("ignore")
            self.means_ = cluster.KMeans(n_clusters=self.n_states).fit(
                small_dataset).cluster_centers_
    if 'c' in self.init_params:
        cv = np.cov(small_dataset.T)
        self.covars_ = \
            distribute_covar_matrix_to_match_covariance_type(
                cv, 'full', self.n_states)
        # BUG FIX: the mask previously read self._covars_ (a different
        # attribute) while writing self.covars_; mask the same array so
        # exact-zero entries actually get regularized.
        self.covars_[self.covars_ == 0] = 1e-5
    if 't' in self.init_params:
        # Uniform transitions and populations.
        transmat_ = np.empty((self.n_states, self.n_states))
        transmat_.fill(1.0 / self.n_states)
        self.transmat_ = transmat_
        self.populations_ = np.ones(self.n_states) / self.n_states
    if 'a' in self.init_params:
        # Start each dynamics matrix near the identity.
        self.As_ = np.zeros(
            (self.n_states, self.n_features, self.n_features))
        for i in range(self.n_states):
            self.As_[i] = np.eye(self.n_features) - self.eps
    if 'b' in self.init_params:
        # b = (I - A) mu makes mu the fixed point of x -> A x + b.
        self.bs_ = np.zeros((self.n_states, self.n_features))
        for i in range(self.n_states):
            A = self.As_[i]
            mean = self.means_[i]
            self.bs_[i] = np.dot(np.eye(self.n_features) - A, mean)
    if 'q' in self.init_params:
        # Local noise covariances scaled down from the global covars.
        self.Qs_ = np.zeros(
            (self.n_states, self.n_features, self.n_features))
        for i in range(self.n_states):
            self.Qs_[i] = self.eps * self.covars_[i]
def _init(self, X, lengths=None):
    """Set up ``means_`` and ``_covars_`` from the data before EM.

    Means are seeded by k-means (reproducible via ``random_state``);
    covariances come from the pooled, ``min_covar``-regularized
    covariance of X distributed to match ``covariance_type``.
    """
    super(GaussianHMM, self)._init(X, lengths=lengths)

    _, n_features = X.shape
    if hasattr(self, 'n_features') and self.n_features != n_features:
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (n_features, self.n_features))
    self.n_features = n_features

    needs_means = 'm' in self.init_params or not hasattr(self, "means_")
    if needs_means:
        clusterer = cluster.KMeans(n_clusters=self.n_components,
                                   random_state=self.random_state)
        self.means_ = clusterer.fit(X).cluster_centers_

    needs_covars = ('c' in self.init_params
                    or not hasattr(self, "covars_"))
    if needs_covars:
        pooled = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
        if not pooled.shape:
            # 0-d result for 1-D input; promote to a 1x1 matrix.
            pooled.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            pooled, self.covariance_type, self.n_components).copy()
def _init(self, X, lengths=None):
    """Initialize model parameters prior to EM fitting.

    Sets ``means_`` via k-means on X and ``_covars_`` from the pooled,
    ``min_covar``-regularized covariance of X, honoring
    ``init_params``.
    """
    super(GaussianHMM, self)._init(X, lengths=lengths)
    _, n_features = X.shape
    # Refuse to re-initialize on data of a different dimensionality.
    if hasattr(self, 'n_features') and self.n_features != n_features:
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (n_features, self.n_features))
    self.n_features = n_features
    if 'm' in self.init_params or not hasattr(self, "means_"):
        # NOTE(review): no random_state is passed, so the k-means
        # seeding (and therefore the fit) is not reproducible.
        kmeans = cluster.KMeans(n_clusters=self.n_components)
        kmeans.fit(X)
        self.means_ = kmeans.cluster_centers_
    if 'c' in self.init_params or not hasattr(self, "covars_"):
        # Pooled covariance with min_covar added on the diagonal.
        cv = np.cov(X.T) + self.min_covar * np.eye(X.shape[1])
        if not cv.shape:
            # np.cov of 1-D data is 0-d; promote to a 1x1 matrix.
            cv.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            cv, self.covariance_type, self.n_components).copy()
def _init(self, obs, params='stmc'):
    """Initialize means and covariances from the first observation
    sequence, honoring the flags in ``params``.
    """
    super(GaussianHMM, self)._init(obs, params=params)

    if (hasattr(self, 'n_features')
            and self.n_features != obs[0].shape[1]):
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (obs[0].shape[1],
                                          self.n_features))
    self.n_features = obs[0].shape[1]

    if 'm' in params:
        # k-means on the first sequence only.
        self._means_ = cluster.KMeans(
            n_clusters=self.n_components).fit(obs[0]).cluster_centers_
    if 'c' in params:
        cv = np.cov(obs[0].T)
        if not cv.shape:
            # 0-d result for 1-D input; promote to a 1x1 matrix.
            cv.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            cv, self._covariance_type, self.n_components)
        # ROBUSTNESS FIX: exactly-zero variances make the per-state
        # Gaussians degenerate; regularize them like the sibling _init
        # variants in this file do.
        self._covars_[self._covars_ == 0] = 1e-5
def _init(self, obs, params='stmc'):
    """Initialize means and covariances from the first observation
    sequence, honoring the flags in ``params``.
    """
    super(GaussianHMM, self)._init(obs, params=params)
    # Refuse to re-initialize on data of a different dimensionality.
    if (hasattr(self, 'n_features')
            and self.n_features != obs[0].shape[1]):
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (obs[0].shape[1],
                                          self.n_features))
    self.n_features = obs[0].shape[1]
    if 'm' in params:
        # k-means on the first sequence only seeds the state means.
        self._means_ = cluster.KMeans(n_clusters=self.n_components).fit(
            obs[0]).cluster_centers_
    if 'c' in params:
        cv = np.cov(obs[0].T)
        if not cv.shape:
            # np.cov of 1-D data is 0-d; promote to a 1x1 matrix.
            cv.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            cv, self._covariance_type, self.n_components)
        # Regularize exact zeros to keep the Gaussians non-degenerate.
        self._covars_[self._covars_ == 0] = 1e-5
def _init(self, sequences):
    """Initialize the state, prior to fitting (hot starting)."""
    sequences = [ensure_type(s, dtype=np.float32, ndim=2, name="s")
                 for s in sequences]
    self.inferrer._sequences = sequences
    # Hot-start from a stacked prefix of the data to keep k-means cheap.
    small_dataset = np.vstack(
        sequences[0:min(len(sequences), self.n_hotstart_sequences)])

    # Initialize means via k-means (sklearn warnings suppressed).
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        self.means_ = cluster.KMeans(
            n_clusters=self.n_states).fit(small_dataset).cluster_centers_

    # Initialize covariances from the pooled covariance of the subset,
    # regularizing exact zeros.
    cv = np.cov(small_dataset.T)
    self.covars_ = distribute_covar_matrix_to_match_covariance_type(
        cv, "full", self.n_states)
    self.covars_[self.covars_ == 0] = 1e-5
    # Stabilize eigenvalues of each covariance matrix.
    for i in range(self.n_states):
        self.covars_[i] = (self.covars_[i]
                           + 1e-5 * np.eye(self.n_features))

    # Uniform transition matrix and state populations.
    transmat_ = np.empty((self.n_states, self.n_states))
    transmat_.fill(1.0 / self.n_states)
    self.transmat_ = transmat_
    self.populations_ = np.ones(self.n_states) / self.n_states

    # Initialize As to zeros and bs from the means.
    # NOTE(review): with As_ == 0, bs_[i] reduces to means_[i]; other
    # variants of this initializer start As_ near the identity --
    # confirm which is intended.
    self.As_ = np.zeros((self.n_states, self.n_features,
                         self.n_features))
    self.bs_ = np.zeros((self.n_states, self.n_features))
    for i in range(self.n_states):
        A = self.As_[i]
        mean = self.means_[i]
        self.bs_[i] = np.dot(np.eye(self.n_features) - A, mean)

    # Initialize local (per-state) noise covariances.
    self.Qs_ = np.zeros((self.n_states, self.n_features,
                         self.n_features))
    for i in range(self.n_states):
        self.Qs_[i] = self.eps * self.covars_[i]
def _init(self, obs, params='stmc'):
    """Initialize Gaussian emission parameters from all observations.

    All sequences in ``obs`` are pooled; means come from k-means on the
    pooled data, covariances from its pooled covariance matrix.
    """
    super(GaussianHMM, self)._init(obs, params=params)

    stacked = np.concatenate(obs)
    _, n_features = stacked.shape
    if hasattr(self, 'n_features') and self.n_features != n_features:
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (n_features, self.n_features))
    self.n_features = n_features

    if 'm' in params:
        # Cluster centers of the pooled data seed the state means.
        self._means_ = cluster.KMeans(
            n_clusters=self.n_components).fit(stacked).cluster_centers_

    if 'c' in params:
        pooled_cov = np.cov(stacked.T)
        if not pooled_cov.shape:
            # 0-d result for 1-D input; promote to a 1x1 matrix.
            pooled_cov.shape = (1, 1)
        covars = distribute_covar_matrix_to_match_covariance_type(
            pooled_cov, self._covariance_type, self.n_components)
        # Regularize exact zeros to keep the Gaussians non-degenerate.
        covars[covars == 0] = 1e-5
        self._covars_ = covars
def _init(self, sequences):
    """Initialize the state, prior to fitting (hot starting).

    Sets means (k-means), covariances, the transition matrix, and the
    per-state dynamics parameters ``As_``/``bs_``/``Qs_``, honoring the
    flags in ``self.init_params``.
    """
    sequences = [ensure_type(s, dtype=np.float32, ndim=2, name='s')
                 for s in sequences]
    self._impl._sequences = sequences
    # Hot-start from a stacked prefix of the data to keep k-means cheap.
    small_dataset = np.vstack(
        sequences[0:min(len(sequences), self.n_hotstart_sequences)])

    if 'm' in self.init_params:
        with warnings.catch_warnings():
            # Silence sklearn chatter during the throwaway clustering.
            warnings.simplefilter("ignore")
            self.means_ = cluster.KMeans(
                n_clusters=self.n_states).fit(
                    small_dataset).cluster_centers_
    if 'c' in self.init_params:
        cv = np.cov(small_dataset.T)
        self.covars_ = \
            distribute_covar_matrix_to_match_covariance_type(
                cv, 'full', self.n_states)
        # BUG FIX: the zero mask previously read self._covars_ (a
        # different attribute) while writing self.covars_; mask the
        # same array so exact zeros are actually regularized.
        self.covars_[self.covars_ == 0] = 1e-5
    if 't' in self.init_params:
        # Uniform transitions and populations.
        transmat_ = np.empty((self.n_states, self.n_states))
        transmat_.fill(1.0 / self.n_states)
        self.transmat_ = transmat_
        self.populations_ = np.ones(self.n_states) / self.n_states
    if 'a' in self.init_params:
        # Start each dynamics matrix near the identity.
        self.As_ = np.zeros((self.n_states, self.n_features,
                             self.n_features))
        for i in range(self.n_states):
            self.As_[i] = np.eye(self.n_features) - self.eps
    if 'b' in self.init_params:
        # b = (I - A) mu makes mu the fixed point of x -> A x + b.
        self.bs_ = np.zeros((self.n_states, self.n_features))
        for i in range(self.n_states):
            A = self.As_[i]
            mean = self.means_[i]
            self.bs_[i] = np.dot(np.eye(self.n_features) - A, mean)
    if 'q' in self.init_params:
        # Local noise covariances scaled down from the global covars.
        self.Qs_ = np.zeros((self.n_states, self.n_features,
                             self.n_features))
        for i in range(self.n_states):
            self.Qs_[i] = self.eps * self.covars_[i]
def _init(self, X, lengths=None, params='stmc'):
    """Initialize ``means_`` and ``_covars_`` before EM fitting.

    Means are seeded by k-means on X; covariances from the pooled
    covariance of X distributed to match ``covariance_type``, with
    exact-zero entries regularized.
    """
    super(GaussianHMM, self)._init(X, lengths=lengths, params=params)
    _, n_features = X.shape
    if hasattr(self, 'n_features') and self.n_features != n_features:
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (n_features, self.n_features))
    self.n_features = n_features

    if 'm' in params or not hasattr(self, "means_"):
        kmeans = cluster.KMeans(n_clusters=self.n_components)
        kmeans.fit(X)
        self.means_ = kmeans.cluster_centers_

    if 'c' in params or not hasattr(self, "covars_"):
        cv = np.cov(X.T)
        if not cv.shape:
            # 0-d result for 1-D input; promote to a 1x1 matrix.
            cv.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            cv, self.covariance_type, self.n_components).copy()
        # BUG FIX: the original `if self._covars_.any() == 0:` compared
        # a bool to 0 and was True only when *every* entry was falsy, so
        # the zero guard almost never ran.  Patch zero entries
        # unconditionally instead (the assignment is a no-op when there
        # are none).
        self._covars_[self._covars_ == 0] = 1e-5
def _init(self, obs, params='stmc'):
    """Seed means from evenly spaced frames and covariances from the
    first observation sequence, honoring the flags in ``params``.
    """
    super(WeightedGaussianHMM, self)._init(obs, params=params)

    first = obs[0]
    if (hasattr(self, 'n_features')
            and self.n_features != first.shape[1]):
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (first.shape[1],
                                          self.n_features))
    self.n_features = first.shape[1]

    if 'm' in params:
        # Pick n_components frames spread evenly across the sequence.
        picks = np.linspace(0, first.shape[0] - 1, self.n_components)
        self._means_ = first[np.fix(picks).astype(int), :]

    if 'c' in params:
        # Clip tiny variances so the Gaussians stay well-conditioned.
        base_cov = np.cov(first.T).clip(min=1e-3)
        if not base_cov.shape:
            # 0-d result for 1-D input; promote to a 1x1 matrix.
            base_cov.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            base_cov, self._covariance_type, self.n_components)
def _init(self, X, logger, kmeans_opt, lengths=None):
    """Initialize means via (mini-batch) k-means and covariances from X.

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        Pooled observation data.
    logger : object
        Logging facade providing ``getLogger(name)``.
    kmeans_opt : str
        'REGULAR' or '' selects full KMeans; anything else selects
        MiniBatchKMeans.
    lengths : array-like, optional
        Per-sequence lengths forwarded to the base initializer.
    """
    super(GaussianHMM, self)._init(X, lengths=lengths)
    _, n_features = X.shape
    if hasattr(self, 'n_features') and self.n_features != n_features:
        raise ValueError('Unexpected number of dimensions, got %s but '
                         'expected %s' % (n_features, self.n_features))
    self.n_features = n_features

    if 'm' in self.init_params or not hasattr(self, "means_"):
        if kmeans_opt == 'REGULAR' or kmeans_opt == '':
            logger.getLogger('tab.regular').info('using K-means model')
            kmeans = cluster.KMeans(n_clusters=self.n_components,
                                    n_jobs=4, verbose=True)
        else:
            logger.getLogger('tab.regular').info(
                'using Mini Batch K-Means model')
            kmeans = cluster.MiniBatchKMeans(
                n_clusters=self.n_components, batch_size=1000000,
                compute_labels=False, verbose=True)
        logger.getLogger('tab.regular.time').info(
            'starting training model')
        kmeans.fit(X)
        logger.getLogger('tab.regular.time').info(
            'finished training k-means model')
        self.means_ = kmeans.cluster_centers_

    if 'c' in self.init_params or not hasattr(self, "covars_"):
        logger.getLogger('tab.regular.time').info(
            'starting calculating covariances')
        # X.T instead of X[:].T: the slice was redundant.
        cv = np.cov(X.T)
        logger.getLogger('tab.regular.time').info(
            'finished calculating covariances')
        if not cv.shape:
            # 0-d result for 1-D input; promote to a 1x1 matrix.
            cv.shape = (1, 1)
        self._covars_ = distribute_covar_matrix_to_match_covariance_type(
            cv, self.covariance_type, self.n_components).copy()
        # BUG FIX: the original `if self._covars_.any() == 0:` compared
        # a bool to 0 and was True only when *every* entry was falsy, so
        # the zero guard almost never ran.  Patch zero entries
        # unconditionally (no-op when there are none).
        self._covars_[self._covars_ == 0] = 1e-5
for numState in range(2, maxSubstates+1): # make the output directory call(["mkdir", "-p", "data/substates/%s%d/%d" % (basepath,stateNum,numState)]) # do the replicates for repInx in range(0, numReps): print "Doing replicate", repInx, "/", numReps, "with", numState, "states" sys.stdout.flush() # cluster all the available data and use that as initial point means = cluster.KMeans(n_clusters=numState).fit(indata.iloc[:,0:num_data]).cluster_centers_ cv = np.cov(indata.iloc[:,0:num_data].T) covars = mixture.distribute_covar_matrix_to_match_covariance_type(cv, "tied", num_data) covars[covars==0] = 1e-5 model = GaussianHMM(numState, covariance_type="tied", n_iter=1000, init_params='abdefghijklnopqrstuvwxyzABDEFGHIJKLNOPQRSTUVWXYZ') model.means_ = means model.covars_ = covars print("Fitting model...") sys.stdout.flush() model.fit(data) print("Decoding states...") sys.stdout.flush() # do a loop over everything and record in one long array states = np.array([]) score = 0