def fit(self, X, lengths=None): """Estimate model parameters. An initialization step is performed before entering the EM algorithm. If you want to avoid this step for a subset of the parameters, pass proper ``init_params`` keyword argument to estimator's constructor. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ) Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- self : object Returns self. """ print("Initilization...") X = check_array(X) self._init(X, lengths=lengths) self._check() self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose) for iter in range(self.n_iter): stats = self._initialize_sufficient_statistics() curr_logprob = 0 for i, j in iter_from_X_lengths(X, lengths): framelogprob = self._compute_log_likelihood(X[i:j]) logprob, fwdlattice = self._do_forward_pass(framelogprob) curr_logprob += logprob bwdlattice = self._do_backward_pass(framelogprob) posteriors = self._compute_posteriors(fwdlattice, bwdlattice) self._accumulate_sufficient_statistics(stats, X[i:j], framelogprob, posteriors, fwdlattice, bwdlattice) #if iter%3==0: # filename = "log1a/stats_iter_%d_%d"%(self.run_id,iter) # with open(filename,'a') as f_handle: # np.savetxt(f_handle,posteriors, fmt='%.4f', delimiter='\t') # filename = "log1/stats_iter_%d_%d"%(self.run_id,iter) # np.savetxt(filename, posteriors, fmt='%.4f', delimiter='\t') # XXX must be before convergence check, because otherwise # there won't be any updates for the case ``n_iter=1``. #print("Maximization...") #print(iter) self._do_mstep(stats) self.monitor_.report(curr_logprob) if self.monitor_.converged: break return self
def score(self, X, lengths=None): """Compute the log probability under the model. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ), optional Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- logprob : float Log likelihood of ``X``. See Also -------- score_samples : Compute the log probability under the model and posteriors. decode : Find most likely state sequence corresponding to ``X``. """ check_is_fitted(self, "startprob_") self._check() X = check_array(X) # XXX we can unroll forward pass for speed and memory efficiency. logprob = 0 for i, j in iter_from_X_lengths(X, lengths): framelogprob = self._compute_log_likelihood(X[i:j]) logprobij, _fwdlattice = self._do_forward_pass(framelogprob) logprob += logprobij return logprob
def decode(self, X, lengths=None, algorithm=None): """Find most likely state sequence corresponding to ``X``. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ), optional Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. algorithm : string Decoder algorithm. Must be one of "viterbi" or "map". If not given, :attr:`decoder` is used. Returns ------- logprob : float Log probability of the produced state sequence. state_sequence : array, shape (n_samples, ) Labels for each sample from ``X`` obtained via a given decoder ``algorithm``. See Also -------- score_samples : Compute the log probability under the model and posteriors. score : Compute the log probability under the model. """ check_is_fitted(self, "startprob_") self._check() algorithm = algorithm or self.algorithm if algorithm not in DECODER_ALGORITHMS: raise ValueError("Unknown decoder {0!r}".format(algorithm)) decoder = { "viterbi": self._decode_viterbi, "map": self._decode_map }[algorithm] X = check_array(X) n_samples = X.shape[0] logprob = 0 state_sequence = np.empty(n_samples, dtype=int) for i, j in iter_from_X_lengths(X, lengths): # XXX decoder works on a single sample at a time! logprobij, state_sequenceij = decoder(X[i:j]) logprob += logprobij state_sequence[i:j] = state_sequenceij return logprob, state_sequence
def score_samples(self, X, lengths=None): """Compute the log probability under the model and compute posteriors. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature matrix of individual samples. lengths : array-like of integers, shape (n_sequences, ), optional Lengths of the individual sequences in ``X``. The sum of these should be ``n_samples``. Returns ------- logprob : float Log likelihood of ``X``. posteriors : array, shape (n_samples, n_components) State-membership probabilities for each sample in ``X``. See Also -------- score : Compute the probability under the model. decode : Find most likely state sequence corresponding to ``X``. """ check_is_fitted(self, "startprob_") self._check() X = check_array(X) n_samples = X.shape[0] logprob = 0 posteriors = np.zeros((n_samples, self.n_components)) for i, j in iter_from_X_lengths(X, lengths): framelogprob = self._compute_log_likelihood(X[i:j]) logprobij, fwdlattice = self._do_forward_pass(framelogprob) logprob += logprobij bwdlattice = self._do_backward_pass(framelogprob) posteriors[i:j] = self._compute_posteriors(fwdlattice, bwdlattice) return logprob, posteriors