Example #1
    def fit(self, X, lengths=None):
        """Estimate model parameters.

        An initialization step is performed before entering the
        EM algorithm. If you want to avoid this step for a subset of
        the parameters, pass proper ``init_params`` keyword argument
        to estimator's constructor.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.

        lengths : array-like of integers, shape (n_sequences, )
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.

        Returns
        -------
        self : object
            Returns self.
        """
        print("Initilization...")
        X = check_array(X)
        self._init(X, lengths=lengths)
        self._check()

        self.monitor_ = ConvergenceMonitor(self.tol, self.n_iter, self.verbose)
        for iter in range(self.n_iter):
            stats = self._initialize_sufficient_statistics()
            curr_logprob = 0
            for i, j in iter_from_X_lengths(X, lengths):
                framelogprob = self._compute_log_likelihood(X[i:j])
                logprob, fwdlattice = self._do_forward_pass(framelogprob)
                curr_logprob += logprob
                bwdlattice = self._do_backward_pass(framelogprob)
                posteriors = self._compute_posteriors(fwdlattice, bwdlattice)
                self._accumulate_sufficient_statistics(stats, X[i:j],
                                                       framelogprob,
                                                       posteriors, fwdlattice,
                                                       bwdlattice)

            # XXX must be before convergence check, because otherwise
            #     there won't be any updates for the case ``n_iter=1``.
            #print("Maximization...")
            #print(iter)
            self._do_mstep(stats)

            self.monitor_.report(curr_logprob)
            if self.monitor_.converged:
                break

        return self
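
As a rough usage sketch (assuming these methods live on a concrete hmmlearn-style subclass such as ``GaussianHMM``; the data, shapes, and hyperparameters below are purely illustrative), fitting on several concatenated sequences looks like this:

import numpy as np
from hmmlearn.hmm import GaussianHMM  # assumed concrete subclass of the base class above

# Two illustrative sequences of lengths 100 and 150, stacked row-wise.
rng = np.random.RandomState(0)
X = np.vstack([rng.randn(100, 2), rng.randn(150, 2) + 3.0])
lengths = [100, 150]

# ``init_params`` controls which parameters the initialization step touches,
# as mentioned in the docstring above.
model = GaussianHMM(n_components=2, covariance_type="diag",
                    n_iter=50, tol=1e-2, init_params="stmc")
model.fit(X, lengths)
print(model.monitor_.converged)  # True if EM stopped before n_iter was reached
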
Example #2
    def score(self, X, lengths=None):
        """Compute the log probability under the model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.

        lengths : array-like of integers, shape (n_sequences, ), optional
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.

        Returns
        -------
        logprob : float
            Log likelihood of ``X``.

        See Also
        --------
        score_samples : Compute the log probability under the model and
            posteriors.
        decode : Find most likely state sequence corresponding to ``X``.
        """
        check_is_fitted(self, "startprob_")
        self._check()

        X = check_array(X)
        # XXX we can unroll forward pass for speed and memory efficiency.
        logprob = 0
        for i, j in iter_from_X_lengths(X, lengths):
            framelogprob = self._compute_log_likelihood(X[i:j])
            logprobij, _fwdlattice = self._do_forward_pass(framelogprob)
            logprob += logprobij
        return logprob
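
A brief, hypothetical continuation of the fit sketch above: scoring can be done over all sequences at once or over a single slice, as long as ``lengths`` matches what is passed in ``X``.

# Total log likelihood over both sequences (assumes ``model``, ``X`` and
# ``lengths`` from the fit sketch above).
total_logprob = model.score(X, lengths)

# Log likelihood of the first sequence alone; ``lengths`` can be omitted
# when ``X`` holds a single sequence.
first_logprob = model.score(X[:100])
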
Example #3
    def decode(self, X, lengths=None, algorithm=None):
        """Find most likely state sequence corresponding to ``X``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.

        lengths : array-like of integers, shape (n_sequences, ), optional
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.

        algorithm : string
            Decoder algorithm. Must be one of "viterbi" or "map".
            If not given, :attr:`decoder` is used.

        Returns
        -------
        logprob : float
            Log probability of the produced state sequence.

        state_sequence : array, shape (n_samples, )
            Labels for each sample from ``X`` obtained via a given
            decoder ``algorithm``.

        See Also
        --------
        score_samples : Compute the log probability under the model and
            posteriors.
        score : Compute the log probability under the model.
        """
        check_is_fitted(self, "startprob_")
        self._check()

        algorithm = algorithm or self.algorithm
        if algorithm not in DECODER_ALGORITHMS:
            raise ValueError("Unknown decoder {0!r}".format(algorithm))

        decoder = {
            "viterbi": self._decode_viterbi,
            "map": self._decode_map
        }[algorithm]

        X = check_array(X)
        n_samples = X.shape[0]
        logprob = 0
        state_sequence = np.empty(n_samples, dtype=int)
        for i, j in iter_from_X_lengths(X, lengths):
            # XXX decoder works on a single sample at a time!
            logprobij, state_sequenceij = decoder(X[i:j])
            logprob += logprobij
            state_sequence[i:j] = state_sequenceij

        return logprob, state_sequence
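
Continuing the same hypothetical setup, decoding returns one state label per row of ``X``; the ``algorithm`` argument can override the decoder chosen at construction time.

# Viterbi decoding over both sequences (assumes ``model``, ``X``, ``lengths``
# from the sketch above).
logprob, states = model.decode(X, lengths, algorithm="viterbi")
assert states.shape == (X.shape[0],)  # one integer state label per sample
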
Example #4
    def score_samples(self, X, lengths=None):
        """Compute the log probability under the model and compute posteriors.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature matrix of individual samples.

        lengths : array-like of integers, shape (n_sequences, ), optional
            Lengths of the individual sequences in ``X``. The sum of
            these should be ``n_samples``.

        Returns
        -------
        logprob : float
            Log likelihood of ``X``.

        posteriors : array, shape (n_samples, n_components)
            State-membership probabilities for each sample in ``X``.

        See Also
        --------
        score : Compute the log probability under the model.
        decode : Find most likely state sequence corresponding to ``X``.
        """
        check_is_fitted(self, "startprob_")
        self._check()

        X = check_array(X)
        n_samples = X.shape[0]
        logprob = 0
        posteriors = np.zeros((n_samples, self.n_components))
        for i, j in iter_from_X_lengths(X, lengths):
            framelogprob = self._compute_log_likelihood(X[i:j])
            logprobij, fwdlattice = self._do_forward_pass(framelogprob)
            logprob += logprobij

            bwdlattice = self._do_backward_pass(framelogprob)
            posteriors[i:j] = self._compute_posteriors(fwdlattice, bwdlattice)
        return logprob, posteriors
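
Finally, an illustrative call on the same fitted model: ``score_samples`` returns both the total log likelihood and the per-sample posterior state probabilities.

# Posterior state probabilities (assumes ``model``, ``X``, ``lengths``
# from the sketch above).
logprob, posteriors = model.score_samples(X, lengths)
# Each row of ``posteriors`` sums to 1 across the model's n_components states.
print(posteriors.shape)  # (n_samples, n_components)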