def predict(self, X):
        '''
        Input
        @ X: dimension x sample x length #samples x known steps
        Output
        @ observation distribution: mu, var #samples x 1 [list]        
        '''

        # sample x some length
        X_test = util.convert_sequence(X, emission=False)

        mu_l = []
        cov_l = []

        for i in xrange(len(X_test)):

            # Past profile
            final_ts_obj = ghmm.EmissionSequence(self.F, X_test[i].tolist())

            try:
                # alpha: X_test length y #latent States at the moment t when state i is ended
                #        test_profile_length x number_of_hidden_state
                (alpha, scale) = self.ml.forward(final_ts_obj)
                alpha = np.array(alpha)
                scale = np.array(scale)
            except:
                print "No alpha is available !!"
                sys.exit()
                ## continue

            f = lambda x: round(x, 12)
            for j in range(len(alpha)):
                alpha[j] = map(f, alpha[j])
            alpha[-1] = map(f, alpha[-1])

            n = len(X_test[i])
            t_mu = np.zeros(self.nEmissionDim)
            t_cov = np.zeros(self.nEmissionDim * self.nEmissionDim)
            t_sum = 0.0
            for j in xrange(self.nState):  # N+1

                total = np.sum(
                    self.A[:, j] *
                    alpha[n / self.nEmissionDim - 1, :])  #* scaling_factor
                [mu, cov] = self.B[j]

                t_mu += np.array(mu) * total
                t_cov += np.array(cov) * (total**2)
                t_sum += total

            mu_l.append(t_mu.tolist())
            cov_l.append(t_cov.tolist())

        return mu_l, cov_l
 def loglikelihoods(self, X, bPosterior=False, bIdx=False, startIdx=1):
     '''
     Convert X with util.convert_sequence and delegate to
     self.loglikelihoods_from_seqs.

     @ X: dimension x sample x length
     @ bPosterior, startIdx: forwarded unchanged to loglikelihoods_from_seqs
     @ bIdx: enable to return indices
     return: the likelihoods over time (in single data)
     '''
     # Options are passed through untouched.
     options = dict(bPosterior=bPosterior, bIdx=bIdx, startIdx=startIdx)

     # sample x some length
     sequences = util.convert_sequence(X, emission=False)
     return self.loglikelihoods_from_seqs(sequences, **options)
def computeLikelihood(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=1, \
                      bPosterior=False, converted_X=False, cov_type='full'):
    '''
    This function will be deprecated. Please, use computeLikelihoods.
    '''

    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F),
                                  A, B, pi)
        if cov_type == 'diag' or cov_type.find('diag') >= 0:
            ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    if converted_X is False:
        X_test = util.convert_sequence(X, emission=False)
        X_test = np.squeeze(X_test)
        X_test = X_test.tolist()
    else:
        X_test = X

    l_idx = []
    l_likelihood = []
    l_posterior = []

    for i in xrange(startIdx, len(X_test) / nEmissionDim):
        final_ts_obj = ghmm.EmissionSequence(F, X_test[:i * nEmissionDim])

        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior: post = np.array(ml.posterior(final_ts_obj))
        except:
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"

            l_idx.append(i)
            l_likelihood.append(-100000000)
            if bPosterior:
                if len(l_posterior) == 0: l_posterior.append(list(pi))
                else: l_posterior.append(l_posterior[-1])
            ## return False, False # anomaly
            continue

        l_idx.append(i)
        l_likelihood.append(logp)
        if bPosterior: l_posterior.append(post[i - 1])

    if bPosterior:
        return idx, l_idx, l_likelihood, l_posterior
    else:
        return idx, l_idx, l_likelihood
def computeLikelihoods(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=2, \
                       bPosterior=False, converted_X=False, cov_type='full'):
    '''
    Input:
    - X: dimension x length
    '''

    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F),
                                  A, B, pi)
        if cov_type == 'diag': ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    X_test = util.convert_sequence(X, emission=False)
    X_test = np.squeeze(X_test)

    l_idx = []
    l_likelihood = []
    l_posterior = []

    for i in xrange(startIdx, len(X[0])):
        final_ts_obj = ghmm.EmissionSequence(
            F, X_test[:i * nEmissionDim].tolist())

        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior: post = np.array(ml.posterior(final_ts_obj))
            l_likelihood.append(logp)
            if bPosterior: l_posterior.append(post[i - 1])
        except:
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"
            ## return False, False # anomaly
            ## continue
            # we keep the state as the previous one
            l_likelihood.append(-1000000000000)
            if bPosterior:
                if len(l_posterior) == 0: l_posterior.append(list(pi))
                else: l_posterior.append(l_posterior[-1])

        l_idx.append(i)

    if bPosterior: return idx, l_idx, l_likelihood, l_posterior
    else: return idx, l_idx, l_likelihood
    def loglikelihood(self, X, bPosterior=False):
        '''
        Evaluate the log-likelihood of a single sequence under self.ml.

        @ X: input accepted by util.convert_sequence; the converted result
             is squeezed, so it is presumably a single sample
             (dimension x 1 x length) -- TODO confirm the expected shape
        @ bPosterior: also return the posterior matrix as a numpy array
        return: logp, or (logp, post) when bPosterior is set.
                On a GHMM error a message is printed and None
                (or (None, None)) is returned instead.
        '''
        X_test = util.convert_sequence(X, emission=False)
        X_test = np.squeeze(X_test)
        final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist())

        try:
            logp = self.ml.loglikelihood(final_ts_obj)
            if bPosterior:
                post = np.array(self.ml.posterior(final_ts_obj))
        except Exception:
            # Real errors only; KeyboardInterrupt / SystemExit propagate.
            print 'Likelihood error!!!!'
            if bPosterior:
                return None, None
            return None

        if bPosterior:
            return logp, post
        return logp
    def partial_fit(self, xData, learningRate=0.2, nrSteps=1, max_iter=100):
        ''' Online update of HMM using online Baum-Welch algorithm
        '''

        X = [np.array(data) for data in xData]
        nData = len(X[0])

        # print 'Creating Training Data'
        X_train = util.convert_sequence(X)  # Training input
        X_train = X_train.tolist()

        if self.verbose:
            print 'Run Baum Welch method with (samples, length)', np.shape(
                X_train)
        if learningRate < 1e-5: learningRate = 1e-5

        final_seq = ghmm.SequenceSet(self.F, X_train)
        for i in xrange(max_iter):
            ret = self.ml.baumWelch(final_seq,
                                    nrSteps=nrSteps,
                                    learningRate=learningRate)

            if np.isnan(ret):
                print 'Baum Welch return:', ret
                return 'Failure'
            if i > 0:
                if abs(last_ret - ret) < 1.0:
                    print "Partial fitting is converged to ", ret, " from ", last_ret
                    break
            last_ret = ret

        print 'Baum Welch return:', ret / float(nData)

        [self.A, self.B, self.pi] = self.ml.asMatrices()
        self.A = np.array(self.A)
        self.B = np.array(self.B)

        return ret
    def getLoglikelihoods(self, xData, posterior=False, startIdx=1, n_jobs=-1):
        '''
        Run computeLikelihood on every sample in parallel.

        @ xData: per-dimension data; xData[0] must be 2-D, and its first
                 axis gives the number of samples processed
        @ posterior: also collect posteriors
        @ startIdx: forwarded to computeLikelihood
        @ n_jobs: forwarded to Parallel
        return: (ll_idx, ll_logp) or (ll_idx, ll_logp, ll_post), each a
                tuple with one entry per sample.

        Note: cov_type is not forwarded, so computeLikelihood runs with its
        default value.
        '''
        warnings.simplefilter("always", DeprecationWarning)

        samples = [np.array(data) for data in xData]
        # Training input
        seqs = util.convert_sequence(samples).tolist()

        n_seqs, _length = np.shape(samples[0])

        # Estimate loglikelihoods and corresponding posteriors
        worker = delayed(computeLikelihood)
        jobs = (worker(k, self.A, self.B, self.pi, self.F, seqs[k],
                       self.nEmissionDim, self.nState,
                       startIdx=startIdx,
                       bPosterior=posterior, converted_X=True)
                for k in xrange(n_seqs))
        results = Parallel(n_jobs=n_jobs)(jobs)

        # The first column is the sample index tag, which callers do not need.
        if not posterior:
            _, ll_idx, ll_logp = zip(*results)
            return ll_idx, ll_logp

        _, ll_idx, ll_logp, ll_post = zip(*results)
        return ll_idx, ll_logp, ll_post
# Example #8
0
        cov[:, i, j] *= cov_mult[nEmissionDim*i + j]
        
# Script fragment: build the emission matrix B and the initial distribution
# pi, create a multivariate-Gaussian HMM, train it with Baum-Welch and read
# the learned matrices back. nState, mus, cov, F, A, X and cov_type come
# from the part of the script that precedes this fragment.

# Emission probability matrix
# B[i] = [mean vector of state i, flattened covariance of state i]
B = [0] * nState
for i in range(nState):
    B[i] = [[mu[i] for mu in mus]]
    B[i].append(cov[i].flatten())

# pi - initial probabilities per state
# (all probability mass on state 0)
pi = [0.0] * nState
pi[0] = 1.0


ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
print 'Creating Training Data'            
X_train = util.convert_sequence(X) # Training input
X_train = X_train.tolist()
print "training data size: ", np.shape(X_train)

## ml.cmodel.getState(0).setOutProb(1, 0, 0.8)
## print ml.cmodel.getState(0).getOutProb(1)
## print ml.cmodel.getState(0).getOutNum(1)
# NOTE(review): the argument here is 0, while the other 'diag' call sites in
# this file pass 1 -- confirm which value actually enables diagonal covariance.
if cov_type=='diag': ml.setDiagonalCovariance(0)

final_seq = ghmm.SequenceSet(F, X_train)
print 'Run Baum Welch method with (samples, length)', np.shape(X_train)
# Second positional argument is 10000; fixedTrans=1 is passed as a keyword
# (presumably keeps the transition matrix fixed -- confirm in the ghmm build used).
ret = ml.baumWelch(final_seq, 10000, fixedTrans=1)

######################### Test ###########################################
# Learned parameters after training.
[A_new, B_new, pi_new] = ml.asMatrices()
    def fit(self, xData, A=None, B=None, pi=None, cov_mult=None,
            ml_pkl=None, use_pkl=False, cov_type='full', fixed_trans=0,\
            shuffle=False):
        '''
        Input :
        - xData: dimension x sample x length
        Issues:
        - If NaN is returned, the reason can be one of followings,
        -- lower cov
        -- small range of xData (you have to scale it up.)
        '''

        # Daehyung: What is the shape and type of input data?
        if shuffle:
            X = xData
            X = np.swapaxes(X, 0, 1)
            id_list = range(len(X))
            random.shuffle(id_list)
            X = np.array(X)[id_list]
            X = np.swapaxes(X, 0, 1)
        else:
            X = [np.array(data) for data in xData]
        nData = len(xData[0])

        param_dict = {}

        # Load pre-trained HMM without training
        if use_pkl and ml_pkl is not None and os.path.isfile(ml_pkl):
            if self.verbose: print "Load HMM parameters without train the hmm"

            param_dict = ut.load_pickle(ml_pkl)
            self.A = param_dict['A']
            self.B = param_dict['B']
            self.pi = param_dict['pi']
            if self.nEmissionDim == 1:
                self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                               self.A, self.B, self.pi)
            else:
                self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                               self.A, self.B, self.pi)

            out_a_num = param_dict.get('out_a_num', None)
            vec_num = param_dict.get('vec_num', None)
            mat_num = param_dict.get('mat_num', None)
            u_denom = param_dict.get('u_denom', None)
            if out_a_num is not None:
                self.ml.setBaumWelchParams(out_a_num, vec_num, mat_num,
                                           u_denom)

            return True
        else:

            if ml_pkl is None:
                ml_pkl = os.path.join(os.path.dirname(__file__),
                                      'ml_temp_n.pkl')

            if cov_mult is None:
                cov_mult = [1.0] * (self.nEmissionDim**2)

            if A is None:
                if self.verbose: print "Generating a new A matrix"
                # Transition probability matrix (Initial transition probability, TODO?)
                A = util.init_trans_mat(self.nState).tolist()

            if B is None:
                if self.verbose: print "Generating a new B matrix"
                # We should think about multivariate Gaussian pdf.

                mus, cov = util.vectors_to_mean_cov(X,
                                                    self.nState,
                                                    self.nEmissionDim,
                                                    cov_type=cov_type)
                ## print np.shape(mus), np.shape(cov)

                # cov: state x dim x dim
                for i in xrange(self.nEmissionDim):
                    for j in xrange(self.nEmissionDim):
                        cov[:, i, j] *= cov_mult[self.nEmissionDim * i + j]

                if self.verbose:
                    for i, mu in enumerate(mus):
                        print 'mu%i' % i, mu
                    ## print 'cov', cov

                # Emission probability matrix
                B = [0] * self.nState
                for i in range(self.nState):
                    if self.nEmissionDim > 1:
                        B[i] = [[mu[i] for mu in mus]]
                        B[i].append(cov[i].flatten().tolist())
                    else:
                        B[i] = [np.squeeze(mus[0][i]), float(cov[i])]
            if pi is None:
                # pi - initial probabilities per state
                ## pi = [1.0/float(self.nState)] * self.nState
                pi = [0.0] * self.nState
                pi[0] = 1.0

            # print 'Generating HMM'
            # HMM model object
            if self.nEmissionDim == 1:
                self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                               A, B, pi)
            else:
                self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                               A, B, pi)
            if cov_type == 'diag': self.ml.setDiagonalCovariance(1)

            # print 'Creating Training Data'
            X_train = util.convert_sequence(X)  # Training input
            X_train = X_train.tolist()
            if self.verbose: print "training data size: ", np.shape(X_train)

            if self.verbose:
                print 'Run Baum Welch method with (samples, length)', np.shape(
                    X_train)
            final_seq = ghmm.SequenceSet(self.F, X_train)
            ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
            ret = self.ml.baumWelch(final_seq,
                                    10000)  #, fixedTrans=fixed_trans)
            if np.isnan(ret):
                print 'Baum Welch return:', ret
                return 'Failure'
            print 'Baum Welch return:', ret / float(nData)

            [self.A, self.B, self.pi] = self.ml.asMatrices()
            self.A = np.array(self.A)
            self.B = np.array(self.B)

            param_dict['A'] = self.A
            param_dict['B'] = self.B
            param_dict['pi'] = self.pi

            try:
                [out_a_num, vec_num, mat_num,
                 u_denom] = self.ml.getBaumWelchParams()
                param_dict['out_a_num'] = out_a_num
                param_dict['vec_num'] = vec_num
                param_dict['mat_num'] = mat_num
                param_dict['u_denom'] = u_denom
            except:
                print "Install new ghmm!!"

            if ml_pkl is not None: ut.save_pickle(param_dict, ml_pkl)
            return ret / float(nData)