Exemplo n.º 1
0
def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples):
	from modshogun import StringWordFeatures, StringCharFeatures, CUBE
	from modshogun import HMM, BW_NORMAL

	charfeat=StringCharFeatures(CUBE)
	charfeat.set_features(fm_cube)
	feats=StringWordFeatures(charfeat.get_alphabet())
	feats.obtain_from_char(charfeat, order-1, order, gap, reverse)

	hmm=HMM(feats, N, M, pseudo)
	hmm.train()
	hmm.baum_welch_viterbi_train(BW_NORMAL)

	num_examples=feats.get_num_vectors()
	num_param=hmm.get_num_model_parameters()
	for i in range(num_examples):
		for j in range(num_param):
			hmm.get_log_derivative(j, i)

	best_path=0
	best_path_state=0
	for i in range(num_examples):
		best_path+=hmm.best_path(i)
		for j in range(N):
			best_path_state+=hmm.get_best_path_state(i, j)

	lik_example = hmm.get_log_likelihood()
	lik_sample = hmm.get_log_likelihood_sample()

	return lik_example, lik_sample, hmm
Exemplo n.º 2
0
def _evaluate_top_fisher(indata, prefix):
    feats = {}
    wordfeats = util.get_features(indata, prefix)

    pos_train = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
    pos_train.train()
    pos_train.baum_welch_viterbi_train(BW_NORMAL)
    neg_train = HMM(wordfeats['train'], indata[prefix + 'N'],
                    indata[prefix + 'M'], indata[prefix + 'pseudo'])
    neg_train.train()
    neg_train.baum_welch_viterbi_train(BW_NORMAL)
    pos_test = HMM(pos_train)
    pos_test.set_observations(wordfeats['test'])
    neg_test = HMM(neg_train)
    neg_test.set_observations(wordfeats['test'])

    if indata[prefix + 'name'] == 'TOP':
        feats['train'] = TOPFeatures(10, pos_train, neg_train, False, False)
        feats['test'] = TOPFeatures(10, pos_test, neg_test, False, False)
    else:
        feats['train'] = FKFeatures(10, pos_train, neg_train)
        feats['train'].set_opt_a(-1)  #estimate prior
        feats['test'] = FKFeatures(10, pos_test, neg_test)
        feats['test'].set_a(
            feats['train'].get_a())  #use prior from training data

    prefix = 'kernel_'
    args = util.get_args(indata, prefix)
    kernel = PolyKernel(feats['train'], feats['train'], *args)
    #	kernel=PolyKernel(*args)
    #	kernel.init(feats['train'], feats['train'])
    km_train = max(
        abs(indata[prefix + 'matrix_train'] - kernel.get_kernel_matrix()).flat)
    kernel.init(feats['train'], feats['test'])
    km_test = max(
        abs(indata[prefix + 'matrix_test'] - kernel.get_kernel_matrix()).flat)

    return util.check_accuracy(indata[prefix + 'accuracy'],
                               km_train=km_train,
                               km_test=km_test)
Exemplo n.º 3
0
def _evaluate_top_fisher (indata, prefix):
	feats={}
	wordfeats=util.get_features(indata, prefix)

	pos_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'],
		indata[prefix+'pseudo'])
	pos_train.train()
	pos_train.baum_welch_viterbi_train(BW_NORMAL)
	neg_train=HMM(wordfeats['train'], indata[prefix+'N'], indata[prefix+'M'],
		indata[prefix+'pseudo'])
	neg_train.train()
	neg_train.baum_welch_viterbi_train(BW_NORMAL)
	pos_test=HMM(pos_train)
	pos_test.set_observations(wordfeats['test'])
	neg_test=HMM(neg_train)
	neg_test.set_observations(wordfeats['test'])

	if indata[prefix+'name']=='TOP':
		feats['train']=TOPFeatures(10, pos_train, neg_train, False, False)
		feats['test']=TOPFeatures(10, pos_test, neg_test, False, False)
	else:
		feats['train']=FKFeatures(10, pos_train, neg_train)
		feats['train'].set_opt_a(-1) #estimate prior
		feats['test']=FKFeatures(10, pos_test, neg_test)
		feats['test'].set_a(feats['train'].get_a()) #use prior from training data

	prefix='kernel_'
	args=util.get_args(indata, prefix)
	kernel=PolyKernel(feats['train'], feats['train'], *args)
#	kernel=PolyKernel(*args)
#	kernel.init(feats['train'], feats['train'])
	km_train=max(abs(
		indata[prefix+'matrix_train']-kernel.get_kernel_matrix()).flat)
	kernel.init(feats['train'], feats['test'])
	km_test=max(abs(
		indata[prefix+'matrix_test']-kernel.get_kernel_matrix()).flat)

	return util.check_accuracy(indata[prefix+'accuracy'],
		km_train=km_train, km_test=km_test)