# In this example the Histogram algorithm object computes a histogram over all
# 16-bit unsigned integers in the features.

from tools.load import LoadMatrix
from sg import sg
# Read the example data sets once at import time; they also serve as the
# default arguments of distribution_histogram below.
lm = LoadMatrix()
traindna = lm.load_dna('../data/fm_train_dna.dat')
cubedna = lm.load_cubes('../data/fm_train_cube.dat')

# One row per example run, in the positional order of
# distribution_histogram: [fm_train, fm_cube, order, gap, reverse].
parameter_list = [
    [traindna, cubedna, 3, 0, 'n'],
    [traindna, cubedna, 4, 0, 'n'],
]

def distribution_histogram(fm_train=traindna, fm_cube=cubedna, order=3,
                           gap=0, reverse='n'):
    """Prepare word-string TRAIN features through the static ``sg`` interface.

    The function issues, in order: ``add_preproc SORTWORDSTRING``,
    ``set_features TRAIN`` (with ``fm_train`` as DNA), ``convert`` from
    CHAR strings to WORD strings, and ``attach_preproc TRAIN``.

    Parameters:
        fm_train: data handed to ``set_features`` as the DNA training set.
        fm_cube: accepted for signature compatibility; not used here.
        order, gap, reverse: forwarded to the ``convert`` command together
            with ``order - 1`` (presumably the start offset -- confirm
            against the static interface documentation).

    Returns:
        None. The histogram training and query calls are currently
        disabled (see the commented-out commands below).
    """
    # Disabled: sg('new_distribution', 'HISTOGRAM')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train, 'DNA')
    # Built right before the call so argument evaluation keeps its
    # original position relative to the preceding sg commands.
    convert_args = ('TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
                    order, order - 1, gap, reverse)
    sg('convert', *convert_args)
    sg('attach_preproc', 'TRAIN')

    # Disabled histogram workflow, kept for reference:
    #   sg('train_distribution')
    #   histo=sg('get_histogram')
    #
    #   num_examples=11
    #   num_param=sg('get_histogram_num_model_parameters')
    #   for i in xrange(num_examples):
    #       for j in xrange(num_param):
    #           sg('get_log_derivative %d %d' % (j, i))
    #
    #   sg('get_log_likelihood')
    #   return sg('get_log_likelihood_sample')
# Example #2
# 0
#!/usr/bin/env python
from tools.load import LoadMatrix
# Load the cube sequences used by every parameter set below.
lm = LoadMatrix()
data = lm.load_cubes('../data/fm_train_cube.dat')

# One row per example run, in the positional order of distribution_hmm:
# [fm_cube, N, M, pseudo, order, gap, reverse, num_examples].
parameter_list = [
    [data, 1, 64, 1e-5, 2, 0, False, 5],
    [data, 3, 6, 1e-1, 1, 0, False, 2],
]


# Train an HMM on word features derived from the cube character strings and
# evaluate its log-derivatives for every (parameter, example) pair.
#
# Parameters:
#   fm_cube              -- raw string data loaded into CUBE character features.
#   N, M, pseudo         -- forwarded unchanged to the HMM constructor
#                           (presumably state count, observation count and
#                           pseudo-count -- TODO confirm against the HMM API).
#   order, gap, reverse  -- forwarded to obtain_from_char for the
#                           char -> word conversion.
#   num_examples         -- NOTE(review): the passed value is ignored; it is
#                           overwritten below with feats.get_num_vectors().
#
# NOTE(review): this definition is truncated in this file -- the final
# `for` loop has no body and no return statement is visible.
def distribution_hmm(fm_cube, N, M, pseudo, order, gap, reverse, num_examples):
    # Imported locally, so shogun is only required when the function runs.
    from shogun import StringWordFeatures, StringCharFeatures, CUBE
    from shogun import HMM, BW_NORMAL

    # Wrap the raw strings as character features over the CUBE alphabet,
    # then derive word features that share the same alphabet.
    charfeat = StringCharFeatures(CUBE)
    charfeat.set_features(fm_cube)
    feats = StringWordFeatures(charfeat.get_alphabet())
    feats.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    # Build the model on the word features and train it: first the generic
    # train() call, then Baum-Welch/Viterbi training in BW_NORMAL mode.
    hmm = HMM(feats, N, M, pseudo)
    hmm.train()
    hmm.baum_welch_viterbi_train(BW_NORMAL)

    # Replaces the num_examples argument with the actual number of vectors.
    num_examples = feats.get_num_vectors()
    num_param = hmm.get_num_model_parameters()
    # Evaluate every log-derivative; the results are discarded, so this
    # only exercises the call.
    for i in range(num_examples):
        for j in range(num_param):
            hmm.get_log_derivative(j, i)

    # Accumulators for the per-example loop that follows; the loop body is
    # missing from this file (see the truncation note above the function).
    best_path = 0
    best_path_state = 0
    for i in range(num_examples):
# In this example the Histogram algorithm object computes a histogram over all
# 16-bit unsigned integers in the features.

from tools.load import LoadMatrix
from sg import sg

# Read the example data sets once at import time; they also serve as the
# default arguments of distribution_histogram below.
lm = LoadMatrix()
traindna = lm.load_dna("../data/fm_train_dna.dat")
cubedna = lm.load_cubes("../data/fm_train_cube.dat")

# One row per example run, in the positional order of
# distribution_histogram: [fm_train, fm_cube, order, gap, reverse].
parameter_list = [
    [traindna, cubedna, 3, 0, "n"],
    [traindna, cubedna, 4, 0, "n"],
]


def distribution_histogram(
    fm_train=traindna,
    fm_cube=cubedna,
    order=3,
    gap=0,
    reverse="n",
):
    """Set up sorted word-string TRAIN features via the static ``sg`` interface.

    Issues four commands in sequence: register the SORTWORDSTRING
    preprocessor, load ``fm_train`` as DNA TRAIN features, convert them
    from CHAR strings to WORD strings, and attach the preprocessor to TRAIN.

    Parameters:
        fm_train: data passed to ``set_features`` as the DNA training set.
        fm_cube: unused; retained so existing callers keep working.
        order, gap, reverse: passed to ``convert`` along with ``order - 1``
            (presumably the start offset -- confirm against the static
            interface documentation).

    Returns:
        None. The ``new_distribution`` call that would create the histogram
        is commented out inside the body.
    """
    # 	sg('new_distribution', 'HISTOGRAM')
    sg("add_preproc", "SORTWORDSTRING")
    sg("set_features", "TRAIN", fm_train, "DNA")

    # Source and target feature descriptions for the conversion.
    source_kind = ("STRING", "CHAR")
    target_kind = ("STRING", "WORD")
    sg("convert", "TRAIN", *(source_kind + target_kind + (order, order - 1, gap, reverse)))

    sg("attach_preproc", "TRAIN")


# 	sg('train_distribution')
# 	histo=sg('get_histogram')

# 	num_examples=11
# 	num_param=sg('get_histogram_num_model_parameters')
# 	for i in xrange(num_examples):
# 		for j in xrange(num_param):
# 			sg('get_log_derivative %d %d' % (j, i))