Пример #1
0
def kernel_top_modular(fm_train_dna=traindat,
                       fm_test_dna=testdat,
                       label_train_dna=label_traindat,
                       pseudo=1e-1,
                       order=1,
                       gap=0,
                       reverse=False,
                       kargs=[1, False, True]):
    """Compute polynomial-kernel matrices over TOP features of DNA strings.

    One HMM is trained on the positively labelled training sequences and one
    on the negatively labelled ones. Both HMMs are then pointed at the full
    training set (and, via copies, at the test set) to build ``TOPFeatures``,
    on which a ``PolyKernel`` is evaluated.

    The positive/negative subsets are derived from ``fm_train_dna`` and
    ``label_train_dna`` inside the function, so it no longer depends on
    ``fm_hmm_pos`` / ``fm_hmm_neg`` being defined outside it and it honours
    the data actually passed by the caller.

    NOTE(review): labels are assumed to be encoded as +1 (positive) and -1
    (negative) -- confirm against the code that loads ``label_train_dna``.

    Args:
        fm_train_dna: Sequence of training DNA strings.
        fm_test_dna: Sequence of test DNA strings.
        label_train_dna: One label per entry of ``fm_train_dna``.
        pseudo: Pseudo-count value passed to the ``HMM`` constructor.
        order: Word order; ``obtain_from_char`` receives ``order - 1`` and
            ``order`` as its start/order arguments.
        gap: Gap argument forwarded to ``obtain_from_char``.
        reverse: Reverse flag forwarded to ``obtain_from_char``.
        kargs: Extra positional arguments appended to the ``PolyKernel``
            constructor call. The list is only unpacked, never mutated.

    Returns:
        A tuple ``(km_train, km_test, kernel)``: the train-vs-train kernel
        matrix, the train-vs-test kernel matrix, and the kernel object
        (left initialised on train/test features).

    Raises:
        ValueError: If no training sequence carries label +1, or none
            carries label -1.
    """
    from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
    from shogun.Kernel import PolyKernel
    from shogun.Distribution import HMM, BW_NORMAL

    N = 1  # toy HMM with 1 state
    M = 4  # 4 observations -> DNA

    def _to_word_features(strings):
        # Wrap raw DNA strings as char features and convert them to word
        # features using the order/gap/reverse settings of this call.
        charfeat = StringCharFeatures(strings, DNA)
        wordfeat = StringWordFeatures(charfeat.get_alphabet())
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse)
        return wordfeat

    # Split the training sequences by class label instead of relying on
    # names defined outside this function.
    fm_hmm_pos = [seq for seq, lab in zip(fm_train_dna, label_train_dna)
                  if lab == 1]
    fm_hmm_neg = [seq for seq, lab in zip(fm_train_dna, label_train_dna)
                  if lab == -1]
    if not fm_hmm_pos or not fm_hmm_neg:
        raise ValueError(
            "need at least one +1 and one -1 labelled training sequence "
            "(got %d positive, %d negative)"
            % (len(fm_hmm_pos), len(fm_hmm_neg)))

    # train HMM for positive class
    hmm_pos_train = _to_word_features(fm_hmm_pos)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # train HMM for negative class
    hmm_neg_train = _to_word_features(fm_hmm_neg)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # Kernel training / testing data
    wordfeats_train = _to_word_features(fm_train_dna)
    wordfeats_test = _to_word_features(fm_test_dna)

    # get kernel on training data: both trained HMMs now observe the full
    # training set
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = TOPFeatures(10, pos, neg, False, False)
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # get kernel on testing data: copy the HMMs so the models backing
    # feats_train keep observing the training set
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = TOPFeatures(10, pos_clone, neg_clone, False, False)
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
Пример #2
0
def _run_top_fisher():
    """Build TOP and Fisher-kernel features and pass each to `_compute_top_fisher`.

    Steps, in order:
      1. Initialise the random generator with ``dataop.INIT_RANDOM``.
      2. Fetch cube data and turn it into word features.
      3. Create and train a "positive" and a "negative" HMM on the training
         features, then copy each and point the copy at the test features.
      4. Wrap the HMM pairs as ``TOPFeatures`` and call
         ``_compute_top_fisher`` with ``name`` set to ``'TOP'``.
      5. Replace them with ``FKFeatures`` (test features reuse the ``a``
         value obtained on the training features) and call
         ``_compute_top_fisher`` again with ``name`` set to ``'FK'``.

    All parameter keys carry the ``'topfk_'`` prefix. Per the original
    docstring, the downstream computation uses a linear kernel.
    """
    # put some constantness into randomness
    Math_init_random(dataop.INIT_RANDOM)

    cubes = dataop.get_cubes(4, 8)
    prefix = 'topfk_'

    def key(suffix):
        # Every entry of the parameter dict is stored under prefix + suffix.
        return prefix + suffix

    params = {
        key('N'): 3,
        key('M'): 6,
        key('pseudo'): 1e-1,
        key('order'): 1,
        key('gap'): 0,
        key('reverse'): False,
        key('alphabet'): 'CUBE',
        key('feature_class'): 'string_complex',
        key('feature_type'): 'Word',
        key('data_train'): numpy.matrix(cubes['train']),
        key('data_test'): numpy.matrix(cubes['test']),
    }

    # The alphabet is stored by name and resolved to the object of that
    # name here (the string is a constant defined just above).
    alphabet = eval(params[key('alphabet')])
    wordfeats = featop.get_features(
        params[key('feature_class')],
        params[key('feature_type')],
        cubes,
        alphabet,
        params[key('order')],
        params[key('gap')],
        params[key('reverse')],
    )

    def fit_hmm():
        # Fresh HMM on the training features, trained exactly as before:
        # train() followed by baum_welch_viterbi_train(BW_NORMAL).
        model = HMM(wordfeats['train'], params[key('N')],
                    params[key('M')], params[key('pseudo')])
        model.train()
        model.baum_welch_viterbi_train(BW_NORMAL)
        return model

    def observe_test(trained):
        # Copy a trained HMM and let the copy observe the test features.
        copy = HMM(trained)
        copy.set_observations(wordfeats['test'])
        return copy

    # Construction/training order is kept: positive first, then negative.
    pos_train = fit_hmm()
    neg_train = fit_hmm()
    pos_test = observe_test(pos_train)
    neg_test = observe_test(neg_train)

    # --- TOP features ---
    feats = {
        'train': TOPFeatures(10, pos_train, neg_train, False, False),
        'test': TOPFeatures(10, pos_test, neg_test, False, False),
    }
    params[key('name')] = 'TOP'
    _compute_top_fisher(feats, params)

    # --- Fisher-kernel features (same dict and params objects are reused) ---
    fk_train = FKFeatures(10, pos_train, neg_train)
    feats['train'] = fk_train
    fk_train.set_opt_a(-1)  # estimate prior
    fk_test = FKFeatures(10, pos_test, neg_test)
    feats['test'] = fk_test
    fk_test.set_a(fk_train.get_a())  # use prior from training data
    params[key('name')] = 'FK'
    _compute_top_fisher(feats, params)