
def test_data():
    
    ##################################################################
    # select MSS
    ##################################################################
    
    mss = expenv.MultiSplitSet.get(379)

    ##################################################################
    # data
    ##################################################################
    
    # fetch data
    instance_set = mss.get_train_data(-1)
    
    # prepare data
    data = PreparedMultitaskData(instance_set, shuffle=True)
    
    # set parameters
    param = Options()
    param.kernel = "WeightedDegreeStringKernel"
    param.wdk_degree = 4
    param.cost = 1.0
    param.transform = 1.0
    param.id = 666
    param.freeze()

    ##################################################################
    # taxonomy
    ##################################################################

    taxonomy = shogun_factory.create_taxonomy(mss.taxonomy.data)

    support = numpy.linspace(0, 100, 4)

    # pairwise task distances (defined for reference, unused below)
    distances = [[0, 1, 2, 2], [1, 0, 2, 2], [2, 2, 0, 1], [2, 2, 1, 0]]

    # create tree normalizer
    tree_normalizer = MultitaskKernelPlifNormalizer(support, data.task_vector_names)

    task_names = data.get_task_names()

    FACTOR = 1.0

    # init gamma matrix
    gammas = numpy.zeros((data.get_num_tasks(), data.get_num_tasks()))
    
    for t1_name in task_names:
        for t2_name in task_names:
            
            similarity = taxonomy.compute_node_similarity(taxonomy.get_id(t1_name), taxonomy.get_id(t2_name))
            gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)] = similarity

    helper.save("/tmp/gammas", gammas)

    gammas = gammas * FACTOR

    cost = param.cost * numpy.sqrt(FACTOR)

    print gammas

    ##########
    # regular normalizer
    
    normalizer = MultitaskKernelNormalizer(data.task_vector_nums)
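
    # note: this normalizer rescales every base kernel entry k(x, x')
    # by the similarity gammas[t, t'] of the tasks the two examples belong to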
    
    for t1_name in task_names:
        for t2_name in task_names:

            similarity = gammas[data.name_to_id(t1_name), data.name_to_id(t2_name)]
            normalizer.set_task_similarity(data.name_to_id(t1_name), data.name_to_id(t2_name), similarity)

    ##################################################################
    # Train SVMs
    ##################################################################
    
    # create shogun objects
    wdk_tree = shogun_factory.create_kernel(data.examples, param)
    lab = shogun_factory.create_labels(data.labels)
    
    wdk_tree.set_normalizer(tree_normalizer)
    wdk_tree.init_normalizer()
    
    print "--->",wdk_tree.get_normalizer().get_name()
    
    svm_tree = SVMLight(cost, wdk_tree, lab)
    svm_tree.set_linadd_enabled(False)
    svm_tree.set_batch_computation_enabled(False)
    
    svm_tree.train()
    
    del wdk_tree
    del tree_normalizer
    
    print "finished training tree-norm SVM:", svm_tree.get_objective()
    
    
    wdk = shogun_factory.create_kernel(data.examples, param)
    wdk.set_normalizer(normalizer)
    wdk.init_normalizer()
    
    print "--->",wdk.get_normalizer().get_name()
    
    svm = SVMLight(cost, wdk, lab)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    
    svm.train()
    
    print "finished training manually set SVM:", svm.get_objective()
    
    
    alphas_tree = svm_tree.get_alphas()
    alphas = svm.get_alphas()
    
    assert(len(alphas_tree)==len(alphas))
    
    for i in xrange(len(alphas)):
        assert(abs(alphas_tree[i] - alphas[i]) < 0.0001)
        
    print "success: all alphas are the same"
Example #3
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps,
                      target_obj):
    """
    implementation using multitask kernel
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    print "task_sim:", tsm

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects
    if isinstance(xt[0], numpy.string_):
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print "num of unique tasks: ", normalizer.get_num_unique_tasks(
        task_indicator)

    # set up kernel
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  # alternatively: LibSVM()

    svm.set_epsilon(eps)
    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often to record objective values etc.
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)

    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    # TODO: set number of threads to 1

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    objectives = [-obj for obj in svm.get_dual_objectives()]

    # disabled diagnostic: recover the primal objective from the dual solution
    if False:

        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()

        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(
            W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt,
            task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times
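
A hedged usage sketch for solver_mtk_shogun on toy vectorial data; the matrices M and L, the tolerance, and the target objective below are made-up illustrations, and a Shogun build with SVMLight support is assumed:

# hypothetical invocation with random dense features and two tasks
import numpy

num_tasks = 2
num_examples = 20
all_xt = [numpy.random.randn(5) for _ in xrange(num_examples)]
all_lt = [1.0 if i % 2 == 0 else -1.0 for i in xrange(num_examples)]
task_indicator = [i % num_tasks for i in xrange(num_examples)]
M = numpy.array([[1.0, 0.5], [0.5, 1.0]])   # task-similarity matrix
L = numpy.linalg.inv(M)                     # Laplacian-style counterpart, same shape
objectives, train_times = solver_mtk_shogun(1.0, all_xt, all_lt, task_indicator,
                                            M, L, eps=1e-5, target_obj=0.0)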
Example #4
for idx in xrange(10):

    # draw a fresh random linear term for this iteration
    f = (-numpy.ones(N) - 2) * numpy.random.randn()

    print "############################"
    print "############################"
    print ""
    print "f:", f
    print "\n"

    svm.set_linear_term(f.astype(numpy.double))
    svm.train()

    sv_idx = svm.get_support_vectors()
    alphas = svm.get_alphas()

    alphas_full = numpy.zeros(N)
    alphas_full[sv_idx] = alphas

    alphas_full = alphas_full * y

    print "svmlight objective:", svm.get_objective()
    print "svmlight alphas:", numpy.array(alphas_full[0:5])

    external_objective = 0.0

    for j in xrange(N):

        external_objective += alphas_full[j] * f[j]
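
    # NOTE (hedged completion, not from the original script): the fragment is
    # truncated here and external_objective so far holds only the linear term.
    # Assuming the dual convention 0.5 * a^T K a + f^T a with label-absorbed
    # alphas a = alpha * y, the quadratic term can be added as below; K is
    # assumed to be the full kernel matrix, e.g. from kernel.get_kernel_matrix().
    quad = 0.0
    for j in xrange(N):
        for k in xrange(N):
            quad += 0.5 * alphas_full[j] * alphas_full[k] * K[j, k]
    external_objective += quad

    print "externally computed objective:", external_objective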