Пример #1
0
def classifier_multiclassmachine_modular(fm_train_real=traindat,
                                         fm_test_real=testdat,
                                         label_train_multiclass=label_traindat,
                                         width=2.1,
                                         C=1,
                                         epsilon=1e-5):
    """Train a one-vs-rest multiclass machine on LibSVM and label the test data.

    Arguments:
    fm_train_real -- training matrix handed to RealFeatures
    fm_test_real -- test matrix handed to RealFeatures
    label_train_multiclass -- training labels handed to MulticlassLabels
    width -- width argument of the GaussianKernel
    C -- cost constant applied to the underlying LibSVM (both classes)
    epsilon -- value passed to LibSVM.set_epsilon

    Return:
    whatever get_labels() yields on the result of mc_classifier.apply()
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibSVM()
    # C was previously accepted but never used; apply it to both classes so
    # the argument actually influences training.
    classifier.set_C(C, C)
    classifier.set_epsilon(epsilon)
    # Single-argument print call: same output on Python 2 and Python 3.
    print(labels.get_labels())
    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(),
                                            kernel, classifier, labels)
    mc_classifier.train()

    # Re-initialise the kernel as train-vs-test before applying the machine.
    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out
Пример #2
0
class svm_splice_model(object):
    """LibSVM over a weighted-degree string kernel, rebuilt from stored parameters."""

    def __init__(self, order, traindat, alphas, b, window, consensus):
        """Assemble the SVM from previously computed alphas and bias.

        Arguments:
        order -- kernel order; converted with int() before use
        traindat -- data handed to StringCharFeatures(traindat, DNA)
        alphas -- coefficients installed with set_alphas
        b -- bias installed with set_bias
        window -- (window_left, offset, window_right) triple; it was a
                  tuple-unpacking parameter before, which Python 3 rejects
                  (PEP 3113). Positional callers are unaffected.
        consensus -- stored unchanged on the instance
        """
        window_left, offset, window_right = window

        f = StringCharFeatures(traindat, DNA)
        wd_kernel = WeightedDegreeStringKernel(f, f, int(order))
        wd_kernel.io.set_target_to_stdout()

        self.svm = LibSVM()
        self.svm.set_kernel(wd_kernel)
        self.svm.set_alphas(alphas)
        # Support-vector indices are simply 0 .. len(alphas) - 1.
        self.svm.set_support_vectors(
            numpy.arange(len(alphas), dtype=numpy.int32))
        self.svm.set_bias(b)
        self.svm.io.set_target_to_stdout()
        # Use as many threads as the machine reports CPUs.
        self.svm.parallel.set_num_threads(self.svm.parallel.get_num_cpus())
        self.svm.set_linadd_enabled(True)
        self.svm.set_batch_computation_enabled(True)

        self.window_left = int(window_left)
        self.window_right = int(window_right)

        self.consensus = consensus
        self.wd_kernel = wd_kernel
        self.traindat = f
        # NOTE(review): unlike the two window bounds, offset is stored
        # without an int() conversion -- confirm that is intended.
        self.offset = offset
Пример #3
0
def classifier_libsvm_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              width=2.1,
                              C=1,
                              epsilon=1e-5):
    """Train a Gaussian-kernel LibSVM and classify the test data.

    Arguments:
    fm_train_real -- training matrix handed to RealFeatures
    fm_test_real -- test matrix handed to RealFeatures
    label_train_twoclass -- training labels handed to Labels
    width -- width argument of the GaussianKernel
    C -- cost constant passed to the LibSVM constructor
    epsilon -- value passed to LibSVM.set_epsilon

    Return:
    (predictions, svm, predictions.get_labels())
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = Labels(label_train_twoclass)

    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    # Re-initialise the kernel as train-vs-test, then classify exactly once.
    # The earlier version classified twice and discarded the first result,
    # along with unused support-vector and alpha lookups.
    kernel.init(feats_train, feats_test)
    predictions = svm.classify()
    return predictions, svm, predictions.get_labels()
Пример #4
0
def svm_learn(kernel, labels, options):
    """Train an SVM, preferring SVMLight and falling back to LibSVM.

    Arguments:
    kernel -- kernel object from the Shogun toolbox
    labels -- list of labels; converted to a double array for Labels
    options -- object providing svmC, epsilon, weight and quiet

    Return:
    trained svm object
    """
    cost = options.svmC
    targets = Labels(numpy.array(labels, dtype=numpy.double))

    # Fall back to LibSVM when the name SVMLight is not defined here.
    try:
        svm = SVMLight(cost, kernel, targets)
    except NameError:
        svm = LibSVM(cost, kernel, targets)

    # Deliberately an equality test against False, as in the original.
    verbose = options.quiet == False
    if verbose:
        svm.io.set_loglevel(MSG_INFO)
        svm.io.set_target_to_stderr()

    svm.set_epsilon(options.epsilon)
    svm.parallel.set_num_threads(1)
    # A weight other than 1.0 scales the second cost constant only.
    if options.weight != 1.0:
        svm.set_C(cost, cost * options.weight)
    svm.train()

    # Turn logging back down once training has finished.
    if verbose:
        svm.io.set_loglevel(MSG_ERROR)

    return svm
Пример #5
0
def libsvm():
    """Train a Gaussian-kernel LibSVM on module-level data and print the error.

    Reads fm_train_real, fm_test_real, label_train_twoclass and testlab from
    the enclosing module, and uses the module-level mean() and sign().
    Prints 'LibSVM' first and the computed test error last; returns nothing.
    """
    print('LibSVM')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Evaluation import PerformanceMeasures
    from shogun.Classifier import LibSVM

    rbf_width = 2.1
    cost = 1
    tolerance = 1e-5

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    rbf = GaussianKernel(train_feats, train_feats, rbf_width)
    train_labels = Labels(label_train_twoclass)

    machine = LibSVM(cost, rbf, train_labels)
    machine.set_epsilon(tolerance)
    machine.train()

    # NOTE(review): the result of this call is not used. It is kept because
    # the argument-less classify() below presumably relies on the kernel
    # state this call leaves behind -- confirm before removing.
    machine.classify(test_feats)
    predicted = machine.classify().get_labels()

    # Fraction of predictions whose sign disagrees with testlab.
    error_rate = mean(sign(predicted) != testlab)
    print(error_rate)
Пример #6
0
def modelselection_grid_search_kernel():
    """Grid-search LibSVM parameters by cross-validation on random data.

    Prints the selectable parameters, the parameter tree, the best
    combination and the final cross-validation result.

    Return:
    0
    """
    fold_count = 3
    sample_count = 20
    feature_dim = 3

    # random (meaningless) data wrapped in a feature object
    data = rand(feature_dim, sample_count)
    features = RealFeatures()
    features.set_feature_matrix(data)

    # two classes: +1 at even indices, -1 at odd indices
    labels = Labels(sample_count)
    for idx in range(sample_count):
        labels.set_label(idx, -1 if idx % 2 else 1)

    classifier = LibSVM()

    # stratified splitting, scored by accuracy
    splitter = StratifiedCrossValidationSplitting(labels, fold_count)
    criterion = ContingencyTableEvaluation(ACCURACY)

    # one run per candidate while the grid is being searched
    cross = CrossValidation(classifier, features, labels, splitter,
                            criterion)
    cross.set_num_runs(1)

    # list every parameter that is available for model selection
    classifier.print_modsel_params()

    tree = create_param_tree()
    tree.print_tree()

    search = GridSearchModelSelection(tree, cross)
    best = search.select_model(True)
    print("best parameter(s):")
    best.print_tree()

    best.apply_to_machine(classifier)

    # more runs for tighter confidence intervals on the final estimate
    cross.set_num_runs(10)
    cross.set_conf_int_alpha(0.01)
    outcome = cross.evaluate()
    print("result: ")
    outcome.print_result()

    return 0
Пример #7
0
def modelselection_grid_search_kernel(num_subsets, num_vectors, dim_vectors):
    """Grid-search LibSVM parameters by cross-validation on seeded random data.

    Arguments:
    num_subsets -- number of subsets for the stratified splitting
    num_vectors -- number of random vectors (and labels) to generate
    dim_vectors -- first dimension of the random feature matrix

    Return:
    (classifier, result, mean), where result comes from the final
    cross-validation and mean is read from its CrossValidationResult cast
    """
    # fixed seeds for reproducibility
    Math.init_random(1)
    random.seed(1)

    # random (meaningless) data wrapped in a feature object
    data = random.rand(dim_vectors, num_vectors)
    features = RealFeatures()
    features.set_feature_matrix(data)

    # two classes: +1 at even indices, -1 at odd indices
    labels = BinaryLabels(num_vectors)
    for idx in range(num_vectors):
        labels.set_label(idx, -1 if idx % 2 else 1)

    classifier = LibSVM()

    # stratified splitting, scored by accuracy
    splitter = StratifiedCrossValidationSplitting(labels, num_subsets)
    criterion = ContingencyTableEvaluation(ACCURACY)

    # one run per candidate while the grid is being searched
    cross = CrossValidation(classifier, features, labels, splitter, criterion)
    cross.set_num_runs(1)

    # search silently (print_state is False) and install the best combination
    search = GridSearchModelSelection(create_param_tree(), cross)
    best = search.select_model(False)
    best.apply_to_machine(classifier)

    # more runs for tighter confidence intervals on the final estimate
    cross.set_num_runs(10)
    cross.set_conf_int_alpha(0.01)
    result = cross.evaluate()
    mean_score = CrossValidationResult.obtain_from_generic(result).mean

    return classifier, result, mean_score
Пример #8
0
def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    """Cross-validate an MKL classifier while storing its kernel weights.

    Arguments:
    traindat -- training matrix handed to RealFeatures
    label_traindat -- labels handed to BinaryLabels

    Return:
    nothing; the stored MKL weights are fetched at the end but are neither
    printed nor returned
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import BinaryLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import LibSVM, MKLClassification
    from shogun.Mathematics import Statistics

    # the same feature object is appended three times, once per sub-kernel
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    for _ in range(3):
        comb_features.append_feature_obj(features)
    labels = BinaryLabels(label_traindat)

    # three Gaussian kernels that differ only in their second argument
    kernel = CombinedKernel()
    for second_arg in (0.1, 1, 2):
        kernel.append_kernel(GaussianKernel(10, second_arg))

    # MKL on top of LibSVM; interleaved optimisation is switched off
    # (the original comment attributes this to a memory bug)
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # stratified 5-fold splitting, scored by accuracy
    splitter = StratifiedCrossValidationSplitting(labels, 5)
    criterion = ContingencyTableEvaluation(ACCURACY)

    cross_validation = CrossValidation(svm, comb_features, labels,
                                       splitter, criterion)
    cross_validation.set_autolock(False)

    # attach the output object that records MKL weights
    mkl_storage = CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)

    result = cross_validation.evaluate()

    # fetch the recorded weights (currently unused)
    weights = mkl_storage.get_mkl_weights()
Пример #9
0
def svm_train(kernel, labels, C1, C2=None):
    """Train a LibSVM on the given kernel with progress output disabled.

    Arguments:
    kernel -- kernel object; its progress output is disabled first
    labels -- labels passed to the LibSVM constructor
    C1 -- cost constant passed to the LibSVM constructor
    C2 -- optional second cost; when truthy, set_C(C1, C2) is applied

    Return:
    the trained svm object
    """
    kernel.io.disable_progress()

    machine = LibSVM(C1, kernel, labels)
    if C2:
        machine.set_C(C1, C2)

    # training runs on a single thread
    machine.parallel.set_num_threads(1)
    machine.io.disable_progress()
    machine.train()
    return machine
Пример #10
0
def init_svm(task_type, kernel, labels):
    """A factory for creating the right svm type.

    Arguments:
    task_type -- 'Binary Classification', 'Multi Class Classification'
                 or 'Regression'
    kernel -- kernel passed to the svm constructor
    labels -- labels passed to the svm constructor

    Return:
    a LibSVM, LibSVMMultiClass or LibSVR instance (not yet trained)

    Raises:
    ValueError -- for any other task_type. This used to print a message and
                  then fail with UnboundLocalError on the return statement.
    """
    C = 1
    epsilon = 1e-5
    if task_type == 'Binary Classification':
        return LibSVM(C, kernel, labels)
    if task_type == 'Multi Class Classification':
        return LibSVMMultiClass(C, kernel, labels)
    if task_type == 'Regression':
        tube_epsilon = 1e-2
        svm = LibSVR(C, epsilon, kernel, labels)
        svm.set_tube_epsilon(tube_epsilon)
        return svm

    raise ValueError('%s unknown!' % (task_type,))
def kernel_combined_custom_poly_modular(fm_train_real=traindat,
                                        fm_test_real=testdat,
                                        fm_label_twoclass=label_traindat):
    """Train and apply a LibSVM on a combined custom + polynomial kernel.

    Arguments:
    fm_train_real -- training matrix handed to RealFeatures
    fm_test_real -- test matrix handed to RealFeatures
    fm_label_twoclass -- training labels handed to Labels

    Return:
    (kernel matrix of the train-vs-test combined kernel, that kernel)
    """
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM

    def precomputed_poly(lhs, rhs):
        # CustomKernel holding the matrix of PolyKernel(10, 3) on lhs vs rhs
        poly = PolyKernel(10, 3)
        poly.init(lhs, rhs)
        return CustomKernel(poly.get_kernel_matrix())

    # training: precomputed sub-kernel first, then a live PolyKernel(10, 2)
    train_kernel = CombinedKernel()
    train_feats = CombinedFeatures()

    base_train = RealFeatures(fm_train_real)
    train_kernel.append_kernel(precomputed_poly(base_train, base_train))

    sub_train = RealFeatures(fm_train_real)
    train_feats.append_feature_obj(sub_train)
    train_kernel.append_kernel(PolyKernel(10, 2))

    train_kernel.init(train_feats, train_feats)

    svm = LibSVM(1.0, train_kernel, Labels(fm_label_twoclass))
    svm.train()

    # prediction: same layout, with the test data on the right-hand side
    test_kernel = CombinedKernel()
    test_feats = CombinedFeatures()

    base_test = RealFeatures(fm_test_real)
    test_kernel.append_kernel(precomputed_poly(base_train, base_test))

    sub_test = RealFeatures(fm_test_real)
    test_feats.append_feature_obj(sub_test)
    test_kernel.append_kernel(PolyKernel(10, 2))
    test_kernel.init(train_feats, test_feats)

    svm.set_kernel(test_kernel)
    # classification output is not used; only the kernel matrix is returned
    svm.classify()
    return test_kernel.get_kernel_matrix(), test_kernel
Пример #12
0
def bench_shogun(X, y, T, valid):
    """Time one Shogun LibSVM fit/predict cycle and score it against valid.

    Arguments:
    X -- training data; its transpose is handed to RealFeatures
    y -- training labels; cast to float64 for Labels
    T -- test data; its transpose is handed to RealFeatures
    valid -- values compared with the sign of the predictions

    Return:
    (score, elapsed), where score is the mean of sign(prediction) == valid
    and elapsed is a datetime difference. The kernel width comes from the
    module-level name sigma.
    """
    from shogun.Classifier import LibSVM
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel

    # the clock starts after the imports, as in the original
    t0 = datetime.now()

    train_feats = RealFeatures(X.T)
    test_feats = RealFeatures(T.T)
    targets = Labels(y.astype(np.float64))

    rbf = GaussianKernel(train_feats, train_feats, sigma)
    machine = LibSVM(1., rbf, targets)
    machine.train()

    decision = machine.classify(test_feats).get_labels()
    accuracy = np.mean(np.sign(decision) == valid)
    elapsed = datetime.now() - t0
    return accuracy, elapsed
def classifier_libsvm_minimal_modular(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_twoclass=label_traindat,
                                      width=2.1,
                                      C=1):
    """Minimal LibSVM example: train on a Gaussian kernel, apply to test data.

    Arguments:
    fm_train_real -- training matrix handed to RealFeatures
    fm_test_real -- test matrix handed to RealFeatures
    label_train_twoclass -- training labels handed to BinaryLabels
    width -- width argument of the GaussianKernel
    C -- cost constant passed to the LibSVM constructor

    Return:
    nothing; an error rate is computed at the end but not returned
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import LibSVM
    from shogun.Kernel import GaussianKernel

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    rbf = GaussianKernel(train_feats, train_feats, width)

    machine = LibSVM(C, rbf, BinaryLabels(label_train_twoclass))
    machine.train()

    # switch the kernel to train-vs-test before applying the machine
    rbf.init(train_feats, test_feats)
    predicted = machine.apply().get_labels()

    # NOTE(review): predictions for the test data are compared with the
    # *training* labels here, and the value is discarded -- confirm intent.
    testerr = mean(sign(predicted) != label_train_twoclass)
Пример #14
0
def classifier_custom_kernel_modular(C=1, dim=7):
    """Train a LibSVM on a random symmetric custom kernel matrix.

    Arguments:
    C -- cost constant passed to the LibSVM constructor; also part of the seed
    dim -- number of examples (the kernel matrix is dim x dim)

    Return:
    (svm, out), where out is get_labels() of svm.apply()
    """
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import CustomKernel
    from shogun.Classifier import LibSVM

    from numpy import diag, ones, sign
    from numpy.random import rand, seed

    # seed from the arguments so equal arguments give equal data
    seed((C, dim))

    lab = sign(2 * rand(dim) - 1)
    data = rand(dim, dim)
    # element-wise product with the transpose is symmetric; the identity is
    # added on the diagonal
    symdata = data * data.T + diag(ones(dim))

    kernel = CustomKernel()
    # Install the symmetric matrix. The raw, asymmetric `data` was passed
    # here before, which left symdata unused.
    kernel.set_full_kernel_matrix_from_full(symdata)
    labels = Labels(lab)
    svm = LibSVM(C, kernel, labels)
    svm.train()
    # apply once; the earlier version applied twice and dropped one result
    out = svm.apply().get_labels()
    return svm, out
Пример #15
0
# Script fragment: labels_presvm, examples_presvm and the Shogun names used
# below are defined outside the visible part of the file.

# Set the label at these five indices to 1.
labels_presvm[2] = 1
labels_presvm[12] = 1
labels_presvm[15] = 1
labels_presvm[8] = 1
labels_presvm[19] = 1

# String features over the DNA alphabet and a weighted-degree string kernel
# on them (third argument 1 -- presumably the degree; confirm).
feat_presvm = StringCharFeatures(DNA)
feat_presvm.set_features(examples_presvm)
wdk_presvm = WeightedDegreeStringKernel(feat_presvm, feat_presvm, 1)
lab_presvm = Labels(numpy.array(labels_presvm))

# Train the same problem with two solvers.
presvm = SVMLight(1, wdk_presvm, lab_presvm)
presvm.train()

presvm2 = LibSVM(1, wdk_presvm, lab_presvm)
presvm2.train()

print "svmlight", presvm.get_objective()
print "libsvm", presvm2.get_objective()

# Both solvers must reach the same objective to within 0.001.
assert (abs(presvm.get_objective() - presvm2.get_objective()) <= 0.001)

# NOTE(review): this prints the SVMLight objective again under another label.
print "simple svm", presvm.get_objective()

print "len(examples_presvm)", len(examples_presvm)

print "##############"

#############################################
#    compute linear term manually
from numpy import random
from shogun.Classifier import LibSVM
from shogun.Features import RealFeatures, Labels
from shogun.Kernel import LinearKernel

# Shape of the random data matrices: num_feats rows, num_vec columns.
num_feats = 23
num_vec = 42

# Arguments for LinearKernel(size_cache, scale) below.
scale = 2.1
size_cache = 10

# One LibSVM instance is configured once and reused in every iteration.
C = 0.017
epsilon = 1e-5
tube_epsilon = 1e-2
svm = LibSVM()
svm.set_C(C, C)
svm.set_epsilon(epsilon)
svm.set_tube_epsilon(tube_epsilon)

for i in xrange(3):
    # Fresh random data each round. feats_train and feats_test are created
    # but not used in the visible part of the loop.
    data_train = random.rand(num_feats, num_vec)
    data_test = random.rand(num_feats, num_vec)
    feats_train = RealFeatures(data_train)
    feats_test = RealFeatures(data_test)
    # Rounding uniform samples gives 0/1, mapped to -1/+1.
    labels = Labels(random.rand(num_vec).round() * 2 - 1)

    # Install a new kernel and labels on the shared svm, then read the
    # kernel back.
    svm.set_kernel(LinearKernel(size_cache, scale))
    svm.set_labels(labels)

    kernel = svm.get_kernel()
##################################################################

# Script fragment: wdk, N, relate_tasks, labels, lab, QP and CustomKernel are
# defined outside the visible part of the file.
km = wdk.get_kernel_matrix()

# Scale every kernel entry by the task relation of its index pair.
for i in xrange(N):
    for j in xrange(N):
        km[i, j] = km[i, j] * relate_tasks(i, j)
        #km = km*1.0

print km
# Build the QP matrix. Assuming km is an ndarray and labels is a flat
# length-N sequence, this yields K[i, j] = y[i] * km[i, j] * y[j].
y = numpy.array(labels)
K = numpy.transpose(y.flatten() * (km * y.flatten()).transpose())
f = -numpy.ones(N)
C = 1.0

# Author's note: QP does not accept ndarray as a type, it must be an array.
# Constraints: y . x = 0 and 0 <= x <= C.
p = QP(K, f, Aeq=y, beq=0, lb=numpy.zeros(N), ub=C * numpy.ones(N))
r = p.solve('cvxopt_qp', iprint=0)

#print "cvxopt objective:", r.ff
print "externally modified kernel. objective:", r.ff

# Train LibSVM on the same modified matrix through a CustomKernel.
ck = CustomKernel()
ck.set_full_kernel_matrix_from_full(km)
#
svm = LibSVM(1, ck, lab)
svm.train()

# NOTE(review): same label text as the QP print above; the two lines can
# only be told apart by their order in the output.
print "externally modified kernel. objective:", svm.get_objective()
Пример #18
0
#############################################

# Create real-valued features as the first step: cast the examples to
# float64 and transpose them before wrapping them in RealFeatures.
examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
examples_presvm = numpy.transpose(examples_presvm)

feat_presvm = RealFeatures(examples_presvm)
lab_presvm = Labels(numpy.array(labels_presvm))
# NOTE(review): despite its name, wdk_presvm is a LinearKernel here.
wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

# Linear solver on the raw features, trained without a bias term.
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_max_iterations(10000)
presvm_liblinear.set_bias_enabled(False)
presvm_liblinear.train()

# Kernel solver on the linear kernel, also trained without a bias term.
presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm)
#presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm)

#presvm_libsvm.io.set_loglevel(MSG_DEBUG)
presvm_libsvm.set_bias_enabled(False)
presvm_libsvm.train()

# Copy the trained weight vector into a fresh, untrained LibLinear instance.
my_w = presvm_liblinear.get_w()
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_w(my_w)

#############################################
#    compute linear term manually
#############################################

# Only the float64 conversion is visible here; the rest of this section
# lies outside the excerpt.
examples = numpy.array(examples, dtype=numpy.float64)
Пример #19
0
Файл: prc.py Проект: frx/shogun
from shogun.Classifier import LibSVM, LDA
from shogun.Evaluation import PRCEvaluation
import util

# Script fragment: GaussianKernel and the plotting functions (subplot, plot,
# grid, title, subplots_adjust) come from imports outside the visible part.
util.set_title('PRC example')
util.DISTANCE=0.5
subplots_adjust(hspace=0.3)

# Data for the two classes, combined into one feature object plus labels.
pos=util.get_realdata(True)
neg=util.get_realdata(False)
features=util.get_realfeatures(pos, neg)
labels=util.get_labels()

# classifiers
gk=GaussianKernel(features, features, 1.0)
svm = LibSVM(1000.0, gk, labels)
svm.train()
# The LDA is trained here but not used in the visible part of the script.
lda=LDA(1,features,labels)
lda.train()

## plot points: first row of each matrix against its second row
subplot(211)
plot(pos[0,:], pos[1,:], "r.")
plot(neg[0,:], neg[1,:], "b.")
grid(True)
title('Data',size=10)

# plot PRC for SVM: evaluate the SVM output on the training data against
# the training labels
subplot(223)
PRC_evaluation=PRCEvaluation()
PRC_evaluation.evaluate(svm.classify(),labels)
Пример #20
0
def training_run(options):
    """Train the combined-kernel SVM and write the requested HTML reports.

    Arguments:
    options -- object providing window_width, replace, positives, negatives,
               pgff, ngff, output_path, output_html and the contrib, logos
               and query switches

    Return:
    nothing; the collected report parts are handed to htmlize()
    """
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                   options.window_width, options.replace)
    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()
    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)
    # list(...) keeps the [0] lookup valid when values() returns a view
    # (Python 3) instead of a list (Python 2).
    num_positives = len(list(pmotifs.values())[0])
    num_negatives = len(list(nmotifs.values())[0])
    # Creating kernel objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    # sorted() replaces keys() followed by sort(), which fails on a Python 3
    # key view.
    motifs = sorted(pmotifs.keys())
    # Adding one weighted-degree-position kernel per motif
    for motif in motifs:
        # positives first, then negatives: same order as motif_labels below
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_kparams['degree'])
        wds_kernel.set_shifts(wds_kparams['shift'] *
                              ones(wds_kparams['seqlength'], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)
    # Gaussian kernel on the motif positions
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    # +1 for every positive example, -1 for every negative one
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(position_features, position_features,
                                kirmes_ini.RBF_KERNEL_PARAMETERS['width'])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)
    # Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)
    # Training
    svm.train()
    if not os.path.exists(options.output_path):
        os.mkdir(options.output_path)
    # Each report part is produced only when its option is set.
    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array,
                                  motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs,
                              options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)