Пример #1
0
def plotprc(output, LTE, figure_fname="", prc_label='PRC'):
    """Plot the precision-recall curve and return the area under it.

    output: classifier output scores; LTE: true binary labels.
    figure_fname: path to save the figure to; empty/None skips saving.
    Returns auPRC (area under the precision-recall curve).
    """
    import pylab
    import matplotlib

    pylab.figure(2, dpi=300, figsize=(8, 8))

    pm = PRCEvaluation()
    pm.evaluate(BinaryLabels(numpy.array(output)),
                BinaryLabels(numpy.array(LTE)))

    points = pm.get_PRC()
    points = numpy.array(points).T  # transpose so points[0]/points[1] are x/y for pylab.plot
    pylab.plot(points[0], points[1], 'b-', label=prc_label)
    pylab.axis([0, 1, 0, 1])
    ticks = numpy.arange(0., 1., .1, dtype=numpy.float64)
    pylab.xticks(ticks, size=10)
    pylab.yticks(ticks, size=10)
    pylab.xlabel('sensitivity (true positive rate)', size=10)
    pylab.ylabel('precision (1 - false discovery rate)', size=10)
    pylab.legend(loc='lower right')

    # Only save when a real filename was given: the old `!= None` test also
    # fired for the default empty string, saving ".png" and moving it to "".
    if figure_fname:
        warnings.filterwarnings('ignore', 'Could not match*')
        # save under a .png name first, then rename to the requested name
        tempfname = figure_fname + '.png'
        pylab.savefig(tempfname)
        shutil.move(tempfname, figure_fname)

    auPRC = pm.get_auPRC()
    return auPRC
def classifier_domainadaptationsvm_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna, \
                                               label_test_dna=label_testdna,fm_train_dna2=traindna2,fm_test_dna2=testdna2, \
                                               label_train_dna2=label_traindna2,label_test_dna2=label_testdna2,C=1,degree=3):
    """Train an SVMLight on the first DNA dataset, then a DomainAdaptationSVM
    regularized against it on the second dataset; return the binary outputs
    on the second test set.
    """
    # base SVM on dataset 1
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()

    # obtain DA SVM from the previously trained SVM, on the SECOND dataset.
    # (The old code mistakenly reused fm_train_dna/label_train_dna and
    # feats_train here, so the *_dna2 parameters were accepted but unused.)
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = BinaryLabels(label_train_dna2)

    # we regularize against the previously obtained solution
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply_binary(feats_test2)

    return out  #,dasvm TODO
Пример #3
0
def plotroc(output, LTE, draw_random=False, figure_fname="", roc_label='ROC'):
    """Plot the receiver operating characteristic curve and return auROC.

    output: classifier output scores; LTE: true binary labels.
    draw_random: also plot the diagonal of random guessing.
    figure_fname: path to save the figure to; empty/None skips saving.
    Returns auROC (area under the ROC curve).
    """
    import pylab
    import matplotlib

    pylab.figure(1, dpi=300, figsize=(8, 8))

    pm = ROCEvaluation()
    pm.evaluate(BinaryLabels(numpy.array(output)),
                BinaryLabels(numpy.array(LTE)))

    points = pm.get_ROC()
    points = numpy.array(points).T  # transpose so points[0]/points[1] are x/y for pylab.plot
    pylab.plot(points[0], points[1], 'b-', label=roc_label)
    if draw_random:
        pylab.plot([0, 1], [0, 1], 'r-', label='random guessing')
    pylab.axis([0, 1, 0, 1])
    ticks = numpy.arange(0., 1., .1, dtype=numpy.float64)
    pylab.xticks(ticks, size=10)
    pylab.yticks(ticks, size=10)
    pylab.xlabel('1 - specificity (false positive rate)', size=10)
    pylab.ylabel('sensitivity (true positive rate)', size=10)
    pylab.legend(loc='lower right'
                 )  #, prop = matplotlib.font_manager.FontProperties('small'))

    # Only save when a real filename was given: the old `!= None` test also
    # fired for the default empty string, saving ".png" and moving it to "".
    # (An unused `fontdict` local was also removed.)
    if figure_fname:
        warnings.filterwarnings('ignore', 'Could not match*')
        tempfname = figure_fname + '.png'
        pylab.savefig(tempfname)
        shutil.move(tempfname, figure_fname)

    auROC = pm.get_auROC()
    return auROC
def evaluation_director_contingencytableevaluation_modular(
        ground_truth, predicted):
    """Evaluate binary predictions with a director-based contingency-table
    evaluator; print and return (evaluate() result, custom score)."""
    try:
        from shogun.Evaluation import DirectorContingencyTableEvaluation, ED_MAXIMIZE
    except ImportError:
        print("recompile shogun with --enable-swig-directors")
        return

    from shogun.Features import BinaryLabels

    class SimpleWeightedBinaryEvaluator(DirectorContingencyTableEvaluation):
        """Custom measure: maximize the sum of WRACC and BAL."""

        def __init__(self):
            DirectorContingencyTableEvaluation.__init__(self)

        def get_custom_direction(self):
            # larger scores are better
            return ED_MAXIMIZE

        def get_custom_score(self):
            # weighted relative accuracy plus balanced measure
            return self.get_WRACC() + self.get_BAL()

    evaluator = SimpleWeightedBinaryEvaluator()
    result = evaluator.evaluate(BinaryLabels(ground_truth), BinaryLabels(predicted))
    score = evaluator.get_custom_score()
    print(result, score)

    return result, score
def evaluation_director_contingencytableevaluation_modular (ground_truth, predicted):
	"""Check that evaluate() and get_custom_score() agree for the custom
	director-based evaluator; returns True/False.

	Fix: SimpleWeightedBinaryEvaluator was referenced but never defined in
	this scope (NameError at call time) -- define it locally, mirroring the
	sibling example above.
	"""
	try:
		from shogun.Evaluation import DirectorContingencyTableEvaluation, ED_MAXIMIZE
	except ImportError:
		print("recompile shogun with --enable-swig-directors")
		return
	from shogun.Features import BinaryLabels

	class SimpleWeightedBinaryEvaluator(DirectorContingencyTableEvaluation):
		def __init__(self):
			DirectorContingencyTableEvaluation.__init__(self)

		def get_custom_direction(self):
			return ED_MAXIMIZE

		def get_custom_score(self):
			return self.get_WRACC() + self.get_BAL()

	evaluator = SimpleWeightedBinaryEvaluator()
	r = evaluator.evaluate(BinaryLabels(ground_truth), BinaryLabels(predicted))
	r2 = evaluator.get_custom_score()

	return r==r2
Пример #6
0
def calcprc(output, LTE):
    """Return the area under the precision-recall curve for the given
    classifier outputs and true binary labels."""
    predicted = BinaryLabels(numpy.array(output))
    truth = BinaryLabels(numpy.array(LTE))

    evaluator = PRCEvaluation()
    evaluator.evaluate(predicted, truth)
    return evaluator.get_auPRC()
Пример #7
0
def calcroc(output, LTE):
    """Return the area under the ROC curve for the given classifier
    outputs and true binary labels."""
    predicted = BinaryLabels(numpy.array(output))
    truth = BinaryLabels(numpy.array(LTE))

    evaluator = ROCEvaluation()
    evaluator.evaluate(predicted, truth)
    return evaluator.get_auROC()
def evaluation_prcevaluation_modular(ground_truth, predicted):
    """Compute the precision-recall curve and its area for the given
    ground-truth and predicted label vectors."""
    from shogun.Evaluation import PRCEvaluation
    from shogun.Features import BinaryLabels

    truth = BinaryLabels(ground_truth)
    prediction = BinaryLabels(predicted)

    prc = PRCEvaluation()
    prc.evaluate(prediction, truth)

    return prc.get_PRC(), prc.get_auPRC()
def classifier_svmocas_modular(fm_train_real=traindat,
                               fm_test_real=testdat,
                               label_train_twoclass=label_traindat,
                               C=0.9,
                               epsilon=1e-5,
                               num_threads=1):
    """Train an SVMOcas linear SVM on sparse features and classify test data.

    Returns (predictions, svm, predicted label vector).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import SVMOcas

    # convert dense inputs into the sparse representation SVMOcas expects
    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = BinaryLabels(label_train_twoclass)

    svm = SVMOcas(C, feats_train, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.set_bias_enabled(False)
    svm.train()

    svm.set_features(feats_test)
    # classify once and reuse the result (the old code applied twice and
    # discarded the first result)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
Пример #10
0
def classifier_mpdsvm_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              C=1,
                              epsilon=1e-5,
                              width=2.1):
    """Train an MPDSVM with a Gaussian kernel and classify the test set.

    `width` (Gaussian kernel width) was hard-coded to 2.1; it is now a
    backward-compatible keyword parameter with the same default.
    Returns (predictions, svm, predicted label vector).
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import MPDSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = BinaryLabels(label_train_twoclass)

    svm = MPDSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    # switch the kernel to train-vs-test, classify once and reuse the
    # result (the old code applied twice and discarded the first result)
    kernel.init(feats_train, feats_test)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
Пример #11
0
def classifier_libsvm_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              width=2.1,
                              C=1,
                              epsilon=1e-5):
    """Train a Gaussian-kernel LibSVM classifier and classify the test set.

    Returns (predictions, svm, predicted label vector).
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = BinaryLabels(label_train_twoclass)

    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    # switch the kernel to train-vs-test before classifying
    kernel.init(feats_train, feats_test)
    supportvectors = sv_idx = svm.get_support_vectors()
    alphas = svm.get_alphas()
    # classify once and reuse (the old code applied twice, discarding the
    # first result); `print x` was a Python-2-only statement
    predictions = svm.apply()
    print(predictions.get_labels())
    return predictions, svm, predictions.get_labels()
Пример #12
0
def classifier_svmsgd_modular(fm_train_real=traindat,
                              fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              C=0.9,
                              num_threads=1,
                              num_iter=5):
    """Train an SVMSGD (stochastic gradient descent) linear SVM on sparse
    features and classify the test set.

    Returns (predictions, svm, predicted label vector).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import SVMSGD

    # convert dense inputs into the sparse representation SVMSGD expects
    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = BinaryLabels(label_train_twoclass)

    svm = SVMSGD(C, feats_train, labels)
    svm.set_epochs(num_iter)
    svm.train()

    svm.set_features(feats_test)
    # classify once and reuse the result (the old code applied twice and
    # discarded the first result)
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
Пример #13
0
def kernel_histogram_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,order=3,gap=0,reverse=False):
	"""Train a PluginEstimate on word features built from DNA strings and
	compute train/test HistogramWordString kernel matrices.

	Returns (km_train, km_test, kernel).
	"""
	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels
	from shogun.Kernel import HistogramWordStringKernel
	from shogun.Classifier import PluginEstimate#, MSG_DEBUG

	# build word features of the given order from the raw DNA strings
	# (the no-op self-assignment `reverse = reverse` was removed)
	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_train_dna)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# train the plugin estimate the kernel is built on
	pie=PluginEstimate()
	labels=BinaryLabels(label_train_dna)
	pie.set_labels(labels)
	pie.set_features(feats_train)
	pie.train()

	kernel=HistogramWordStringKernel(feats_train, feats_train, pie)
	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	pie.set_features(feats_test)
	# a discarded `pie.apply().get_labels()` call was removed here
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna,degree=3, \
                                                C=10,epsilon=1e-5,num_threads=1):
    """Train an SVMLight with a weighted-degree string kernel and a custom
    linear term, then classify the test DNA strings.

    Returns (test outputs, kernel).

    NOTE(review): the linear term below has a fixed length of 10, so this
    example expects exactly 10 training examples -- confirm with callers.
    """
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    # stray trailing semicolon removed
    svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # switch the kernel to train-vs-test before classifying
    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
Пример #15
0
def classifier_subgradientsvm_modular(fm_train_real, fm_test_real,
                                      label_train_twoclass, C, epsilon,
                                      max_train_time):
    """Train a SubGradientSVM on sparse features and label the test set.

    Returns (predicted label vector, svm).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import SubGradientSVM

    def _sparsify(dense_matrix):
        # SubGradientSVM operates on sparse features; convert the dense input
        sparse = SparseRealFeatures()
        sparse.obtain_from_simple(RealFeatures(dense_matrix))
        return sparse

    feats_train = _sparsify(fm_train_real)
    feats_test = _sparsify(fm_test_real)

    svm = SubGradientSVM(C, feats_train, BinaryLabels(label_train_twoclass))
    svm.set_epsilon(epsilon)
    svm.set_max_train_time(max_train_time)
    svm.train()

    svm.set_features(feats_test)
    return svm.apply().get_labels(), svm
Пример #16
0
def classifier_svmlight_modular(fm_train_dna=traindat,
                                fm_test_dna=testdat,
                                label_train_dna=label_traindat,
                                C=1.2,
                                epsilon=1e-5,
                                num_threads=1,
                                degree=20):
    """Train an SVMLight with a weighted-degree string kernel on DNA data.

    `degree` was a hard-coded local (20); it is now a backward-compatible
    keyword parameter with the same default.
    Returns the train-vs-test kernel.
    """
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    # switch the kernel to train-vs-test and classify (labels discarded,
    # as in the original example; only the kernel is returned)
    kernel.init(feats_train, feats_test)
    svm.apply().get_labels()
    return kernel
Пример #17
0
    def log_pdf(self, thetas):
        """Estimate the (unnormalized) log posterior at each row of `thetas`.

        For every row -- one vector of kernel log-weights -- a GP
        classification model (Gaussian ARD kernel, zero mean, logit
        likelihood, Laplace approximation) is built and its log marginal
        likelihood estimate plus the log prior is accumulated.

        thetas: 2-d array of shape (n, self.dimension), entries in log-scale.
        Returns a 1-d array of length n.
        Assumes self.X, self.y, self.prior, self.n_importance and self.ridge
        are set by the enclosing class (not visible in this chunk).
        """
        assert (len(shape(thetas)) == 2)
        assert (shape(thetas)[1] == self.dimension)

        result = zeros(len(thetas))
        for i in range(len(thetas)):
            # rebuild labels/features per iteration from the stored data
            labels = BinaryLabels(self.y)
            feats_train = RealFeatures(self.X.T)

            # ARD: set theta, which is in log-scale, as kernel weights
            kernel = GaussianARDKernel(10, 1)
            kernel.set_weights(exp(thetas[i]))

            mean = ZeroMean()
            likelihood = LogitLikelihood()
            inference = LaplacianInferenceMethod(kernel, feats_train, mean,
                                                 labels, likelihood)

            # fix kernel scaling for now
            inference.set_scale(exp(0))

            # n_importance presumably controls importance-sampling draws and
            # ridge stabilizes the estimate -- confirm against inference API
            if self.ridge is not None:
                log_ml_estimate = inference.get_marginal_likelihood_estimate(
                    self.n_importance, self.ridge)
            else:
                log_ml_estimate = inference.get_marginal_likelihood_estimate(
                    self.n_importance)

            # prior is also in log-domain, so no exp of theta
            log_prior = self.prior.log_pdf(thetas[i].reshape(
                1, len(thetas[i])))
            result[i] = log_ml_estimate + log_prior

        return result
def features_director_dot_modular(fm_train_real, fm_test_real,
                                  label_train_twoclass, C, epsilon):
    """Train LibLinear both on native RealFeatures and on director-based
    NumpyFeatures over the same data; return the native-feature predictions.

    Returns (predictions, svm, predicted label vector).
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random
    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = BinaryLabels(label_train_twoclass)

    # director (Python-side) feature implementation of the same data
    dfeats_train = NumpyFeatures(fm_train_real)
    dfeats_test = NumpyFeatures(fm_test_real)
    dlabels = BinaryLabels(label_train_twoclass)

    # `print x` statements were Python-2 only
    print(feats_train.get_computed_dot_feature_matrix())
    print(dfeats_train.get_computed_dot_feature_matrix())

    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    # classify once and reuse (a duplicate apply() result was discarded before)
    predictions = svm.apply()

    # __disown__ presumably hands object ownership to shogun -- verify
    # against the SWIG director documentation
    dfeats_train.__disown__()
    dfeats_train.parallel.set_num_threads(1)
    dsvm = LibLinear(C, dfeats_train, dlabels)
    dsvm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    dsvm.set_epsilon(epsilon)
    dsvm.set_bias_enabled(True)
    dsvm.train()

    dfeats_test.__disown__()
    dfeats_test.parallel.set_num_threads(1)
    dsvm.set_features(dfeats_test)
    dpredictions = dsvm.apply()

    return predictions, svm, predictions.get_labels()
Пример #19
0
def evaluation_cross_validation_mkl_weight_storage(traindat=traindat, label_traindat=label_traindat):
    """Run stratified 5-fold cross-validation of an MKL classifier and
    return the MKL subkernel weights recorded over all runs and folds.

    Fix: `weights` was computed on the last line but never returned, so the
    function always returned None.
    """
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import BinaryLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import LibSVM, MKLClassification
    from shogun.Mathematics import Statistics

    # training data: the same features appended once per subkernel
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels = BinaryLabels(label_traindat)

    # combined kernel: three Gaussians of different widths
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # MKL on top of LibSVM; due to a mem-bug, interleaved optimization is
    # not possible (stray C-style semicolons removed)
    svm = MKLClassification(LibSVM())
    svm.set_interleaved_optimization_enabled(False)
    svm.set_kernel(kernel)

    # splitting strategy for 5-fold stratified cross-validation
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # accuracy as evaluation criterion
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(svm, comb_features, labels,
        splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross-validation output class that records MKL weights per fold
    mkl_storage = CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result = cross_validation.evaluate()

    # return the recorded MKL weights (previously computed and dropped)
    weights = mkl_storage.get_mkl_weights()
    return weights
def modelselection_grid_search_kernel():
	"""Grid-search model selection over a kernel parameter tree using
	cross-validation on random toy data; returns 0."""
	num_subsets=3
	num_vectors=20
	dim_vectors=3

	# some random (non-sense) data, one column per example
	features=RealFeatures()
	features.set_feature_matrix(rand(dim_vectors, num_vectors))

	# two classes: alternate +1/-1 over the examples
	labels=BinaryLabels(num_vectors)
	for idx in range(num_vectors):
		labels.set_label(idx, 1 if idx % 2 == 0 else -1)

	classifier=LibSVM()

	# stratified splitting plus accuracy criterion drive the CV used
	# inside model selection
	splitting=StratifiedCrossValidationSplitting(labels, num_subsets)
	criterion=ContingencyTableEvaluation(ACCURACY)
	cross=CrossValidation(classifier, features, labels, splitting, criterion)
	cross.set_num_runs(1)

	# list all parameters available for model selection
	# (if yours is missing, simply write to the mailing list)
	classifier.print_modsel_params()

	param_tree=create_param_tree()
	param_tree.print_tree()

	grid_search=GridSearchModelSelection(param_tree, cross)

	best_combination=grid_search.select_model(True)
	print("best parameter(s):")
	best_combination.print_tree()

	best_combination.apply_to_machine(classifier)

	# more runs give tighter confidence intervals
	cross.set_num_runs(10)
	cross.set_conf_int_alpha(0.01)
	result=cross.evaluate()
	print("result: ")
	result.print_result()

	return 0
Пример #21
0
def evaluation_contingencytableevaluation_modular(ground_truth, predicted):
	"""Evaluate binary predictions with the full family of contingency-table
	measures.

	Returns (accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall,
	precision, specificity).
	"""
	from shogun.Features import BinaryLabels
	from shogun.Evaluation import ContingencyTableEvaluation
	from shogun.Evaluation import AccuracyMeasure,ErrorRateMeasure,BALMeasure
	from shogun.Evaluation import WRACCMeasure,F1Measure,CrossCorrelationMeasure
	from shogun.Evaluation import RecallMeasure,PrecisionMeasure,SpecificityMeasure

	truth = BinaryLabels(ground_truth)
	prediction = BinaryLabels(predicted)

	# base evaluator is run first, exactly as before
	ContingencyTableEvaluation().evaluate(prediction, truth)

	# one measure class per returned statistic, evaluated in the same order
	# as the original hand-unrolled code
	measures = (AccuracyMeasure, ErrorRateMeasure, BALMeasure, WRACCMeasure,
		F1Measure, CrossCorrelationMeasure, RecallMeasure, PrecisionMeasure,
		SpecificityMeasure)
	return tuple(measure().evaluate(prediction, truth) for measure in measures)
Пример #22
0
def get_labels(raw=False, type='binary'):
	"""Build a +/-1 label vector for 2*NUM_EXAMPLES examples.

	raw=True returns the plain numpy vector; otherwise it is wrapped in
	BinaryLabels or RegressionLabels according to `type` (None if unknown).
	"""
	data = concatenate(array(
		(-ones(NUM_EXAMPLES, dtype=double), ones(NUM_EXAMPLES, dtype=double))
	))
	if raw:
		return data
	# map the label-type name to its shogun wrapper class
	wrappers = {'binary': BinaryLabels, 'regression': RegressionLabels}
	if type in wrappers:
		return wrappers[type](data)
	return None
Пример #23
0
    def train(self, data, labels):
        """Train an SVMLight on a combined string kernel and return self.

        The combined kernel stacks a centered WDK (plain, or positional
        with shifts when self.param["shifts"] != 0) and two weighted
        spectrum kernels for the borders.  Settings are read from
        self.param -- assumed to be a dict set by the enclosing class.
        """

        # centered WDK/WDK-shift
        if self.param["shifts"] == 0:
            kernel_center = WeightedDegreeStringKernel(self.param["degree"])
        else:
            kernel_center = WeightedDegreePositionStringKernel(
                10, self.param["degree"])
            shifts_vector = numpy.ones(
                self.param["center_offset"] * 2,
                dtype=numpy.int32) * self.param["shifts"]
            kernel_center.set_shifts(shifts_vector)

        # cache sizes must be integers: `// 3` keeps the Python-2 integer
        # result and avoids a float under Python 3's true division
        kernel_center.set_cache_size(self.param["kernel_cache"] // 3)

        # border spectrum kernels
        size = self.param["kernel_cache"] // 3
        use_sign = False
        kernel_left = WeightedCommWordStringKernel(size, use_sign)
        kernel_right = WeightedCommWordStringKernel(size, use_sign)

        # assemble combined kernel
        kernel = CombinedKernel()
        kernel.append_kernel(kernel_center)
        kernel.append_kernel(kernel_left)
        kernel.append_kernel(kernel_right)

        ## building features
        feat = create_features(data, self.param["center_offset"],
                               self.param["center_pos"])

        # init combined kernel
        kernel.init(feat, feat)

        # was a Python-2-only print statement
        print("len(labels) = %i" % (len(labels)))
        lab = BinaryLabels(numpy.double(labels))
        self.svm = SVMLight(self.param["cost"], kernel, lab)

        # show debugging output
        self.svm.io.enable_progress()
        self.svm.io.set_loglevel(MSG_DEBUG)

        # optimization settings
        num_threads = 2
        self.svm.parallel.set_num_threads(num_threads)
        self.svm.set_epsilon(10e-8)

        self.svm.train()

        return self
Пример #24
0
def features_from_file(fileName):
    """Load a comma-separated file: the first line is skipped as a header,
    column 1 holds the label and columns 2+ hold the feature values.

    Returns (RealFeatures of the transposed feature matrix, raw feature
    list-of-lists, BinaryLabels of the label vector).
    """
    features = []
    labels = []
    # `with` closes the handle; the old code leaked the open file
    with open(fileName) as fileHandle:
        fileHandle.readline()  # skip the header line
        for line in fileHandle:
            tokens = line.split(',')
            labels.append(float(tokens[1]))
            features.append([float(token) for token in tokens[2:]])

    # numpy.float (a deprecated alias of builtin float) was removed in
    # NumPy 1.24; plain float yields the same float64 dtype
    return RealFeatures(numpy.transpose(
        numpy.array(features))), features, BinaryLabels(
            numpy.array(labels, float))
Пример #25
0
def kernel_auc_modular(fm_train_real=traindat,label_train_real=testdat,width=1.7):
	"""Build an AUC kernel on top of a Gaussian subkernel, set it up for
	AUC maximization with the given labels, and return it."""
	from shogun.Kernel import GaussianKernel, AUCKernel
	from shogun.Features import RealFeatures, BinaryLabels

	train_feats = RealFeatures(fm_train_real)
	gaussian = GaussianKernel(train_feats, train_feats, width)

	auc_kernel = AUCKernel(0, gaussian)
	auc_kernel.setup_auc_maximization( BinaryLabels(label_train_real) )
	# the train kernel matrix is computed exactly as before (result unused)
	km_train = auc_kernel.get_kernel_matrix()
	return auc_kernel
Пример #26
0
def kernel_combined_custom_poly_modular(fm_train_real=traindat,
                                        fm_test_real=testdat,
                                        fm_label_twoclass=label_traindat):
    """Combine a precomputed (custom) degree-3 poly kernel with a live
    degree-2 poly kernel, train a LibSVM on the combination, then rebuild
    the combined kernel for train-vs-test prediction.

    Returns (kernel matrix of the final combined kernel, that kernel).
    """
    from shogun.Features import CombinedFeatures, RealFeatures, BinaryLabels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM

    train_kernel = CombinedKernel()
    train_feats = CombinedFeatures()

    # subkernel 1: degree-3 poly kernel precomputed into a CustomKernel
    tfeats = RealFeatures(fm_train_real)
    poly3 = PolyKernel(10, 3)
    poly3.init(tfeats, tfeats)
    train_kernel.append_kernel(CustomKernel(poly3.get_kernel_matrix()))

    # subkernel 2: degree-2 poly kernel evaluated on the fly
    train_feats.append_feature_obj(RealFeatures(fm_train_real))
    train_kernel.append_kernel(PolyKernel(10, 2))

    train_kernel.init(train_feats, train_feats)

    svm = LibSVM(1.0, train_kernel, BinaryLabels(fm_label_twoclass))
    svm.train()

    # rebuild the combination for prediction (train vs. test)
    pred_kernel = CombinedKernel()
    pred_feats = CombinedFeatures()

    poly3 = PolyKernel(10, 3)
    poly3.init(tfeats, RealFeatures(fm_test_real))
    pred_kernel.append_kernel(CustomKernel(poly3.get_kernel_matrix()))

    pred_feats.append_feature_obj(RealFeatures(fm_test_real))
    pred_kernel.append_kernel(PolyKernel(10, 2))
    pred_kernel.init(train_feats, pred_feats)

    svm.set_kernel(pred_kernel)
    svm.apply()
    return pred_kernel.get_kernel_matrix(), pred_kernel
def classifier_perceptron_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,learn_rate=1.,max_iter=1000,num_threads=1):
	"""Train a Perceptron on two-class data and label the test features.

	Training is only guaranteed to converge for linearly separable data.
	Returns (perceptron, predicted label vector).
	"""
	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Classifier import Perceptron

	train_feats = RealFeatures(fm_train_real)
	test_feats = RealFeatures(fm_test_real)
	train_labels = BinaryLabels(label_train_twoclass)

	perceptron = Perceptron(train_feats, train_labels)
	perceptron.set_learn_rate(learn_rate)
	perceptron.set_max_iter(max_iter)
	perceptron.train()

	perceptron.set_features(test_feats)
	return perceptron, perceptron.apply().get_labels()
def classifier_libsvm_minimal_modular(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_twoclass=label_traindat,
                                      width=2.1,
                                      C=1):
    """Minimal LibSVM example: train with a Gaussian kernel, classify the
    test features, and return the misclassification rate.

    Fix: `testerr` was computed and then dropped (the function returned
    None).  NOTE(review): the error is measured against the *training*
    labels, which only makes sense if the test set mirrors the training
    set -- confirm with callers.
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import LibSVM
    from shogun.Kernel import GaussianKernel

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = BinaryLabels(label_train_twoclass)
    svm = LibSVM(C, kernel, labels)
    svm.train()

    # switch the kernel to train-vs-test before classifying
    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    testerr = mean(sign(out) != label_train_twoclass)
    return testerr
def classifier_lda_modular(fm_train_real=traindat,
                           fm_test_real=testdat,
                           label_train_twoclass=label_traindat,
                           gamma=3,
                           num_threads=1):
    """Train regularized LDA (regularization gamma) and classify the test
    features.

    Returns (lda, predicted label vector).
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import LDA

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = BinaryLabels(label_train_twoclass)

    lda = LDA(gamma, feats_train, labels)
    lda.train()

    lda.set_features(feats_test)
    # classify once and reuse (the old code called apply() twice and also
    # fetched bias/weights without using them)
    predictions = lda.apply().get_labels()
    return lda, predictions
Пример #30
0
def get_labels (num, ltype='twoclass'):
	"""Return labels used for classification.

	@param num Number of labels
	@param ltype Type of labels, either twoclass or series.
	@return Tuple to contain the labels as numbers in a tuple and labels as objects digestable for Shogun, or [None, None] for an unknown ltype.
	"""

	labels=[]
	if ltype=='twoclass':
		labels.append(random.rand(num).round()*2-1)
		# essential to wrap in array(), will segfault sometimes otherwise
		labels.append(BinaryLabels(numpy.array(labels[0])))
	elif ltype=='series':
		# `xrange` was Python-2 only; `range` is behavior-identical here
		labels.append([numpy.double(x) for x in range(num)])
		# essential to wrap in array(), will segfault sometimes otherwise
		labels.append(RegressionLabels(numpy.array(labels[0])))
	else:
		return [None, None]

	return labels