def calcprc(output, LTE):
    pm = PerformanceMeasures(Labels(numpy.array(LTE)), Labels(numpy.array(output)))
    auPRC = pm.get_auPRC()
    return auPRC
def calcprc(output, LTE): """The area under the precision recall curve""" pm = PRCEvaluation() pm.evaluate(Labels(numpy.array(output)), Labels(numpy.array(LTE))) auPRC = pm.get_auPRC() return auPRC
def calcroc(output, LTE): """The area under the receiver operating characteristic curve""" pm = PerformanceMeasures(Labels(numpy.array(LTE)), Labels(numpy.array(output))) auROC = pm.get_auROC() return auROC
def calcroc(output, LTE): """The area under the receiver operating characteristic curve""" pm = ROCEvaluation() pm.evaluate(Labels(numpy.array(output)), Labels(numpy.array(LTE))) auROC = pm.get_auROC() return auROC
def calcprc(output, LTE): """The area under the precision recall curve""" pm = PerformanceMeasures(Labels(numpy.array(LTE)), Labels(numpy.array(output))) auPRC = pm.get_auPRC() return auPRC
def classifier_domainadaptationsvm_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                           label_train_dna=label_traindna,
                                           label_test_dna=label_testdna,
                                           fm_train_dna2=traindna2, fm_test_dna2=testdna2,
                                           label_train_dna2=label_traindna2,
                                           label_test_dna2=label_testdna2,
                                           C=1, degree=3):
    # train a plain SVM on the source-domain data
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = Labels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()
    #svm.io.set_loglevel(MSG_DEBUG)

    #####################################
    # obtain a domain-adaptation SVM from the previously trained SVM,
    # using the second (target-domain) dataset
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = Labels(label_train_dna2)

    # we regularize against the previously obtained solution
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.classify(feats_test2).get_labels()

    return out  #, dasvm TODO
def svm_learn(kernel, labels, options):
    """Train an SVM using SVMLight or, if unavailable, LibSVM.

    Arguments:
    kernel  -- kernel object from the Shogun toolbox
    labels  -- list of labels
    options -- object containing option data

    Return:
    trained svm object
    """
    try:
        svm = SVMLight(options.svmC, kernel,
                       Labels(numpy.array(labels, dtype=numpy.double)))
    except NameError:
        svm = LibSVM(options.svmC, kernel,
                     Labels(numpy.array(labels, dtype=numpy.double)))

    if not options.quiet:
        svm.io.set_loglevel(MSG_INFO)
        svm.io.set_target_to_stderr()

    svm.set_epsilon(options.epsilon)
    svm.parallel.set_num_threads(1)
    if options.weight != 1.0:
        svm.set_C(options.svmC, options.svmC * options.weight)
    svm.train()

    if not options.quiet:
        svm.io.set_loglevel(MSG_ERROR)

    return svm
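# Hypothetical usage sketch for svm_learn() above (not from the original file).
# The attribute names on `options` (svmC, epsilon, quiet, weight) are exactly
# the ones the function reads; the feature/kernel setup mirrors the other
# examples in this collection, and numpy is assumed to be imported at module level.
def demo_svm_learn():
    from argparse import Namespace
    from shogun.Features import RealFeatures
    from shogun.Kernel import GaussianKernel

    data = numpy.random.randn(2, 20)      # 20 random 2-d points
    labels = [1.0] * 10 + [-1.0] * 10     # two balanced classes
    feats = RealFeatures(data)
    kernel = GaussianKernel(feats, feats, 2.1)
    opts = Namespace(svmC=1.0, epsilon=1e-5, quiet=True, weight=1.0)
    return svm_learn(kernel, labels, opts)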
def plotprc(output, LTE, figure_fname="", prc_label='PRC'): """Plot the precision recall curve""" import pylab import matplotlib pylab.figure(2, dpi=150, figsize=(4, 4)) pm = PerformanceMeasures(Labels(numpy.array(LTE)), Labels(numpy.array(output))) points = pm.get_PRC() points = numpy.array(points).T # for pylab.plot pylab.plot(points[0], points[1], 'b-', label=prc_label) pylab.axis([0, 1, 0, 1]) ticks = numpy.arange(0., 1., .1, dtype=numpy.float64) pylab.xticks(ticks, size=10) pylab.yticks(ticks, size=10) pylab.xlabel('sensitivity (true positive rate)', size=10) pylab.ylabel('precision (1 - false discovery rate)', size=10) pylab.legend(loc='lower right') if figure_fname != None: warnings.filterwarnings('ignore', 'Could not match*') tempfname = figure_fname + '.png' pylab.savefig(tempfname) shutil.move(tempfname, figure_fname) auPRC = pm.get_auPRC() return auPRC
def plotroc(output, LTE, draw_random=False, figure_fname="", roc_label='ROC'): """Plot the receiver operating characteristic curve""" import pylab import matplotlib pylab.figure(1, dpi=150, figsize=(4, 4)) fontdict = dict(family="cursive", weight="bold", size=7, y=1.05) pm = PerformanceMeasures(Labels(numpy.array(LTE)), Labels(numpy.array(output))) points = pm.get_ROC() points = numpy.array(points).T # for pylab.plot pylab.plot(points[0], points[1], 'b-', label=roc_label) if draw_random: pylab.plot([0, 1], [0, 1], 'r-', label='random guessing') pylab.axis([0, 1, 0, 1]) ticks = numpy.arange(0., 1., .1, dtype=numpy.float64) pylab.xticks(ticks, size=10) pylab.yticks(ticks, size=10) pylab.xlabel('1 - specificity (false positive rate)', size=10) pylab.ylabel('sensitivity (true positive rate)', size=10) pylab.legend(loc='lower right', prop=matplotlib.font_manager.FontProperties('tiny')) if figure_fname != None: warnings.filterwarnings('ignore', 'Could not match*') tempfname = figure_fname + '.png' pylab.savefig(tempfname) shutil.move(tempfname, figure_fname) auROC = pm.get_auROC() return auROC
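# Hypothetical usage sketch for plotroc()/plotprc() above (not from the original
# file): plot both curves for noisy toy scores and write them to PNG files.
# Assumes pylab/matplotlib and the module-level shogun imports the helpers rely on.
def demo_plot_curves():
    truth = numpy.concatenate((numpy.ones(50), -numpy.ones(50)))
    scores = truth + numpy.random.randn(100)   # noisy predictions
    auroc = plotroc(scores, truth, draw_random=True, figure_fname='roc.png')
    auprc = plotprc(scores, truth, figure_fname='prc.png')
    return auroc, auprc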
def modelselection_grid_search_kernel():
    num_subsets = 3
    num_vectors = 20
    dim_vectors = 3

    # create some (nonsense) data: num_vectors random vectors of dimension dim_vectors
    matrix = rand(dim_vectors, num_vectors)

    # create dense features from the random matrix
    features = RealFeatures()
    features.set_feature_matrix(matrix)

    # create labels, two classes
    labels = Labels(num_vectors)
    for i in range(num_vectors):
        labels.set_label(i, 1 if i % 2 == 0 else -1)

    # create svm
    classifier = LibSVM()

    # splitting strategy
    splitting_strategy = StratifiedCrossValidationSplitting(labels, num_subsets)

    # accuracy evaluation
    evaluation_criterion = ContingencyTableEvaluation(ACCURACY)

    # cross-validation class for evaluation in model selection
    cross = CrossValidation(classifier, features, labels, splitting_strategy,
                            evaluation_criterion)
    cross.set_num_runs(1)

    # print all parameters available for model selection
    # (don't worry if yours is not included, simply write to the mailing list)
    classifier.print_modsel_params()

    # model parameter selection
    param_tree = create_param_tree()
    param_tree.print_tree()

    grid_search = GridSearchModelSelection(param_tree, cross)

    print_state = True
    best_combination = grid_search.select_model(print_state)
    print("best parameter(s):")
    best_combination.print_tree()

    best_combination.apply_to_machine(classifier)

    # larger number of runs to have tighter confidence intervals
    cross.set_num_runs(10)
    cross.set_conf_int_alpha(0.01)
    result = cross.evaluate()
    print("result: ")
    result.print_result()

    return 0
def evaluation_multiclassaccuracy_modular(ground_truth, predicted):
    from shogun.Features import Labels
    from shogun.Evaluation import MulticlassAccuracy

    ground_truth_labels = Labels(ground_truth)
    predicted_labels = Labels(predicted)

    evaluator = MulticlassAccuracy()
    accuracy = evaluator.evaluate(predicted_labels, ground_truth_labels)

    return accuracy
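# Hypothetical usage sketch (not from the original file): multiclass accuracy on
# toy label vectors. Labels() expects double-valued numpy arrays, as in the
# other examples here.
from numpy import array
mc_truth = array([0.0, 1.0, 2.0, 1.0, 0.0])
mc_pred = array([0.0, 1.0, 1.0, 1.0, 0.0])
print(evaluation_multiclassaccuracy_modular(mc_truth, mc_pred))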
def knn_train(train_data=None, train_label=None, k=1):
    train_data = RealFeatures(train_data)
    distance = EuclidianDistance(train_data, train_data)
    try:
        train_label = Labels(array(train_label.tolist(), dtype=float64))
    except Exception as e:
        print(e)
        raise
    knn_model = KNN(k, distance, train_label)
    knn_model.train()
    return knn_model
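# Hypothetical usage sketch for knn_train() above (not from the original file):
# fit a 3-nearest-neighbour model on two Gaussian blobs. Assumes the
# module-level imports knn_train relies on (RealFeatures, EuclidianDistance,
# KNN, Labels, numpy's array/float64) are in place.
from numpy import concatenate, ones
from numpy.random import randn
knn_x = concatenate((randn(2, 20) - 1, randn(2, 20) + 1), axis=1)
knn_y = concatenate((-ones(20), ones(20)))
knn_model = knn_train(knn_x, knn_y, k=3)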
def evaluation_meansquarederror_modular(ground_truth, predicted):
    from shogun.Features import Labels
    from shogun.Evaluation import MeanSquaredError

    ground_truth_labels = Labels(ground_truth)
    predicted_labels = Labels(predicted)

    evaluator = MeanSquaredError()
    mse = evaluator.evaluate(predicted_labels, ground_truth_labels)

    return mse
def evaluation_rocevaluation_modular(ground_truth, predicted):
    from shogun.Features import Labels
    from shogun.Evaluation import ROCEvaluation

    ground_truth_labels = Labels(ground_truth)
    predicted_labels = Labels(predicted)

    evaluator = ROCEvaluation()
    evaluator.evaluate(predicted_labels, ground_truth_labels)

    return evaluator.get_ROC(), evaluator.get_auROC()
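# Hypothetical usage sketch (not from the original file): ROC points and area
# under the curve for real-valued predictions against +/-1 ground truth.
from numpy import array
roc_truth = array([1.0, 1.0, -1.0, -1.0])
roc_pred = array([0.9, 0.4, 0.3, -0.7])
roc_points, auroc = evaluation_rocevaluation_modular(roc_truth, roc_pred)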
def classifier_mpdsvm_modular(fm_train_real=traindat, fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import MPDSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = Labels(label_train_twoclass)

    svm = MPDSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    kernel.init(feats_train, feats_test)
    svm.apply().get_labels()
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def regression_svrlight_modular(fm_train=traindat, fm_test=testdat, label_train=label_traindat,
                                width=1.2, C=1, epsilon=1e-5, tube_epsilon=1e-2, num_threads=3):
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    try:
        from shogun.Regression import SVRLight
    except ImportError:
        print('No support for SVRLight available.')
        return

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = Labels(label_train)

    svr = SVRLight(C, epsilon, kernel, labels)
    svr.set_tube_epsilon(tube_epsilon)
    svr.parallel.set_num_threads(num_threads)
    svr.train()

    kernel.init(feats_train, feats_test)
    out = svr.classify().get_labels()

    return out, kernel
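# Hypothetical usage sketch for regression_svrlight_modular() above (not from
# the original file): fit a support vector regressor to a noisy sinc target on
# random 1-d inputs. The function returns None when SVRLight is unavailable.
from numpy import sinc
from numpy.random import rand, randn
svr_train_x = rand(1, 40) * 10 - 5
svr_test_x = rand(1, 40) * 10 - 5
svr_train_y = sinc(svr_train_x[0]) + 0.1 * randn(40)
svr_result = regression_svrlight_modular(svr_train_x, svr_test_x, svr_train_y)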
def classifier_svmsgd_modular(fm_train_real=traindat, fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              C=0.9, num_threads=1, num_iter=5):
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMSGD

    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = Labels(label_train_twoclass)

    svm = SVMSGD(C, feats_train, labels)
    svm.set_epochs(num_iter)
    #svm.io.set_loglevel(0)
    svm.train()

    svm.set_features(feats_test)
    svm.apply().get_labels()
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def classifier_multiclasslinearmachine_modular(fm_train_real=traindat, fm_test_real=testdat,
                                               label_train_multiclass=label_traindat,
                                               width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCHDDecoder, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = Labels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), feats_train, classifier, labels)
    mc_classifier.train()
    out_mc = mc_classifier.apply(feats_test).get_labels()

    ecoc_strategy = ECOCStrategy(ECOCOVREncoder(), ECOCHDDecoder())
    ecoc_classifier = LinearMulticlassMachine(ecoc_strategy, feats_train, classifier, labels)
    ecoc_classifier.train()
    out_ecoc = ecoc_classifier.apply(feats_test).get_labels()

    n_diff = (out_mc != out_ecoc).sum()
    if n_diff == 0:
        print("Same results for OvR and ECOCOvR")
    else:
        print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (n_diff, len(out_mc)))

    return out_ecoc, out_mc
def classifier_svmlight_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                label_train_dna=label_traindat,
                                C=1.2, epsilon=1e-5, num_threads=1):
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)
    degree = 20

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    svm.apply().get_labels()
    return kernel
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna, fm_test_dna=testdna,
                                            label_train_dna=label_traindna, degree=3,
                                            C=10, epsilon=1e-5, num_threads=1):
    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

    labels = Labels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    svm.set_linear_term(
        -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double))
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out, kernel
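# Hypothetical usage sketch for classifier_svmlight_linear_term_modular() above
# (not from the original file): build a tiny random DNA dataset. The weighted
# degree kernel needs equal-length strings, and the hard-coded 10-element
# linear term requires exactly 10 training examples; numpy is assumed to be
# imported at module level.
import random
def random_dna(n, length=20):
    return [''.join(random.choice('ACGT') for _ in range(length)) for _ in range(n)]
dna_train = random_dna(10)
dna_test = random_dna(10)
dna_labels = numpy.array([1.0] * 5 + [-1.0] * 5)
dna_out, dna_kernel = classifier_svmlight_linear_term_modular(dna_train, dna_test, dna_labels, degree=3)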
def mlprocess(task_filename, data_filename, pred_filename, verbose=True):
    """Demo of creating a machine learning process."""
    task_type, fidx, lidx, train_idx, test_idx = parse_task(task_filename)
    outputs = init_output(task_type)
    all_data = parse_data(data_filename)
    train_ex, train_lab, test_ex, test_lab = split_data(all_data, fidx, lidx,
                                                        train_idx, test_idx)
    label_train = outputs.str2label(train_lab)

    if verbose:
        print('Number of features: %d' % train_ex.shape[0])
        print('%d training examples, %d test examples' % (len(train_lab), len(test_lab)))

    feats_train = RealFeatures(train_ex)
    feats_test = RealFeatures(test_ex)
    width = 1.0
    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = Labels(label_train)

    svm = init_svm(task_type, kernel, labels)
    svm.train()

    kernel.init(feats_train, feats_test)
    preds = svm.classify().get_labels()
    pred_label = outputs.label2str(preds)

    pf = open(pred_filename, 'w')
    for pred in pred_label:
        pf.write(pred + '\n')
    pf.close()
def libsvm():
    print('LibSVM')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Evaluation import PerformanceMeasures
    from shogun.Classifier import LibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    C = 1
    epsilon = 1e-5
    labels = Labels(label_train_twoclass)

    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    #kernel.init(feats_train, feats_test)
    output = svm.classify(feats_test)  #.get_labels()
    #output_vector = output.get_labels()
    out = svm.classify().get_labels()
    testerr = mean(sign(out) != testlab)
    print(testerr)
def classifier_subgradientsvm_modular(fm_train_real, fm_test_real, label_train_twoclass,
                                      C, epsilon, max_train_time):
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SubGradientSVM

    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = Labels(label_train_twoclass)

    svm = SubGradientSVM(C, feats_train, labels)
    svm.set_epsilon(epsilon)
    svm.set_max_train_time(max_train_time)
    svm.train()

    svm.set_features(feats_test)
    labels = svm.apply().get_labels()
    return labels, svm
def classifier_libsvm_modular(fm_train_real=traindat, fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    kernel = GaussianKernel(feats_train, feats_train, width)
    labels = Labels(label_train_twoclass)

    svm = LibSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    kernel.init(feats_train, feats_test)
    labels = svm.classify().get_labels()
    supportvectors = sv_idx = svm.get_support_vectors()
    alphas = svm.get_alphas()
    predictions = svm.classify()
    return predictions, svm, predictions.get_labels()
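# Hypothetical usage sketch for classifier_libsvm_modular() above (not from the
# original file): two Gaussian blobs in 2-d, trained on one sample and applied
# to a held-out one.
from numpy import concatenate, ones
from numpy.random import randn
blob_num = 30
blob_train = concatenate((randn(2, blob_num) - 1, randn(2, blob_num) + 1), axis=1)
blob_test = concatenate((randn(2, blob_num) - 1, randn(2, blob_num) + 1), axis=1)
blob_labels = concatenate((-ones(blob_num), ones(blob_num)))
blob_pred, blob_svm, blob_out = classifier_libsvm_modular(blob_train, blob_test, blob_labels)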
def classifier_svmlin_modular(fm_train_real=traindat, fm_test_real=testdat,
                              label_train_twoclass=label_traindat,
                              C=0.9, epsilon=1e-5, num_threads=1):
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMLin

    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    labels = Labels(label_train_twoclass)

    svm = SVMLin(C, feats_train, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    svm.get_bias()
    svm.get_w()
    svm.classify().get_labels()
    predictions = svm.classify()
    return predictions, svm, predictions.get_labels()
def modelselection_grid_search_linear_modular(traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import Labels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # build parameter tree to select C1 and C2
    param_tree_root = ModelSelectionParameters()
    c1 = ModelSelectionParameters("C1")
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP)

    c2 = ModelSelectionParameters("C2")
    param_tree_root.append_child(c2)
    c2.build_values(-2.0, 2.0, R_EXP)

    # training data
    features = RealFeatures(traindat)
    labels = Labels(label_traindat)

    # classifier
    classifier = LibLinear(L2R_L2LOSS_SVC)

    # print all parameters available for model selection
    # (don't worry if yours is not included, simply write to the mailing list)
    classifier.print_modsel_params()

    # splitting strategy for cross-validation
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 10)

    # evaluation method
    evaluation_criterium = ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation = CrossValidation(classifier, features, labels,
                                       splitting_strategy, evaluation_criterium)

    # model selection instance
    model_selection = GridSearchModelSelection(param_tree_root, cross_validation)

    # perform model selection with selected methods
    #print "performing model selection of"
    #param_tree_root.print_tree()
    best_parameters = model_selection.select_model()

    # print best parameters
    #print "best parameters:"
    #best_parameters.print_tree()

    # apply them and evaluate the result
    best_parameters.apply_to_machine(classifier)
    result = cross_validation.evaluate()
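# Hypothetical usage sketch for modelselection_grid_search_linear_modular()
# above (not from the original file): run the C1/C2 grid search on random
# two-class data with +/-1 labels.
from numpy import concatenate, ones
from numpy.random import randn
gs_feats = concatenate((randn(2, 20) - 1, randn(2, 20) + 1), axis=1)
gs_labels = concatenate((-ones(20), ones(20)))
modelselection_grid_search_linear_modular(gs_feats, gs_labels)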
def classifier_larank_modular(fm_train_real=traindat, fm_test_real=testdat,
                              label_train_multiclass=label_traindat,
                              C=0.9, num_threads=1, num_iter=5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    epsilon = 1e-5
    labels = Labels(label_train_multiclass)

    svm = LaRank(C, kernel, labels)
    #svm.set_tau(1e-3)
    svm.set_batch_mode(False)
    #svm.io.enable_progress()
    svm.set_epsilon(epsilon)
    svm.train()
    out = svm.apply(feats_train).get_labels()
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()
def kernel_salzberg_word_string_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                        label_train_dna=label_traindat,
                                        order=3, gap=0, reverse=False):
    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, Labels
    from shogun.Kernel import SalzbergWordStringKernel
    from shogun.Classifier import PluginEstimate

    charfeat = StringCharFeatures(fm_train_dna, DNA)
    feats_train = StringWordFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    charfeat = StringCharFeatures(fm_test_dna, DNA)
    feats_test = StringWordFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)

    pie = PluginEstimate()
    labels = Labels(label_train_dna)
    pie.set_labels(labels)
    pie.set_features(feats_train)
    pie.train()

    kernel = SalzbergWordStringKernel(feats_train, feats_train, pie, labels)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    pie.set_features(feats_test)
    pie.apply().get_labels()
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
def get_labels(raw=False):
    data = concatenate(array(
        (-ones(NUM_EXAMPLES, dtype=double), ones(NUM_EXAMPLES, dtype=double))))
    if raw:
        return data
    else:
        return Labels(data)
def regression_least_squares_modular(fm_train=traindat, fm_test=testdat,
                                     label_train=label_traindat, tau=1e-6):
    from shogun.Features import Labels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LeastSquaresRegression

    ls = LeastSquaresRegression(RealFeatures(fm_train), Labels(label_train))
    ls.train()
    out = ls.apply(RealFeatures(fm_test)).get_labels()
    return out, ls
def evaluation_contingencytableevaluation_modular(ground_truth, predicted):
    from shogun.Features import Labels
    from shogun.Evaluation import ContingencyTableEvaluation
    from shogun.Evaluation import AccuracyMeasure, ErrorRateMeasure, BALMeasure
    from shogun.Evaluation import WRACCMeasure, F1Measure, CrossCorrelationMeasure
    from shogun.Evaluation import RecallMeasure, PrecisionMeasure, SpecificityMeasure

    ground_truth_labels = Labels(ground_truth)
    predicted_labels = Labels(predicted)

    base_evaluator = ContingencyTableEvaluation()
    base_evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = AccuracyMeasure()
    accuracy = evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = ErrorRateMeasure()
    errorrate = evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = BALMeasure()
    bal = evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = WRACCMeasure()
    wracc = evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = F1Measure()
    f1 = evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = CrossCorrelationMeasure()
    crosscorrelation = evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = RecallMeasure()
    recall = evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = PrecisionMeasure()
    precision = evaluator.evaluate(predicted_labels, ground_truth_labels)

    evaluator = SpecificityMeasure()
    specificity = evaluator.evaluate(predicted_labels, ground_truth_labels)

    return accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall, precision, specificity
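# Hypothetical usage sketch (not from the original file): all contingency-table
# measures computed on +/-1 toy labels.
from numpy import array
ct_truth = array([1.0, 1.0, -1.0, -1.0, 1.0])
ct_pred = array([1.0, -1.0, -1.0, -1.0, 1.0])
print(evaluation_contingencytableevaluation_modular(ct_truth, ct_pred))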
def classifier_multiclassmachine_modular(fm_train_real=traindat, fm_test_real=testdat,
                                         label_train_multiclass=label_traindat,
                                         width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = Labels(label_train_multiclass)

    classifier = LibSVM(C, kernel, labels)
    classifier.set_epsilon(epsilon)
    print(labels.get_labels())

    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(), kernel, classifier, labels)
    mc_classifier.train()

    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out
num = 10
dist = 1
width = 2.1

traindata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist), axis=1)
testdata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist), axis=1)

trainlab = concatenate((-ones(num), ones(num)))
testlab = concatenate((-ones(num), ones(num)))

feats_train = RealFeatures(traindata_real)
feats_test = RealFeatures(testdata_real)
kernel = GaussianKernel(feats_train, feats_train, width)
kernel.io.set_loglevel(MSG_DEBUG)

labels = Labels(trainlab)

svm = SVMLight(2, kernel, labels)
svm.train()
svm.io.set_loglevel(MSG_DEBUG)

##################################################

print("labels:")
print(labels.to_string())

print("features")
print(feats_train.to_string())

print("kernel")
print(kernel.to_string())
def features_io_modular(fm_train_real, label_train_twoclass):
    import numpy
    from shogun.Features import SparseRealFeatures, RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.IO import AsciiFile, BinaryFile, HDF5File

    feats = SparseRealFeatures(fm_train_real)
    feats2 = SparseRealFeatures()

    f = BinaryFile("fm_train_sparsereal.bin", "w")
    feats.save(f)

    f = AsciiFile("fm_train_sparsereal.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_sparsereal.bin")
    feats2.load(f)

    f = AsciiFile("fm_train_sparsereal.ascii")
    feats2.load(f)

    feats = RealFeatures(fm_train_real)
    feats2 = RealFeatures()

    f = BinaryFile("fm_train_real.bin", "w")
    feats.save(f)

    f = HDF5File("fm_train_real.h5", "w", "/data/doubles")
    feats.save(f)

    f = AsciiFile("fm_train_real.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_real.bin")
    feats2.load(f)
    #print "diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))

    f = AsciiFile("fm_train_real.ascii")
    feats2.load(f)
    #print "diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten()))

    lab = Labels(numpy.array([1.0, 2.0, 3.0]))
    lab2 = Labels()

    f = AsciiFile("label_train_twoclass.ascii", "w")
    lab.save(f)

    f = BinaryFile("label_train_twoclass.bin", "w")
    lab.save(f)

    f = HDF5File("label_train_real.h5", "w", "/data/labels")
    lab.save(f)

    f = AsciiFile("label_train_twoclass.ascii")
    lab2.load(f)

    f = BinaryFile("label_train_twoclass.bin")
    lab2.load(f)

    f = HDF5File("fm_train_real.h5", "r", "/data/doubles")
    feats2.load(f)
    #print feats2.get_feature_matrix()

    f = HDF5File("label_train_real.h5", "r", "/data/labels")
    lab2.load(f)
    #print lab2.get_labels()

    # clean up
    import os
    for f in ['fm_train_sparsereal.bin', 'fm_train_sparsereal.ascii',
              'fm_train_real.bin', 'fm_train_real.h5', 'fm_train_real.ascii',
              'label_train_real.h5',
              'label_train_twoclass.ascii', 'label_train_twoclass.bin']:
        os.unlink(f)
    return feats, feats2, lab, lab2
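# Hypothetical usage sketch for features_io_modular() above (not from the
# original file): round-trip a small random matrix through the ASCII, binary
# and HDF5 writers; the second argument is unused by the function, so any
# array will do. HDF5 support depends on how shogun was built.
from numpy import array
from numpy.random import rand
features_io_modular(rand(3, 5), array([1.0, -1.0, 1.0, -1.0, 1.0]))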