def classifier_multiclasslogisticregression_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, label_test_multiclass=label_testdat, z=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import MulticlassLogisticRegression

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = MulticlassLogisticRegression(z, feats_train, labels)
    classifier.train()

    label_pred = classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out

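# The snippets in this collection assume module-level fixture data (traindat,
# testdat, label_traindat, label_testdat, fea, gnd_raw) loaded elsewhere from
# shogun's sample files. Below is a minimal, hypothetical usage sketch for the
# classifier above with synthetic data: RealFeatures expects a
# num_features x num_vectors float64 matrix, and MulticlassLabels expects
# contiguous class ids 0..K-1 stored as float64.
def _demo_multiclass_logistic_regression():
    from numpy import arange, random
    random.seed(17)
    X_train = random.randn(2, 30)                   # 2 features, 30 vectors
    X_test = random.randn(2, 30)
    y_train = (arange(30) % 3).astype('float64')    # 3 classes: 0, 1, 2
    y_test = (arange(30) % 3).astype('float64')
    return classifier_multiclasslogisticregression_modular(
        X_train, X_test, y_train, y_test)
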
def evaluation_multiclassovrevaluation_modular(traindat, label_traindat, testdat, label_testdat):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import MulticlassLibLinear
    from shogun.Evaluation import MulticlassOVREvaluation, ROCEvaluation
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Mathematics import Math

    # reproducible results
    Math.init_random(1)

    ground_truth_labels = MulticlassLabels(label_traindat)
    svm = MulticlassLibLinear(1.0, RealFeatures(traindat), MulticlassLabels(label_traindat))
    svm.train()
    predicted_labels = svm.apply()

    # mean one-vs-rest ROC area
    binary_evaluator = ROCEvaluation()
    evaluator = MulticlassOVREvaluation(binary_evaluator)
    mean_roc = evaluator.evaluate(predicted_labels, ground_truth_labels)
    #print(mean_roc)

    # mean one-vs-rest accuracy
    binary_evaluator = ContingencyTableEvaluation(ACCURACY)
    evaluator = MulticlassOVREvaluation(binary_evaluator)
    mean_accuracy = evaluator.evaluate(predicted_labels, ground_truth_labels)
    #print(mean_accuracy)

    return mean_roc, mean_accuracy

def classifier_multiclasslinearmachine_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, label_test_multiclass=label_testdat,
        width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, \
        MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    # predict on the test features so the evaluation below compares like with like
    label_pred = mc_classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out

def classifier_multiclassmachine_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibSVM()
    classifier.set_epsilon(epsilon)
    #print(labels.get_labels())

    mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(), kernel, classifier, labels)
    mc_classifier.train()

    kernel.init(feats_train, feats_test)
    out = mc_classifier.apply().get_labels()
    return out

def classifier_multiclass_shareboost(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, RealSubsetFeatures, MulticlassLabels
    from shogun.Classifier import ShareBoost

    #print('Working on a problem of %d features and %d samples' % fm_train_real.shape)

    feats_train = RealFeatures(fm_train_real)
    labels = MulticlassLabels(label_train_multiclass)

    shareboost = ShareBoost(feats_train, labels, min(fm_train_real.shape[0] - 1, 30))
    shareboost.train()
    #print(shareboost.get_activeset())

    feats_test = RealSubsetFeatures(RealFeatures(fm_test_real), shareboost.get_activeset())
    label_pred = shareboost.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        #print('Accuracy = %.4f' % acc)

    return out

def classifier_multiclass_relaxedtree(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import RelaxedTree, MulticlassLibLinear
    from shogun.Kernel import GaussianKernel

    #print('Working on a problem of %d features and %d samples' % fm_train_real.shape)

    feats_train = RealFeatures(fm_train_real)
    labels = MulticlassLabels(label_train_multiclass)

    machine = RelaxedTree()
    machine.set_machine_for_confusion_matrix(MulticlassLibLinear())
    machine.set_kernel(GaussianKernel())
    machine.set_labels(labels)
    machine.train(feats_train)

    label_pred = machine.apply_multiclass(RealFeatures(fm_test_real))
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out

def evaluation_multiclassaccuracy_modular(ground_truth, predicted):
    from shogun.Features import MulticlassLabels
    from shogun.Evaluation import MulticlassAccuracy

    ground_truth_labels = MulticlassLabels(ground_truth)
    predicted_labels = MulticlassLabels(predicted)

    evaluator = MulticlassAccuracy()
    accuracy = evaluator.evaluate(predicted_labels, ground_truth_labels)
    return accuracy

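# A tiny, hypothetical usage sketch for the accuracy helper above: both
# arguments are plain numpy float64 arrays of class ids.
def _demo_multiclass_accuracy():
    from numpy import array
    truth = array([0.0, 1.0, 2.0, 1.0])
    pred = array([0.0, 1.0, 1.0, 1.0])
    return evaluation_multiclassaccuracy_modular(truth, pred)  # -> 0.75
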
def classifier_multiclass_ecoc_ovr(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    # plain one-vs-rest multiclass machine
    mc_classifier = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), feats_train, classifier, labels)
    mc_classifier.train()
    label_mc = mc_classifier.apply(feats_test)
    out_mc = label_mc.get_labels()

    # the same one-vs-rest scheme expressed as an ECOC code
    ecoc_strategy = ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())
    ecoc_classifier = LinearMulticlassMachine(ecoc_strategy, feats_train, classifier, labels)
    ecoc_classifier.train()
    label_ecoc = ecoc_classifier.apply(feats_test)
    out_ecoc = label_ecoc.get_labels()

    n_diff = (out_mc != out_ecoc).sum()
    if n_diff == 0:
        print("Same results for OvR and ECOCOvR")
    else:
        print("Different results for OvR and ECOCOvR (%d out of %d are different)"
              % (n_diff, len(out_mc)))

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_mc = evaluator.evaluate(label_mc, labels_test)
        acc_ecoc = evaluator.evaluate(label_ecoc, labels_test)
        print('Normal OVR Accuracy = %.4f' % acc_mc)
        print('ECOC OVR Accuracy = %.4f' % acc_ecoc)

    return out_ecoc, out_mc

def classifier_multiclass_ecoc_random(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    rnd_dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    rnd_sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    dense_classifier = LinearMulticlassMachine(rnd_dense_strategy, feats_train, classifier, labels)
    dense_classifier.train()
    label_dense = dense_classifier.apply(feats_test)
    out_dense = label_dense.get_labels()

    sparse_classifier = LinearMulticlassMachine(rnd_sparse_strategy, feats_train, classifier, labels)
    sparse_classifier.train()
    label_sparse = sparse_classifier.apply(feats_test)
    out_sparse = label_sparse.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_dense = evaluator.evaluate(label_dense, labels_test)
        acc_sparse = evaluator.evaluate(label_sparse, labels_test)
        print('Random Dense Accuracy = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense

def classifier_larank_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, C=0.9, num_threads=1, num_iter=5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    epsilon = 1e-5
    labels = MulticlassLabels(label_train_multiclass)

    svm = LaRank(C, kernel, labels)
    #svm.set_tau(1e-3)
    svm.set_batch_mode(False)
    #svm.io.enable_progress()
    svm.set_epsilon(epsilon)
    svm.train()

    out = svm.apply(feats_train).get_labels()
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()

def RunNBCShogun(q):
    # Taken from a benchmark script: `self`, Timer and Log come from the
    # enclosing benchmark class/module, and numpy (np), RealFeatures,
    # MulticlassLabels and GaussianNaiveBayes are module-level imports there.
    totalTimer = Timer()

    Log.Info("Loading dataset", self.verbose)
    try:
        # Load train and test dataset.
        trainData = np.genfromtxt(self.dataset[0], delimiter=',')
        testData = np.genfromtxt(self.dataset[1], delimiter=',')

        # Labels are the last row of the training set.
        labels = MulticlassLabels(trainData[:, (trainData.shape[1] - 1)])

        with totalTimer:
            # Transform into features.
            trainFeat = RealFeatures(trainData[:, :-1].T)
            testFeat = RealFeatures(testData.T)

            # Create and train the classifier.
            nbc = GaussianNaiveBayes(trainFeat, labels)
            nbc.train()

            # Run Naive Bayes Classifier on the test dataset.
            nbc.apply(testFeat).get_labels()
    except Exception as e:
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time

def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10):
    from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation
    from shogun.Features import MulticlassLabels
    from shogun.Mathematics import Math

    # reproducible results
    Math.init_random(1)

    centroids = run_clustering(features, ncenters)
    gnd_hat = assign_labels(features, centroids, ncenters)
    gnd = MulticlassLabels(ground_truth)

    AccuracyEval = ClusteringAccuracy()
    AccuracyEval.best_map(gnd_hat, gnd)
    accuracy = AccuracyEval.evaluate(gnd_hat, gnd)
    #print('Clustering accuracy = %.4f' % accuracy)

    MIEval = ClusteringMutualInformation()
    mutual_info = MIEval.evaluate(gnd_hat, gnd)
    #print('Clustering mutual information = %.4f' % mutual_info)

    # TODO mutual information does not work with serialization
    #return gnd, gnd_hat, accuracy, MIEval, mutual_info
    return gnd, gnd_hat, accuracy

def RunAllKnnShogun(q):
    # Taken from a benchmark script: `self`, `options`, Timer and Log come from
    # the enclosing benchmark class/module, and numpy (np), re, RealFeatures,
    # MulticlassLabels, EuclideanDistance and KNN (aliased SKNN) are
    # module-level imports there.
    totalTimer = Timer()

    # Load input dataset.
    # If the dataset contains two files then the second file is the query file.
    try:
        Log.Info("Loading dataset", self.verbose)
        if len(self.dataset) == 2:
            referenceData = np.genfromtxt(self.dataset[0], delimiter=',')
            queryData = np.genfromtxt(self.dataset[1], delimiter=',')
            queryFeat = RealFeatures(queryData.T)
        else:
            referenceData = np.genfromtxt(self.dataset, delimiter=',')

        # Labels are the last row of the dataset.
        labels = MulticlassLabels(referenceData[:, (referenceData.shape[1] - 1)])
        referenceData = referenceData[:, :-1]

        with totalTimer:
            # Get all the parameters.
            k = re.search(r"-k (\d+)", options)
            if not k:
                Log.Fatal("Required option: Number of nearest neighbors to find.")
                q.put(-1)
                return -1
            else:
                k = int(k.group(1))
                if (k < 1 or k > referenceData.shape[0]):
                    Log.Fatal("Invalid k: " + k.group(1) + "; must be greater than 0"
                              + " and less or equal than " + str(referenceData.shape[0]))
                    q.put(-1)
                    return -1

            referenceFeat = RealFeatures(referenceData.T)
            distance = EuclideanDistance(referenceFeat, referenceFeat)

            # Perform All K-Nearest-Neighbors.
            model = SKNN(k, distance, labels)
            model.train()

            if len(self.dataset) == 2:
                out = model.apply(queryFeat).get_labels()
            else:
                out = model.apply(referenceFeat).get_labels()
    except Exception as e:
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time

def classifier_multiclasslinearmachine_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, label_test_multiclass=label_testdat,
        lawidth=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    encoder = ECOCDiscriminantEncoder()
    encoder.set_features(feats_train)
    encoder.set_labels(labels)
    encoder.set_sffs_iterations(50)

    strategy = ECOCStrategy(encoder, ECOCHDDecoder())

    classifier = LinearMulticlassMachine(strategy, feats_train, classifier, labels)
    classifier.train()
    label_pred = classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out

def assign_labels(data, centroids, ncenters):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Distance import EuclidianDistance
    from shogun.Classifier import KNN
    from numpy import arange

    # each centroid gets its own label; a 1-NN machine then assigns the label
    # of the closest centroid to every data point
    labels = MulticlassLabels(arange(0., ncenters))
    fea = RealFeatures(data)
    fea_centroids = RealFeatures(centroids)
    # note: the old shogun modular API really spells the class 'EuclidianDistance'
    distance = EuclidianDistance(fea_centroids, fea_centroids)
    knn = KNN(1, distance, labels)
    knn.train()
    return knn.apply(fea)

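# The clustering-evaluation snippets also call a run_clustering() helper that
# is not included above. A minimal sketch of such a helper using shogun's
# KMeans (the lack of an explicit random seed here is an assumption; the
# callers seed shogun's RNG themselves via Math.init_random):
def run_clustering(data, k):
    from shogun.Features import RealFeatures
    from shogun.Distance import EuclidianDistance
    from shogun.Clustering import KMeans

    fea = RealFeatures(data)
    distance = EuclidianDistance(fea, fea)
    kmeans = KMeans(k, distance)
    kmeans.train()
    # cluster centers as a num_features x k matrix
    return kmeans.get_cluster_centers()
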
def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
        width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    from shogun.Features import CombinedFeatures, RealFeatures, MulticlassLabels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMulticlass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # Gaussian subkernel
    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = GaussianKernel(10, width)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    # linear subkernel
    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = LinearKernel()
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    # polynomial subkernel
    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = PolyKernel(10, 2)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = MulticlassLabels(label_train_multiclass)

    mkl = MKLMulticlass(C, kernel, labels)
    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)

    mkl.train()

    kernel.init(feats_train, feats_test)
    out = mkl.apply().get_labels()
    return out

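# Hypothetical invocation of the MKL example above; the hyper-parameter values
# are illustrative assumptions, and traindat/testdat/label_traindat are the
# module-level fixtures mentioned earlier.
def _demo_mkl_multiclass():
    return mkl_multiclass_modular(traindat, testdat, label_traindat,
                                  width=1.2, C=1.2, epsilon=1e-5, num_threads=1,
                                  mkl_epsilon=0.001, mkl_norm=1.5)
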
def classifier_gaussiannaivebayes_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import GaussianNaiveBayes

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    gnb = GaussianNaiveBayes(feats_train, labels)
    gnb_train = gnb.train()
    output = gnb.apply(feats_test).get_labels()
    return gnb, gnb_train, output

def evaluation_clustering_simple(n_data=100, sqrt_num_blobs=4, distance=5):
    from numpy import array
    from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation
    from shogun.Features import MulticlassLabels, GaussianBlobsDataGenerator
    from shogun.Mathematics import Math

    # reproducible results
    Math.init_random(1)

    # produce some Gaussian blobs to cluster
    ncenters = sqrt_num_blobs**2
    stretch = 1
    angle = 1
    gen = GaussianBlobsDataGenerator(sqrt_num_blobs, distance, stretch, angle)
    features = gen.get_streamed_features(n_data)
    X = features.get_feature_matrix()

    # compute approximate "ground truth" labels via taking the closest blob mean
    coords = array(range(0, sqrt_num_blobs * distance, distance))
    idx_0 = [abs(coords - x).argmin() for x in X[0]]
    idx_1 = [abs(coords - x).argmin() for x in X[1]]
    ground_truth = array([idx_0[i] * sqrt_num_blobs + idx_1[i] for i in range(n_data)],
                         dtype="float64")

    #for label in unique(ground_truth):
    #    indices = ground_truth == label
    #    plot(X[0][indices], X[1][indices], 'o')
    #show()

    centroids = run_clustering(features, ncenters)
    gnd_hat = assign_labels(features, centroids, ncenters)
    gnd = MulticlassLabels(ground_truth)

    AccuracyEval = ClusteringAccuracy()
    AccuracyEval.best_map(gnd_hat, gnd)
    accuracy = AccuracyEval.evaluate(gnd_hat, gnd)

    # in this case we know that the clustering has to be very good
    #print('Clustering accuracy = %.4f' % accuracy)
    assert (accuracy > 0.8)

    MIEval = ClusteringMutualInformation()
    mutual_info = MIEval.evaluate(gnd_hat, gnd)
    #print('Clustering mutual information = %.4f' % mutual_info)

    # TODO add multiclass labels and MI once the serialization works
    #return gnd, accuracy, mutual_info
    return accuracy

def classifier_multiclassmultipleoutputliblinear_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, label_test_multiclass=label_testdat,
        width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels, MulticlassMultipleOutputLabels
    from shogun.Classifier import MulticlassLibLinear

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = MulticlassLibLinear(C, feats_train, labels)
    classifier.train()

    # return the two best labels for each test vector
    label_pred = classifier.apply_multiclass_multiple_output(feats_test, 2)
    out = label_pred.get_labels()
    #print(out)
    return out

def classifier_conjugateindex_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import ConjugateIndex

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    ci = ConjugateIndex(feats_train, labels)
    ci.train()

    res = ci.apply(feats_test).get_labels()
    return ci, res

def evaluation_clustering(features=fea, ground_truth=gnd_raw, ncenters=10):
    from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation
    from shogun.Features import MulticlassLabels

    centroids = run_clustering(features, ncenters)
    gnd_hat = assign_labels(features, centroids, ncenters)
    gnd = MulticlassLabels(ground_truth)

    AccuracyEval = ClusteringAccuracy()
    AccuracyEval.best_map(gnd_hat, gnd)
    accuracy = AccuracyEval.evaluate(gnd_hat, gnd)
    #print('Clustering accuracy = %.4f' % accuracy)

    MIEval = ClusteringMutualInformation()
    mutual_info = MIEval.evaluate(gnd_hat, gnd)
    #print('Clustering mutual information = %.4f' % mutual_info)

    return gnd, accuracy, mutual_info

def classifier_qda_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_twoclass=label_traindat, tolerance=1e-4, store_covs=False):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import QDA

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_twoclass)

    qda = QDA(feats_train, labels, tolerance, store_covs)
    qda.train()
    qda.apply(feats_test).get_labels()

    qda.set_features(feats_test)
    return qda, qda.apply().get_labels()

def classifier_knn_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, k=3):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import KNN
    from shogun.Distance import EuclidianDistance

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    distance = EuclidianDistance(feats_train, feats_train)

    labels = MulticlassLabels(label_train_multiclass)

    knn = KNN(k, distance, labels)
    knn_train = knn.train()
    output = knn.apply(feats_test).get_labels()
    multiple_k = knn.classify_for_multiple_k()
    return knn, knn_train, output, multiple_k

def classifier_multiclassocas_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import MulticlassOCAS

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = MulticlassOCAS(C, feats_train, labels)
    classifier.train()

    out = classifier.apply(feats_test).get_labels()
    return out

def classifier_multiclassocas_modular(num_vec=10, num_class=3, distance=15,
        width=2.1, C=1, epsilon=1e-5, seed=1):
    from numpy import array, mod, random
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import MulticlassOCAS
    from shogun.Mathematics import Math_init_random

    # reproducible results
    random.seed(seed)
    Math_init_random(seed)

    # generate some training data where each class pair is linearly separable
    label_train = array([mod(x, num_class) for x in range(num_vec)], dtype="float64")
    label_test = array([mod(x, num_class) for x in range(num_vec)], dtype="float64")
    fm_train = array(random.randn(num_class, num_vec))
    fm_test = array(random.randn(num_class, num_vec))
    for i in range(len(label_train)):
        # cast to int: labels are stored as float64 but used as row indices here
        fm_train[int(label_train[i]), i] += distance
        fm_test[int(label_test[i]), i] += distance

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    labels = MulticlassLabels(label_train)

    classifier = MulticlassOCAS(C, feats_train, labels)
    classifier.train()

    out = classifier.apply(feats_test).get_labels()
    #print(label_test)
    #print(out)
    return out, classifier

def classifier_larank_modular(num_vec, num_class, distance, C=0.9, num_threads=1, num_iter=5, seed=1):
    from numpy import array, mod, random
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank
    from shogun.Mathematics import Math_init_random

    # reproducible results
    Math_init_random(seed)
    random.seed(seed)

    # generate some training data where each class pair is linearly separable
    label_train = array([mod(x, num_class) for x in range(num_vec)], dtype="float64")
    label_test = array([mod(x, num_class) for x in range(num_vec)], dtype="float64")
    fm_train = array(random.randn(num_class, num_vec))
    fm_test = array(random.randn(num_class, num_vec))
    for i in range(len(label_train)):
        # cast to int: labels are stored as float64 but used as row indices here
        fm_train[int(label_train[i]), i] += distance
        fm_test[int(label_test[i]), i] += distance

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    width = 2.1
    kernel = GaussianKernel(feats_train, feats_train, width)

    epsilon = 1e-5
    labels = MulticlassLabels(label_train)

    svm = LaRank(C, kernel, labels)
    #svm.set_tau(1e-3)
    svm.set_batch_mode(False)
    #svm.io.enable_progress()
    svm.set_epsilon(epsilon)
    svm.train()

    out = svm.apply(feats_test).get_labels()
    predictions = svm.apply()
    return predictions, svm, predictions.get_labels()

def classifier_gmnpsvm_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import GMNPSVM

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = MulticlassLabels(label_train_multiclass)

    svm = GMNPSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train(feats_train)

    kernel.init(feats_train, feats_test)
    out = svm.apply(feats_test).get_labels()
    return out, kernel

def classifier_multiclasslinearmachine_modular(fm_train_real=traindat, fm_test_real=testdat,
        label_train_multiclass=label_traindat, width=2.1, C=1, epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, \
        MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), feats_train, classifier, labels)
    mc_classifier.train()

    out = mc_classifier.apply().get_labels()
    return out

def serialization_complex_example(num=5, dist=1, dim=10, C=2.0, width=10):
    import os
    from numpy import concatenate, zeros, ones
    from numpy.random import randn, seed
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import GMNPSVM
    from shogun.Kernel import GaussianKernel
    from shogun.IO import SerializableHdf5File, SerializableAsciiFile, \
        SerializableJsonFile, SerializableXmlFile, MSG_DEBUG
    from shogun.Preprocessor import NormOne, LogPlusOne

    seed(17)

    # four Gaussian clusters, one per class
    data = concatenate((randn(dim, num), randn(dim, num) + dist,
                        randn(dim, num) + 2 * dist, randn(dim, num) + 3 * dist), axis=1)
    lab = concatenate((zeros(num), ones(num), 2 * ones(num), 3 * ones(num)))

    feats = RealFeatures(data)
    #feats.io.set_loglevel(MSG_DEBUG)
    kernel = GaussianKernel(feats, feats, width)

    labels = MulticlassLabels(lab)

    svm = GMNPSVM(C, kernel, labels)

    feats.add_preprocessor(NormOne())
    feats.add_preprocessor(LogPlusOne())
    feats.set_preprocessed(1)
    svm.train(feats)
    #svm.print_serializable()

    # save the trained machine in all supported serialization formats
    fstream = SerializableHdf5File("blaah.h5", "w")
    status = svm.save_serializable(fstream)
    check_status(status, 'h5')

    fstream = SerializableAsciiFile("blaah.asc", "w")
    status = svm.save_serializable(fstream)
    check_status(status, 'asc')

    fstream = SerializableJsonFile("blaah.json", "w")
    status = svm.save_serializable(fstream)
    check_status(status, 'json')

    fstream = SerializableXmlFile("blaah.xml", "w")
    status = svm.save_serializable(fstream)
    check_status(status, 'xml')

    # load each dump back and retrain to make sure the restored state is usable
    fstream = SerializableHdf5File("blaah.h5", "r")
    new_svm = GMNPSVM()
    status = new_svm.load_serializable(fstream)
    check_status(status, 'h5')
    new_svm.train()

    fstream = SerializableAsciiFile("blaah.asc", "r")
    new_svm = GMNPSVM()
    status = new_svm.load_serializable(fstream)
    check_status(status, 'asc')
    new_svm.train()

    fstream = SerializableJsonFile("blaah.json", "r")
    new_svm = GMNPSVM()
    status = new_svm.load_serializable(fstream)
    check_status(status, 'json')
    new_svm.train()

    fstream = SerializableXmlFile("blaah.xml", "r")
    new_svm = GMNPSVM()
    status = new_svm.load_serializable(fstream)
    check_status(status, 'xml')
    new_svm.train()

    os.unlink("blaah.h5")
    os.unlink("blaah.asc")
    os.unlink("blaah.json")
    os.unlink("blaah.xml")

    return svm, new_svm

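# The serialization example above relies on a check_status() helper that is
# not defined in this collection. A minimal sketch (the message wording is an
# assumption; the status flag is the boolean returned by save_serializable()
# and load_serializable()):
def check_status(status, suffix):
    assert status, "ERROR reading/writing .%s file" % suffix
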
def features_io_modular(fm_train_real, label_train_twoclass):
    import numpy
    from shogun.Features import SparseRealFeatures, RealFeatures, MulticlassLabels
    from shogun.Kernel import GaussianKernel
    from shogun.IO import AsciiFile, BinaryFile, HDF5File

    # write and re-read sparse real features
    feats = SparseRealFeatures(fm_train_real)
    feats2 = SparseRealFeatures()

    f = BinaryFile("fm_train_sparsereal.bin", "w")
    feats.save(f)
    f = AsciiFile("fm_train_sparsereal.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_sparsereal.bin")
    feats2.load(f)
    f = AsciiFile("fm_train_sparsereal.ascii")
    feats2.load(f)

    # write and re-read dense real features
    feats = RealFeatures(fm_train_real)
    feats2 = RealFeatures()

    f = BinaryFile("fm_train_real.bin", "w")
    feats.save(f)
    f = HDF5File("fm_train_real.h5", "w", "/data/doubles")
    feats.save(f)
    f = AsciiFile("fm_train_real.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_real.bin")
    feats2.load(f)
    #print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten() - fm_train_real.flatten())))

    f = AsciiFile("fm_train_real.ascii")
    feats2.load(f)
    #print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten() - fm_train_real.flatten())))

    # write and re-read multiclass labels
    lab = MulticlassLabels(numpy.array([0.0, 1.0, 2.0, 3.0]))
    lab2 = MulticlassLabels()
    f = AsciiFile("label_train_twoclass.ascii", "w")
    lab.save(f)
    f = BinaryFile("label_train_twoclass.bin", "w")
    lab.save(f)
    f = HDF5File("label_train_real.h5", "w", "/data/labels")
    lab.save(f)

    f = AsciiFile("label_train_twoclass.ascii")
    lab2.load(f)
    f = BinaryFile("label_train_twoclass.bin")
    lab2.load(f)

    f = HDF5File("fm_train_real.h5", "r", "/data/doubles")
    feats2.load(f)
    #print(feats2.get_feature_matrix())
    f = HDF5File("label_train_real.h5", "r", "/data/labels")
    lab2.load(f)
    #print(lab2.get_labels())

    # clean up
    import os
    for f in ['fm_train_sparsereal.bin', 'fm_train_sparsereal.ascii',
              'fm_train_real.bin', 'fm_train_real.h5', 'fm_train_real.ascii',
              'label_train_real.h5', 'label_train_twoclass.ascii',
              'label_train_twoclass.bin']:
        os.unlink(f)

    return feats, feats2, lab, lab2

def evaluation_cross_validation_multiclass_storage(traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage, CrossValidationMulticlassStorage
    from shogun.Evaluation import MulticlassAccuracy, F1Measure
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import MulticlassLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import MKLMulticlass
    from shogun.Mathematics import Statistics, MSG_DEBUG

    # training data: combined features, all on the same underlying data
    features = RealFeatures(traindat)
    comb_features = CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels = MulticlassLabels(label_traindat)

    # kernel: different Gaussians combined
    kernel = CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # create MKL using libsvm; due to a memory bug, interleaved is not possible
    svm = MKLMulticlass(1.0, kernel, labels)
    svm.set_kernel(kernel)

    # splitting strategy for 5-fold cross-validation (for classification it is
    # better to use stratified splitting; the plain CrossValidationSplitting is
    # also available)
    splitting_strategy = StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium = MulticlassAccuracy()

    # cross-validation instance
    cross_validation = CrossValidation(svm, comb_features, labels,
                                       splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross-validation output classes
    #cross_validation.add_cross_validation_output(CrossValidationPrintOutput())
    #mkl_storage = CrossValidationMKLStorage()
    #cross_validation.add_cross_validation_output(mkl_storage)
    multiclass_storage = CrossValidationMulticlassStorage()
    multiclass_storage.append_binary_evaluation(F1Measure())
    cross_validation.add_cross_validation_output(multiclass_storage)
    cross_validation.set_num_runs(3)

    # perform cross-validation
    result = cross_validation.evaluate()

    roc_0_0_0 = multiclass_storage.get_fold_ROC(0, 0, 0)
    #print(roc_0_0_0)
    auc_0_0_0 = multiclass_storage.get_fold_evaluation_result(0, 0, 0, 0)
    #print(auc_0_0_0)
    return roc_0_0_0, auc_0_0_0