示例#1
0
def classifier_gpbtsvm_modular(train_fname=traindat,
                               test_fname=testdat,
                               label_fname=label_traindat,
                               width=2.1,
                               C=1,
                               epsilon=1e-5):
    """Train a GPBTSVM with a Gaussian kernel and classify the test set.

    Features and labels are loaded through ``CSVFile`` from ``train_fname``,
    ``test_fname`` and ``label_fname``.  ``width`` is handed to
    ``GaussianKernel``, ``C`` to the ``GPBTSVM`` constructor and ``epsilon``
    to ``set_epsilon``.

    Returns ``(predictions, svm, labels)`` where ``labels`` is
    ``predictions.get_labels()``, or ``None`` when ``GPBTSVM`` cannot be
    imported from modshogun.
    """
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import GaussianKernel
    from modshogun import CSVFile
    try:
        from modshogun import GPBTSVM
    except ImportError:
        print("GPBTSVM not available")
        # Return instead of calling exit(): a missing optional solver should
        # not terminate the calling interpreter, and the other examples in
        # this file bail out the same way.
        return

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))
    labels = BinaryLabels(CSVFile(label_fname))
    kernel = GaussianKernel(feats_train, feats_train, width)

    svm = GPBTSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    predictions = svm.apply(feats_test)
    return predictions, svm, predictions.get_labels()
def classifier_multiclassliblinear_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Fit a MulticlassLibLinear machine and label the test matrix.

    ``width`` and ``epsilon`` are accepted but not used by the body.  When
    ``label_test_multiclass`` is not ``None`` the multiclass accuracy of the
    predictions is printed.

    Returns the predicted labels as produced by ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassLibLinear

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    machine = MulticlassLibLinear(C, train_features, train_labels)
    machine.train()

    predicted = machine.apply(test_features)
    out = predicted.get_labels()

    # Nothing to score against: hand back the raw predictions.
    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    reference = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predicted, reference)
    print('Accuracy = %.4f' % accuracy)
    return out
示例#3
0
def classifier_multiclass_shareboost(fm_train_real=traindat,
                                     fm_test_real=testdat,
                                     label_train_multiclass=label_traindat,
                                     label_test_multiclass=label_testdat,
                                     lawidth=2.1,
                                     C=1,
                                     epsilon=1e-5):
    """Train ShareBoost and predict on the test data restricted to its active set.

    The third ShareBoost argument is ``min(rows_of_train - 1, 30)``.
    ``lawidth``, ``C`` and ``epsilon`` are accepted but unused.  When test
    labels are supplied the accuracy is evaluated but not printed.

    Returns the predicted labels as produced by ``get_labels()``.
    """
    from modshogun import RealFeatures, RealSubsetFeatures, MulticlassLabels
    from modshogun import ShareBoost

    train_features = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    feature_budget = min(fm_train_real.shape[0] - 1, 30)
    booster = ShareBoost(train_features, train_labels, feature_budget)
    booster.train()

    # The test features are wrapped so only the booster's active set is used.
    full_test = RealFeatures(fm_test_real)
    test_features = RealSubsetFeatures(full_test, booster.get_activeset())
    predicted = booster.apply(test_features)
    out = predicted.get_labels()

    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    reference = MulticlassLabels(label_test_multiclass)
    MulticlassAccuracy().evaluate(predicted, reference)
    return out
def regression_svrlight_modular(fm_train=traindat, fm_test=testdat,
                                label_train=label_traindat, width=1.2, C=1,
                                epsilon=1e-5, tube_epsilon=1e-2,
                                num_threads=3):
    """Fit an SVRLight regressor with a Gaussian kernel and predict on the test set.

    Returns ``(labels, kernel)``; the kernel has been re-initialised on the
    (train, test) feature pair before prediction.  Returns ``None`` after
    printing a message when SVRLight cannot be imported.
    """
    from modshogun import RegressionLabels, RealFeatures
    from modshogun import GaussianKernel
    try:
        from modshogun import SVRLight
    except ImportError:
        print('No support for SVRLight available.')
        return

    train_features = RealFeatures(fm_train)
    test_features = RealFeatures(fm_test)
    gaussian = GaussianKernel(train_features, train_features, width)
    targets = RegressionLabels(label_train)

    machine = SVRLight(C, epsilon, gaussian, targets)
    machine.set_tube_epsilon(tube_epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Point the kernel at the test data; apply() is then called without arguments.
    gaussian.init(train_features, test_features)
    predicted = machine.apply().get_labels()
    return predicted, gaussian
示例#5
0
        def RunDTCShogun(q):
            """Time model building plus prediction and report it through ``q``.

            Puts and returns the elapsed time, or ``-1`` if building or
            applying the model raises.  Raises ``Exception`` when
            ``options`` is non-empty.
            """
            timer = Timer()

            Log.Info("Loading dataset", self.verbose)
            raw_train, raw_labels = SplitTrainData(self.dataset)
            train_feats = RealFeatures(raw_train.T)
            train_labels = MulticlassLabels(raw_labels)
            test_feats = RealFeatures(LoadDataset(self.dataset[1]).T)

            # Any option still present at this point is treated as unknown.
            if len(options) != 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            try:
                with timer:
                    self.model = self.BuildModel(train_feats, train_labels,
                                                 options)
                    # Predictions are computed for timing only and discarded.
                    self.model.apply_multiclass(test_feats).get_labels()
            except Exception:
                q.put(-1)
                return -1

            elapsed = timer.ElapsedTime()
            q.put(elapsed)
            return elapsed
示例#6
0
        def RunNBCShogun():
            """Time feature wrapping, model building and prediction.

            Returns ``[time, predictions]`` when more than one dataset file
            is configured, ``[time]`` otherwise, and ``[-1]`` if anything in
            the load/train/predict sequence raises.
            """
            timer = Timer()
            self.predictions = None
            Log.Info("Loading dataset", self.verbose)
            try:
                train = np.genfromtxt(self.dataset[0], delimiter=',')
                test = np.genfromtxt(self.dataset[1], delimiter=',')

                # The last column of the training matrix holds the labels.
                label_column = train[:, (train.shape[1] - 1)]
                labels = MulticlassLabels(label_column)

                with timer:
                    train_feat = RealFeatures(train[:, :-1].T)
                    test_feat = RealFeatures(test.T)

                    self.model = self.BuildModel(train_feat, labels, options)

                    predicted = self.model.apply_multiclass(test_feat)
                    self.predictions = predicted.get_labels()

            except Exception:
                return [-1]

            elapsed = timer.ElapsedTime()
            if len(self.dataset) <= 1:
                return [elapsed]
            return [elapsed, self.predictions]
def classifier_multiclassocas_modular(num_vec=10, num_class=3, distance=15,
                                      width=2.1, C=1, epsilon=1e-5, seed=1):
    """Train MulticlassOCAS on synthetic data and classify a synthetic test set.

    ``num_vec`` vectors of dimension ``num_class`` are drawn from a standard
    normal distribution; the component indexed by each vector's class label
    is shifted by ``distance``.  ``width`` and ``epsilon`` are accepted but
    unused.

    Returns ``(predicted_labels, classifier)``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import MulticlassOCAS
    from modshogun import Math_init_random

    # reproducible results
    random.seed(seed)
    Math_init_random(seed)

    # generate some training data where each class pair is linearly separable
    label_train = array([mod(x, num_class) for x in range(num_vec)],
                        dtype="float64")
    label_test = array([mod(x, num_class) for x in range(num_vec)],
                       dtype="float64")
    fm_train = array(random.randn(num_class, num_vec))
    fm_test = array(random.randn(num_class, num_vec))
    for i in range(len(label_train)):
        # Labels are stored as float64 (Shogun wants float labels) but NumPy
        # only accepts integer indices, so convert before indexing.
        fm_train[int(label_train[i]), i] += distance
        fm_test[int(label_test[i]), i] += distance

    feats_train = RealFeatures(fm_train)
    feats_test = RealFeatures(fm_test)

    labels = MulticlassLabels(label_train)

    classifier = MulticlassOCAS(C, feats_train, labels)
    classifier.train()

    out = classifier.apply(feats_test).get_labels()
    return out, classifier
def classifier_multiclassmachine_modular(fm_train_real=traindat,
                                         fm_test_real=testdat,
                                         label_train_multiclass=label_traindat,
                                         width=2.1,
                                         C=1,
                                         epsilon=1e-5):
    """Train a one-vs-rest kernel multiclass machine built on LibSVM.

    A Gaussian kernel of the given ``width`` is used; ``epsilon`` is set on
    the LibSVM base machine and ``C`` is accepted but unused.

    Returns the predicted test labels as produced by ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    gaussian = GaussianKernel(train_features, train_features, width)
    train_labels = MulticlassLabels(label_train_multiclass)

    base_svm = LibSVM()
    base_svm.set_epsilon(epsilon)

    strategy = MulticlassOneVsRestStrategy()
    machine = KernelMulticlassMachine(strategy, gaussian, base_svm,
                                      train_labels)
    machine.train()

    # Re-initialise the kernel on (train, test) before the argument-less apply().
    gaussian.init(train_features, test_features)
    return machine.apply().get_labels()
示例#9
0
    def RunRandomForestShogun(q):
      """Time random-forest training plus prediction and report it via ``q``.

      ``options`` is searched for ``-n <int>`` (stored in ``self.numTrees``,
      default 10) and ``-f <int>`` (stored in ``self.form``, default 1).

      Puts and returns the elapsed time, or ``-1`` if building or applying
      the model raises.
      """
      totalTimer = Timer()

      Log.Info("Loading dataset", self.verbose)
      trainData, labels = SplitTrainData(self.dataset)
      trainData = RealFeatures(trainData.T)
      labels = MulticlassLabels(labels)
      testData = RealFeatures(LoadDataset(self.dataset[1]).T)

      # Raw strings: "\d" in a normal literal is an invalid escape sequence.
      # Number of Trees.
      n = re.search(r"-n (\d+)", options)
      # Number of attributes to be chosen randomly to select from.
      f = re.search(r"-f (\d+)", options)

      self.form = 1 if not f else int(f.group(1))
      self.numTrees = 10 if not n else int(n.group(1))

      try:
        with totalTimer:
          self.model = self.BuildModel(trainData, labels, options)
          # Run the Random Forest Classifier on the test dataset.
          self.model.apply_multiclass(testData).get_labels()
      except Exception:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
def classifier_multiclasslogisticregression_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        z=1,
        epsilon=1e-5):
    """Fit MulticlassLogisticRegression and label the test matrix.

    ``z`` is forwarded to the constructor; ``epsilon`` is accepted but
    unused.  When test labels are supplied, the accuracy is printed.

    Returns the predicted labels, or ``None`` (after printing a hint) when
    the class cannot be imported.
    """
    from modshogun import RealFeatures, MulticlassLabels
    try:
        from modshogun import MulticlassLogisticRegression
    except ImportError:
        print("recompile shogun with Eigen3 support")
        return

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    machine = MulticlassLogisticRegression(z, train_features, train_labels)
    machine.train()

    predicted = machine.apply(test_features)
    out = predicted.get_labels()

    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    reference = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predicted, reference)
    print('Accuracy = %.4f' % accuracy)
    return out
示例#11
0
def metric_lmnn_modular(train_fname=traindat,
                        test_fname=testdat,
                        label_train_fname=label_traindat,
                        k=3):
    """Learn an LMNN distance and classify the test set with KNN on top of it.

    The same ``k`` is passed to both LMNN and KNN.  Data is loaded through
    ``CSVFile``.

    Returns ``(lmnn, predicted_labels)``, or ``None`` if the Shogun classes
    cannot be imported.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, LMNN, KNN, CSVFile
    except ImportError:
        return

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = MulticlassLabels(CSVFile(label_train_fname))

    # Metric learning step.
    lmnn = LMNN(train_features, train_labels, k)
    lmnn.train()
    learned_distance = lmnn.get_distance()

    # Nearest-neighbour classification under the learned distance.
    neighbours = KNN(k, learned_distance, train_labels)
    neighbours.train()
    predicted = neighbours.apply(test_features).get_labels()

    return lmnn, predicted
示例#12
0
    def RunLinearRegressionShogun(q):
      """Time least-squares regression training (and prediction, if a test
      file is given) and report the result through ``q``.

      When ``self.dataset`` has exactly two entries, the second is loaded as
      the test set and its predictions are stored in ``self.predictions``.

      Puts and returns the elapsed time, or ``-1`` if any step raises.
      """
      totalTimer = Timer()

      try:
        Log.Info("Loading dataset", self.verbose)
        if len(self.dataset) == 2:
          testSet = np.genfromtxt(self.dataset[1], delimiter=',')

        # Split the training file into predictors and responses.
        X, y = SplitTrainData(self.dataset)

        with totalTimer:
          # Perform linear regression.
          model = LeastSquaresRegression(RealFeatures(X.T), RegressionLabels(y))
          model.train()
          b = model.get_w()

          if len(self.dataset) == 2:
            # Predict with the model trained above; the previous code used
            # an undefined name here, so every two-file run ended in the
            # except branch and reported -1.
            pred = model.apply(RealFeatures(testSet.T))
            self.predictions = pred.get_labels()

      except Exception:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
示例#13
0
def classifier_svmocas_modular(train_fname=traindat,
                               test_fname=testdat,
                               label_fname=label_traindat,
                               C=0.9,
                               epsilon=1e-5,
                               num_threads=1):
    """Train SVMOcas (bias disabled) on CSV data and classify the test set.

    Returns ``(predictions, svm, labels)`` where ``labels`` is
    ``predictions.get_labels()``, or ``None`` after printing a message when
    SVMOcas cannot be imported.
    """
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import CSVFile
    try:
        from modshogun import SVMOcas
    except ImportError:
        print("SVMOcas not available")
        return

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    machine = SVMOcas(C, train_features, train_labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.set_bias_enabled(False)
    machine.train()

    # The accessors are still exercised, but their values are not used.
    machine.get_bias()
    machine.get_w()

    predicted = machine.apply(test_features)
    return predicted, machine, predicted.get_labels()
示例#14
0
def preprocessor_normone_modular(fm_train_real=traindat,
                                 fm_test_real=testdat,
                                 width=1.4,
                                 size_cache=10):
    """Apply the NormOne preprocessor to both feature sets and compute
    Chi2 kernel matrices.

    The preprocessor is initialised on the training features only.

    Returns ``(km_train, km_test, kernel)``.
    """
    from modshogun import Chi2Kernel
    from modshogun import RealFeatures
    from modshogun import NormOne

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)

    norm_one = NormOne()
    norm_one.init(train_features)
    for features in (train_features, test_features):
        features.add_preprocessor(norm_one)
        features.apply_preprocessor()

    chi2 = Chi2Kernel(train_features, train_features, width, size_cache)
    train_matrix = chi2.get_kernel_matrix()

    chi2.init(train_features, test_features)
    test_matrix = chi2.get_kernel_matrix()

    return train_matrix, test_matrix, chi2
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        width=2.1,
        C=1,
        epsilon=1e-5):
    """Train a one-vs-one LinearMulticlassMachine on top of LibLinear.

    ``width`` and ``C`` are accepted but unused.  When test labels are
    supplied, the accuracy of the predictions is printed.

    Returns the labels produced by the argument-less ``apply()`` call.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

    train_features = RealFeatures(fm_train_real)
    # Built as in the original, although nothing below reads it.
    test_features = RealFeatures(fm_test_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    base_machine = LibLinear(L2R_L2LOSS_SVC)
    base_machine.set_epsilon(epsilon)
    base_machine.set_bias_enabled(True)

    strategy = MulticlassOneVsOneStrategy()
    machine = LinearMulticlassMachine(strategy, train_features, base_machine,
                                      train_labels)
    machine.train()

    # NOTE(review): apply() is called without the test features, yet the
    # result is scored against the *test* labels below.  Sibling examples
    # pass feats_test explicitly - confirm whether that is intended here.
    predicted = machine.apply()
    out = predicted.get_labels()

    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    reference = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predicted, reference)
    print('Accuracy = %.4f' % accuracy)
    return out
示例#16
0
def kernel_combined_modular(fm_train_real=traindat, fm_test_real=testdat,
                            fm_train_dna=traindna, fm_test_dna=testdna):
    """Build a CombinedKernel from a Gaussian, a fixed-degree string and a
    local-alignment string kernel and compute its train and test matrices.

    Returns ``(km_train, km_test, kernel)``.
    """
    from modshogun import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel
    from modshogun import RealFeatures, StringCharFeatures, CombinedFeatures, DNA

    combined = CombinedKernel()
    train_bundle = CombinedFeatures()
    test_bundle = CombinedFeatures()

    def attach(sub_train, sub_test, sub_kernel):
        # Register one (train features, test features, kernel) triple.
        train_bundle.append_feature_obj(sub_train)
        test_bundle.append_feature_obj(sub_test)
        combined.append_kernel(sub_kernel)

    # Real-valued part.
    attach(RealFeatures(fm_train_real),
           RealFeatures(fm_test_real),
           GaussianKernel(10, 1.1))

    # DNA strings compared with a fixed-degree string kernel.
    degree = 3
    attach(StringCharFeatures(fm_train_dna, DNA),
           StringCharFeatures(fm_test_dna, DNA),
           FixedDegreeStringKernel(10, degree))

    # The same DNA strings compared with a local-alignment kernel.
    attach(StringCharFeatures(fm_train_dna, DNA),
           StringCharFeatures(fm_test_dna, DNA),
           LocalAlignmentStringKernel(10))

    combined.init(train_bundle, train_bundle)
    train_matrix = combined.get_kernel_matrix()
    combined.init(train_bundle, test_bundle)
    test_matrix = combined.get_kernel_matrix()
    return train_matrix, test_matrix, combined
def multiclass_randomforest_modular(train=traindat,
                                    test=testdat,
                                    labels=label_traindat,
                                    ft=feattypes):
    """Train a RandomForest with majority voting and classify the test set.

    Data is loaded through ``CSVFile``.  The forest is constructed with the
    literal arguments ``20`` and ``1``; ``ft`` is passed to
    ``set_feature_types``.

    Returns ``(forest, predicted_labels)``, or ``None`` after printing a
    message when the Shogun classes cannot be imported.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, CSVFile, RandomForest, MajorityVote
    except ImportError:
        print("Could not import Shogun modules")
        return

    train_features = RealFeatures(CSVFile(train))
    test_features = RealFeatures(CSVFile(test))
    targets = MulticlassLabels(CSVFile(labels))

    forest = RandomForest(train_features, targets, 20, 1)
    forest.set_feature_types(ft)
    forest.set_combination_rule(MajorityVote())
    forest.train()

    predicted = forest.apply_multiclass(test_features).get_labels()
    return forest, predicted
示例#18
0
文件: nbc.py 项目: nehagup/benchmarks
        def RunNBCShogun(q):
            """Time Gaussian naive Bayes training plus prediction and report
            it through ``q``.

            Puts and returns the elapsed time, or ``-1`` if loading,
            training or prediction raises.
            """
            timer = Timer()

            Log.Info("Loading dataset", self.verbose)
            try:
                train = np.genfromtxt(self.dataset[0], delimiter=',')
                test = np.genfromtxt(self.dataset[1], delimiter=',')

                # The last column of the training matrix holds the labels.
                label_column = train[:, (train.shape[1] - 1)]
                labels = MulticlassLabels(label_column)

                with timer:
                    train_feat = RealFeatures(train[:, :-1].T)
                    test_feat = RealFeatures(test.T)

                    classifier = GaussianNaiveBayes(train_feat, labels)
                    classifier.train()

                    # Predictions are computed for timing only and discarded.
                    classifier.apply(test_feat).get_labels()
            except Exception:
                q.put(-1)
                return -1

            elapsed = timer.ElapsedTime()
            q.put(elapsed)
            return elapsed
示例#19
0
def preprocessor_randomfouriergausspreproc_modular(fm_train_real=traindat,
                                                   fm_test_real=testdat,
                                                   width=1.4,
                                                   size_cache=10):
    """Apply RandomFourierGaussPreproc to both feature sets and compute
    Chi2 kernel matrices.

    The preprocessor is initialised on the training features only.

    Returns ``(km_train, km_test, kernel)``.
    """
    from modshogun import Chi2Kernel
    from modshogun import RealFeatures
    from modshogun import RandomFourierGaussPreproc

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)

    fourier = RandomFourierGaussPreproc()
    fourier.init(train_features)
    for features in (train_features, test_features):
        features.add_preprocessor(fourier)
        features.apply_preprocessor()

    chi2 = Chi2Kernel(train_features, train_features, width, size_cache)
    train_matrix = chi2.get_kernel_matrix()

    chi2.init(train_features, test_features)
    test_matrix = chi2.get_kernel_matrix()

    return train_matrix, test_matrix, chi2
def classifier_multiclass_relaxedtree(fm_train_real=traindat,
                                      fm_test_real=testdat,
                                      label_train_multiclass=label_traindat,
                                      label_test_multiclass=label_testdat,
                                      lawidth=2.1,
                                      C=1,
                                      epsilon=1e-5):
    """Train a RelaxedTree and classify the test matrix.

    The tree is given a default-constructed MulticlassLibLinear for its
    confusion matrix and a default-constructed GaussianKernel.  ``lawidth``,
    ``C`` and ``epsilon`` are accepted but unused.  When test labels are
    supplied, the accuracy is printed.

    Returns the predicted labels as produced by ``get_labels()``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    from modshogun import RelaxedTree, MulticlassLibLinear
    from modshogun import GaussianKernel

    train_features = RealFeatures(fm_train_real)
    train_labels = MulticlassLabels(label_train_multiclass)

    tree = RelaxedTree()
    tree.set_machine_for_confusion_matrix(MulticlassLibLinear())
    tree.set_kernel(GaussianKernel())
    tree.set_labels(train_labels)
    tree.train(train_features)

    test_features = RealFeatures(fm_test_real)
    predicted = tree.apply_multiclass(test_features)
    out = predicted.get_labels()

    if label_test_multiclass is None:
        return out

    from modshogun import MulticlassAccuracy
    reference = MulticlassLabels(label_test_multiclass)
    accuracy = MulticlassAccuracy().evaluate(predicted, reference)
    print('Accuracy = %.4f' % accuracy)
    return out
示例#21
0
        def RunRandomForestShogun():
            """Time random-forest training plus prediction.

            Consumes ``num_trees`` (required) and ``dimensions`` (optional,
            default 1) from ``options``; anything left over is rejected.

            Returns ``[time, predictions]``, or ``[-1]`` if building or
            applying the model raises.  Raises ``Exception`` for a missing
            ``num_trees`` or unknown leftover options.
            """
            timer = Timer()

            Log.Info("Loading dataset", self.verbose)
            raw_train, raw_labels = SplitTrainData(self.dataset)
            train_feats = RealFeatures(raw_train.T)
            train_labels = MulticlassLabels(raw_labels)
            test_feats = RealFeatures(LoadDataset(self.dataset[1]).T)

            if "num_trees" not in options:
                Log.Fatal("Required parameter 'num_trees' not specified!")
                raise Exception("missing parameter")
            self.numTrees = int(options.pop("num_trees"))

            self.form = 1
            if "dimensions" in options:
                self.form = int(options.pop("dimensions"))

            # Every recognised key has been popped by now.
            if len(options) != 0:
                Log.Fatal("Unknown parameters: " + str(options))
                raise Exception("unknown parameters")

            try:
                with timer:
                    self.model = self.BuildModel(train_feats, train_labels,
                                                 options)
                    predicted = self.model.apply_multiclass(test_feats)
                    self.predictions = predicted.get_labels()
            except Exception:
                return [-1]

            elapsed = timer.ElapsedTime()
            return [elapsed, self.predictions]
示例#22
0
    def predict(self, image):
        """Predict the face shown in ``image``.

        The image is flattened into a single column, projected with
        ``self.pca`` and compared (Euclidean distance) against every entry
        of ``self._projections``.

        Returns the entry of ``self._labels`` belonging to the closest stored
        projection, or ``-1`` when no stored projection is closer than the
        initial bound of ``1e100`` (e.g. when there are none).
        """
        # Flatten the 2-D image into one column vector of float64.
        imageAsColumn = np.asarray(
            image.reshape(image.shape[0] * image.shape[1], 1), np.float64)
        # Project into the subspace.
        p = self.pca.apply_to_feature_vector(
            RealFeatures(imageAsColumn).get_feature_vector(0))

        # The probe does not change between comparisons, so wrap it once
        # instead of rebuilding it for every stored projection.
        test = RealFeatures(np.asmatrix(p, np.float64).T)

        minDist = 1e100
        minClass = -1
        # Search for the stored projection that matches best.
        for sampleIdx in range(len(self._projections)):
            projection = RealFeatures(
                np.asmatrix(self._projections[sampleIdx], np.float64).T)
            dist = EuclideanDistance(test, projection).distance(0, 0)

            if dist < minDist:
                minDist = dist
                minClass = self._labels[sampleIdx]

        return minClass
示例#23
0
def regression_cartree_modular(num_train=500, num_test=50, x_range=15,
                               noise_var=0.2, ft=feattypes):
    """Fit a CART regression tree to noisy ``y = x`` data and predict on an
    evenly spaced test grid.

    Training inputs are uniform draws scaled by ``x_range``; the targets add
    normal noise scaled by ``noise_var``.  The random seed is fixed to 1.

    Returns ``(tree, predicted_labels)``, or ``None`` after printing a
    message when the imports fail.
    """
    try:
        from modshogun import RealFeatures, RegressionLabels, CSVFile, CARTree, PT_REGRESSION
        from numpy import random
    except ImportError:
        print("Could not import Shogun and/or numpy modules")
        return

    random.seed(1)

    # Training data: y = x plus noise.
    train_x = random.rand(1, num_train) * x_range
    train_y = train_x + random.randn(num_train) * noise_var

    # Test grid: num_test points of the form i / num_test * x_range.
    grid = [float(i) / num_test * x_range for i in range(num_test)]
    test_x = array([grid])

    train_features = RealFeatures(train_x)
    test_features = RealFeatures(test_x)
    targets = RegressionLabels(train_y[0])

    tree = CARTree(ft, PT_REGRESSION, 5, True)
    tree.set_labels(targets)
    tree.train(train_features)

    predicted = tree.apply_regression(test_features).get_labels()
    return tree, predicted
示例#24
0
def features_dense_zero_copy_modular(in_data=data):
    """Exercise buffer sharing between NumPy arrays and RealFeatures.

    ``in_data`` is copied into a Fortran-ordered float64 array that is
    handed to ``RealFeatures.frombuffer``; further arrays and feature
    objects are then created from it, and finally the first column of the
    first feature object is set to zero.

    Returns the Fortran-ordered array, or ``None`` (after printing a
    message) when the installed NumPy is older than 1.5.
    """
    import re

    feats = None

    # Compare the version numerically.  A plain string comparison gets it
    # wrong from NumPy 1.10 on, because '1.10' sorts before '1.5'.
    parsed = re.match(r'(\d+)\.(\d+)', numpy.__version__)
    version = tuple(int(part) for part in parsed.groups()) if parsed else (0, 0)

    if version >= (1, 5):
        feats = numpy.array(in_data, dtype=float64, order='F')

        a = RealFeatures()
        a.frombuffer(feats, False)

        b = numpy.array(a, copy=False)
        c = numpy.array(a, copy=True)

        d = RealFeatures()
        d.frombuffer(a, False)

        e = RealFeatures()
        e.frombuffer(a, True)

        a[:, 0] = 0
    else:
        print("numpy version >= 1.5 is needed")

    return feats
示例#25
0
    def RunLinearRidgeRegressionShogun(q):
      """Time linear ridge regression training (and prediction, if a test
      file is given) and report the result through ``q``.

      ``options`` is searched for ``-t <int>``, used as the first ``LRR``
      argument (default 1.0).

      Puts and returns the elapsed time, or ``-1`` if training or prediction
      raises.
      """
      totalTimer = Timer()

      # Load input dataset.
      # With two or more files, the second one is loaded as the test set.
      Log.Info("Loading dataset", self.verbose)
      if len(self.dataset) >= 2:
        testSet = np.genfromtxt(self.dataset[1], delimiter=',')

      # Split the training file into predictors and responses.
      X, y = SplitTrainData(self.dataset)
      # Raw string: "\d" in a normal literal is an invalid escape sequence.
      tau = re.search(r"-t (\d+)", options)
      tau = 1.0 if not tau else int(tau.group(1))

      try:
        with totalTimer:
          # Perform linear ridge regression.
          model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
          model.train()

          if len(self.dataset) >= 2:
            model.apply_regression(RealFeatures(testSet.T))

      except Exception:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
示例#26
0
  def RunMetrics(self, options):
    """Run the timed benchmark and, with three datasets, also compute the MSE.

    Returns the negative result of ``LinearRidgeRegressionShogun`` unchanged
    when it signals failure.  Otherwise, with at least three dataset files
    (train, test, true labels), returns a dict holding ``'Runtime'`` and
    ``'Simple MSE'``.  With fewer files, ``Log.Fatal`` is called and nothing
    is returned explicitly.
    """
    Log.Info("Perform Linear Ridge Regression.", self.verbose)

    results = self.LinearRidgeRegressionShogun(options)
    if results < 0:
      return results

    metrics = {'Runtime' : results}

    if len(self.dataset) >= 3:

      # The model is trained again here, outside the timed run.
      X, y = SplitTrainData(self.dataset)
      # Raw string: "\d" in a normal literal is an invalid escape sequence.
      tau = re.search(r"-t (\d+)", options)
      tau = 1.0 if not tau else int(tau.group(1))
      model = LRR(tau, RealFeatures(X.T), RegressionLabels(y))
      model.train()

      testData = LoadDataset(self.dataset[1])
      truelabels = LoadDataset(self.dataset[2])

      predictedlabels = model.apply_regression(RealFeatures(testData.T)).get_labels()

      SimpleMSE = Metrics.SimpleMeanSquaredError(truelabels, predictedlabels)
      metrics['Simple MSE'] = SimpleMSE
      return metrics

    else:
      Log.Fatal("This method requires three datasets!")
示例#27
0
    def RunKMeansShogun(q):
      """Time K-Means clustering and report the result through ``q``.

      ``options`` is searched for ``-c <int>`` (number of clusters),
      ``-m <int>`` (iteration limit, default 1000) and ``-s <int>`` (random
      seed).  With exactly two dataset files, the second is loaded as
      initial centroids.

      Puts and returns the elapsed time, or ``-1`` on invalid options or if
      clustering raises.
      """
      totalTimer = Timer()

      # Load input dataset.
      # With exactly two files, the second one is the centroids file.
      Log.Info("Loading dataset", self.verbose)
      data = np.genfromtxt(self.dataset[0], delimiter=',')
      if len(self.dataset) == 2:
        centroids = np.genfromtxt(self.dataset[1], delimiter=',')

      # Gather parameters.  Raw strings: "\d" in a normal literal is an
      # invalid escape sequence.
      clusters = re.search(r"-c (\d+)", options)
      maxIterations = re.search(r"-m (\d+)", options)
      seed = re.search(r"-s (\d+)", options)

      # Now do validation of options.
      if not clusters and len(self.dataset) != 2:
        Log.Fatal("Required option: Number of clusters or cluster locations.")
        q.put(-1)
        return -1
      elif (not clusters or int(clusters.group(1)) < 1) and len(self.dataset) != 2:
        Log.Fatal("Invalid number of clusters requested! Must be greater than"
            + " or equal to 1.")
        q.put(-1)
        return -1

      m = 1000 if not maxIterations else int(maxIterations.group(1))

      if seed:
        # The regex yields text; convert it before seeding the generator.
        Math_init_random(int(seed.group(1)))
      try:
        dataFeat = RealFeatures(data.T)
        distance = EuclideanDistance(dataFeat, dataFeat)

        # Create the K-Means object and perform K-Means clustering.
        # NOTE(review): with a centroids file but no "-c" option, `clusters`
        # is None here and the run ends in the except branch - confirm
        # whether k should be derived from the centroids instead.
        with totalTimer:
          if len(self.dataset) == 2:
            model = KMeans(int(clusters.group(1)), distance, RealFeatures(centroids))
          else:
            model = KMeans(int(clusters.group(1)), distance)

          model.set_mbKMeans_iter(m)
          model.train()

          labels = model.apply().get_labels()
          centers = model.get_cluster_centers()
      except Exception as e:
        print(e)
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
def regression_gaussian_process_modular(n=100, n_test=100, x_range=6,
                                        x_range_test=10, noise_var=0.5,
                                        width=1, seed=1):
    """Fit an exact-inference Gaussian process to a noisy sine wave.

    NumPy's generator is seeded with ``seed``; Shogun's is seeded with the
    literal 17.  The kernel is built as ``GaussianKernel(10, 2 * width**2)``.

    Returns ``(alpha, diagonal, variance, mean, cholesky)`` with the mean
    and variance vectors on the test grid rounded to 12 decimals, or
    ``None`` after printing a message when the GP classes cannot be
    imported.
    """
    from modshogun import RealFeatures, RegressionLabels, GaussianKernel, Math
    try:
        from modshogun import GaussianLikelihood, ZeroMean, \
          ExactInferenceMethod, GaussianProcessRegression
    except ImportError:
        print("Eigen3 needed for Gaussian Processes")
        return

    # Reproducible results.
    random.seed(seed)
    Math.init_random(17)

    # One-dimensional noisy sine wave for training, evenly spaced test grid.
    train_x = random.rand(1, n) * x_range
    grid = [float(i) / n_test * x_range_test for i in range(n_test)]
    X_test = array([grid])
    Y_test = sin(X_test)  # ground truth on the grid; not used further
    train_y = sin(train_x) + random.randn(n) * noise_var

    # Shogun representation.
    targets = RegressionLabels(train_y[0])
    train_features = RealFeatures(train_x)
    test_features = RealFeatures(X_test)

    # GP specification.
    gaussian = GaussianKernel(10, width * width * 2)
    zero_mean = ZeroMean()
    likelihood = GaussianLikelihood()
    likelihood.set_sigma(noise_var)
    inference = ExactInferenceMethod(gaussian, train_features, zero_mean,
                                     targets, likelihood)

    # Train the GP.
    process = GaussianProcessRegression(inference)
    process.train()

    # Quantities exposed by the inference method.
    alpha = inference.get_alpha()
    diagonal = inference.get_diagonal_vector()
    cholesky = inference.get_cholesky()

    # Predictive mean and variance on the test grid.
    mean = process.get_mean_vector(test_features)
    variance = process.get_variance_vector(test_features)

    return alpha, diagonal, round(variance, 12), round(mean, 12), cholesky
示例#29
0
        def RunAllKnnShogun(q):
            """Time k-nearest-neighbour training plus prediction and report
            it through ``q``.

            With exactly two dataset files, the second is the query set;
            otherwise the reference data is queried against itself.
            ``options`` must contain ``-k <int>`` with
            ``1 <= k <= number of reference rows``.

            Puts and returns the elapsed time, or ``-1`` on invalid options
            or if any step raises.
            """
            totalTimer = Timer()

            try:
                Log.Info("Loading dataset", self.verbose)
                if len(self.dataset) == 2:
                    referenceData = np.genfromtxt(self.dataset[0],
                                                  delimiter=',')
                    queryData = np.genfromtxt(self.dataset[1], delimiter=',')
                    # Wrap the loaded query matrix.  The previous code read
                    # an undefined name here, so every two-file run ended in
                    # the except branch.
                    queryFeat = RealFeatures(queryData.T)
                else:
                    referenceData = np.genfromtxt(self.dataset, delimiter=',')

                # Labels are the last column of the reference data.
                labels = MulticlassLabels(
                    referenceData[:, (referenceData.shape[1] - 1)])
                referenceData = referenceData[:, :-1]

                with totalTimer:
                    # Get all the parameters.  Raw string: "\d" in a normal
                    # literal is an invalid escape sequence.
                    k = re.search(r"-k (\d+)", options)
                    if not k:
                        Log.Fatal(
                            "Required option: Number of furthest neighbors to find."
                        )
                        q.put(-1)
                        return -1

                    k = int(k.group(1))
                    if (k < 1 or k > referenceData.shape[0]):
                        # k is already an int here, so format it with str().
                        Log.Fatal("Invalid k: " + str(k) +
                                  "; must be greater than 0" +
                                  " and less or equal than " +
                                  str(referenceData.shape[0]))
                        q.put(-1)
                        return -1

                    referenceFeat = RealFeatures(referenceData.T)
                    distance = EuclideanDistance(referenceFeat, referenceFeat)

                    # Perform All K-Nearest-Neighbors.
                    model = SKNN(k, distance, labels)
                    model.train()

                    if len(self.dataset) == 2:
                        out = model.apply(queryFeat).get_labels()
                    else:
                        out = model.apply(referenceFeat).get_labels()
            except Exception:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
示例#30
0
        def RunLDAShogun():
            """Time MCLDA training and, when a test file is given, store its
            predictions in ``self.predictions``.

            Consumes the optional ``tolerance`` and ``store`` keys from
            ``options``; anything left over is rejected.  Training labels
            are remapped to ``0..num_classes-1`` before training, and
            predictions are mapped back to the original labels.

            Returns the elapsed training time, or ``-1`` if any step raises
            (the exception text is logged).
            """
            totalTimer = Timer()

            try:
                # A second dataset file, if present, is the test set.
                hasTestSet = len(self.dataset) > 1
                if hasTestSet:
                    testSet = LoadDataset(self.dataset[1])

                # Split the training file into features and labels.
                trainSet, trainLabels = SplitTrainData(self.dataset)
                # If the labels are not in {0,1,2,...,num_classes-1}, map them to
                # this set and store the mapping; shogun's MCLDA class requires
                # the labels to be in {0,1,2,...,num_classes-1}.
                mapping = {}
                reverseMapping = {}
                for idx, label in enumerate(set(trainLabels)):
                    mapping[label] = idx
                    reverseMapping[idx] = label
                for i in range(len(trainLabels)):
                    trainLabels[i] = mapping[trainLabels[i]]

                trainFeat = RealFeatures(trainSet.T)
                trainLabels = MulticlassLabels(trainLabels)
                # Gather optional parameters.
                if "tolerance" in options:
                    self.tolerance = float(options.pop("tolerance"))

                if "store" in options:
                    self.store = bool(options.pop("store"))

                if (len(options) > 0):
                    Log.Fatal("Unknown parameters: " + str(options))
                    raise Exception("unknown parameters")

                with totalTimer:
                    self.model = MCLDA(trainFeat, trainLabels, self.tolerance,
                                       self.store)
                    self.model.train()

                # Predict only when a test set was loaded.  The previous
                # guard (len > 0) was always true, so a train-only run hit
                # an undefined testSet and returned -1.
                if hasTestSet:
                    self.predictions = self.model.apply_multiclass(
                        RealFeatures(testSet.T))
                    self.predictions = self.predictions.get_labels()
                    # reverse map the predicted labels to actual labels
                    for i in range(len(self.predictions)):
                        self.predictions[i] = reverseMapping[
                            self.predictions[i]]

            except Exception as e:
                Log.Info("Exception: " + str(e))
                return -1

            time = totalTimer.ElapsedTime()
            return time