示例#1
0
def converter_localitypreservingprojections_modular(data_fname, k):
    """Apply Locality Preserving Projections to features read from a CSV file.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.
        k: Value forwarded to the converter's ``set_k``.

    Returns:
        The ``RealFeatures`` object built from ``data_fname``. The value
        returned by the converter's ``apply`` call is not captured. When an
        ``ImportError`` is raised, a notice is printed and ``None`` is
        returned.
    """
    try:
        from modshogun import (CSVFile, LocalityPreservingProjections,
                               RealFeatures)

        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        lpp = LocalityPreservingProjections()
        lpp.set_target_dim(1)
        lpp.set_k(k)
        lpp.set_tau(2.0)
        lpp.apply(features)
    except ImportError:
        print('No Eigen3 available')
    else:
        return features
def converter_localtangentspacealignment_modular(data_fname, k):
    """Apply Local Tangent Space Alignment to features read from a CSV file.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.
        k: Value forwarded to the converter's ``set_k``.

    Returns:
        The ``RealFeatures`` object built from ``data_fname`` (the result of
        the converter's ``apply`` call is not captured), or ``None`` after
        printing a notice when an ``ImportError`` is raised.
    """
    try:
        from modshogun import (CSVFile, LocalTangentSpaceAlignment,
                               RealFeatures)

        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        ltsa = LocalTangentSpaceAlignment()
        ltsa.set_target_dim(1)
        ltsa.set_k(k)
        ltsa.apply(features)
    except ImportError:
        print('No Eigen3 available')
    else:
        return features
def classifier_libsvm_modular(train_fname=traindat,
                              test_fname=testdat,
                              label_fname=label_traindat,
                              width=2.1,
                              C=1,
                              epsilon=1e-5):
    """Train a LibSVM with a Gaussian kernel and classify the test features.

    Args:
        train_fname: Path passed to ``CSVFile`` for the training features.
        test_fname: Path passed to ``CSVFile`` for the test features.
        label_fname: Path passed to ``CSVFile`` for the binary labels.
        width: Third argument of the ``GaussianKernel`` constructor.
        C: First argument of the ``LibSVM`` constructor.
        epsilon: Value passed to ``set_epsilon`` on the SVM.

    Returns:
        A 3-tuple ``(predictions, svm, predictions.get_labels())``.
    """
    from modshogun import (BinaryLabels, CSVFile, GaussianKernel, LibSVM,
                           RealFeatures)

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    # The kernel is constructed with the training features on both sides.
    gaussian = GaussianKernel(train_features, train_features, width)

    machine = LibSVM(C, gaussian, train_labels)
    machine.set_epsilon(epsilon)
    machine.train()

    # Queried from the trained machine but not part of the return value.
    support_vector_idx = machine.get_support_vectors()
    alpha_values = machine.get_alphas()

    test_predictions = machine.apply(test_features)
    predicted_labels = test_predictions.get_labels()
    return test_predictions, machine, predicted_labels
示例#4
0
def evaluation_cross_validation_regression(train_fname=traindat,
                                           label_fname=label_traindat,
                                           width=0.8,
                                           tau=1e-6):
    """Cross-validate a kernel ridge regression with mean squared error.

    Args:
        train_fname: Path passed to ``CSVFile`` for the training features.
        label_fname: Path passed to ``CSVFile`` for the regression labels.
        width: Accepted but not referenced by the body; the kernel is built
            with ``GaussianKernel()`` and no arguments.
        tau: First argument of the ``KernelRidgeRegression`` constructor.

    Returns:
        ``None``. The value produced by ``evaluate()`` is not returned.
    """
    from modshogun import (CSVFile, CrossValidation, CrossValidationResult,
                           CrossValidationSplitting, GaussianKernel,
                           KernelRidgeRegression, MeanSquaredError,
                           RealFeatures, RegressionLabels)

    # Training data.
    train_features = RealFeatures(CSVFile(train_fname))
    train_labels = RegressionLabels(CSVFile(label_fname))

    # Kernel and the regression machine that uses it.
    gaussian = GaussianKernel()
    krr = KernelRidgeRegression(tau, gaussian, train_labels)

    # Plain (non-stratified) splitting into 5 folds.
    folds = CrossValidationSplitting(train_labels, 5)

    # Criterion used to score each fold.
    mse = MeanSquaredError()

    xval = CrossValidation(krr, train_features, train_labels, folds, mse)

    # (optional) repeat the cross-validation 10 times.
    xval.set_num_runs(10)

    # (optional) ask the machine to precompute the kernel matrix; may not work.
    krr.data_lock(train_labels, train_features)

    # Run the cross-validation; the outcome is discarded.
    xval.evaluate()
def multiclass_chaidtree_modular(train=traindat,
                                 test=testdat,
                                 labels=label_traindat,
                                 ft=feattypes):
    """Train a CHAID tree on CSV data and classify the test features.

    Args:
        train: Path passed to ``CSVFile`` for the training features.
        test: Path passed to ``CSVFile`` for the test features.
        labels: Path passed to ``CSVFile`` for the training labels.
        ft: Feature-type description forwarded to the ``CHAIDTree``
            constructor.

    Returns:
        ``(tree, output)`` where ``output`` is ``get_labels()`` of the
        multiclass predictions on the test features, or ``None`` (after
        printing a message) when the Shogun imports fail.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, CSVFile, CHAIDTree
    except ImportError:
        print("Could not import Shogun modules")
        return

    # wrap features and labels into Shogun objects
    feats_train = RealFeatures(CSVFile(train))
    feats_test = RealFeatures(CSVFile(test))
    train_labels = MulticlassLabels(CSVFile(labels))

    # CHAID Tree formation with nominal dependent variable.
    # Pass the ``ft`` argument (not the global used as its default) so that
    # caller-supplied feature types are honoured.
    c = CHAIDTree(0, ft, 10)
    c.set_labels(train_labels)
    c.train(feats_train)

    # Classify test data
    output = c.apply_multiclass(feats_test).get_labels()

    return c, output
示例#6
0
def converter_diffusionmaps_modular(data_fname, t):
    """Apply Diffusion Maps with a Gaussian kernel to features from a CSV file.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.
        t: Value forwarded to the converter's ``set_t``.

    Returns:
        The ``RealFeatures`` object built from ``data_fname`` (the result of
        the converter's ``apply`` call is not captured), or ``None`` after
        printing a notice when an ``ImportError`` is raised.
    """
    try:
        from modshogun import (CSVFile, DiffusionMaps, GaussianKernel,
                               RealFeatures)

        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        diffusion = DiffusionMaps()
        diffusion.set_target_dim(1)
        gaussian = GaussianKernel(10, 10.0)
        diffusion.set_kernel(gaussian)
        diffusion.set_t(t)
        diffusion.apply(features)
    except ImportError:
        print('No Eigen3 available')
    else:
        return features
示例#7
0
def multiclass_cartree_modular(train=traindat,
                               test=testdat,
                               labels=label_traindat,
                               ft=feattypes):
    """Train a CART tree on CSV data and classify the test features.

    Args:
        train: Path passed to ``CSVFile`` for the training features.
        test: Path passed to ``CSVFile`` for the test features.
        labels: Path passed to ``CSVFile`` for the training labels.
        ft: First argument of the ``CARTree`` constructor.

    Returns:
        ``(tree, output)`` where ``output`` is ``get_labels()`` of the
        multiclass predictions on the test features, or ``None`` (after
        printing a message) when the Shogun imports fail.
    """
    try:
        from modshogun import (CARTree, CSVFile, MulticlassLabels,
                               PT_MULTICLASS, RealFeatures)
    except ImportError:
        print("Could not import Shogun modules")
        return None

    # Load everything into Shogun containers.
    train_features = RealFeatures(CSVFile(train))
    test_features = RealFeatures(CSVFile(test))
    label_data = MulticlassLabels(CSVFile(labels))

    # CART tree built with 5 fold cross-validation pruning.
    tree = CARTree(ft, PT_MULTICLASS, 5, True)
    tree.set_labels(label_data)
    tree.train(train_features)

    # Predict on the held-out features.
    predicted = tree.apply_multiclass(test_features)
    return tree, predicted.get_labels()
示例#8
0
def kernel_io_modular(train_fname=traindat, test_fname=testdat, width=1.9):
    """Compute Gaussian kernel matrices and save them to temporary CSV files.

    The kernel is saved to ``tmp/gaussian_train.csv`` and
    ``tmp/gaussian_test.csv``; both files are removed before returning.

    Args:
        train_fname: Path passed to ``CSVFile`` for the training features.
        test_fname: Path passed to ``CSVFile`` for the test features.
        width: Third argument of the ``GaussianKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``: the kernel matrix obtained after
        construction on the training features, the kernel matrix obtained
        after ``init(train, test)``, and the kernel object itself.
    """
    import os
    from modshogun import CSVFile, GaussianKernel, RealFeatures

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    # Train-vs-train kernel.
    kernel = GaussianKernel(train_features, train_features, width)
    train_matrix = kernel.get_kernel_matrix()
    sink = CSVFile("tmp/gaussian_train.csv", "w")
    kernel.save(sink)
    del sink

    # Re-initialise the same kernel on train-vs-test.
    kernel.init(train_features, test_features)
    test_matrix = kernel.get_kernel_matrix()
    sink = CSVFile("tmp/gaussian_test.csv", "w")
    kernel.save(sink)
    del sink

    # Remove the temporary files (test file first, then train file).
    for leftover in ("tmp/gaussian_test.csv", "tmp/gaussian_train.csv"):
        os.unlink(leftover)

    return train_matrix, test_matrix, kernel
示例#9
0
def converter_laplacianeigenmaps_modular(data_fname, k):
    """Apply Laplacian Eigenmaps to features read from a CSV file.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.
        k: Value forwarded to the converter's ``set_k``.

    Returns:
        The ``RealFeatures`` object built from ``data_fname`` (the result of
        the converter's ``apply`` call is not captured), or ``None`` after
        printing a notice when an ``ImportError`` is raised.
    """
    try:
        from modshogun import CSVFile, LaplacianEigenmaps, RealFeatures

        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        eigenmaps = LaplacianEigenmaps()
        eigenmaps.set_target_dim(1)
        eigenmaps.set_k(k)
        eigenmaps.set_tau(20.0)
        eigenmaps.apply(features)
    except ImportError:
        print('No Eigen3 available')
    else:
        return features
示例#10
0
def converter_kernellocallylinearembedding_modular(data_fname, k):
    """Apply Kernel Locally Linear Embedding with a linear kernel.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.
        k: Value forwarded to the converter's ``set_k``.

    Returns:
        The ``RealFeatures`` object built from ``data_fname`` (the result of
        the converter's ``apply`` call is not captured), or ``None`` after
        printing a notice when an ``ImportError`` is raised.
    """
    try:
        from modshogun import (CSVFile, KernelLocallyLinearEmbedding,
                               LinearKernel, RealFeatures)

        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        linear = LinearKernel()
        klle = KernelLocallyLinearEmbedding(linear)
        klle.set_target_dim(1)
        klle.set_k(k)
        klle.apply(features)
    except ImportError:
        print('No Eigen3 available')
    else:
        return features
def converter_factoranalysis_modular(data_fname):
    """Embed CSV features with Factor Analysis and test a determinant sign.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.

    Returns:
        The result of ``det(X . X^T) > 0`` where ``X`` is the feature matrix
        of the 2-dimensional embedding, or ``None`` after printing a notice
        when an ``ImportError`` is raised.
    """
    try:
        import numpy
        from modshogun import (CSVFile, EuclideanDistance, FactorAnalysis,
                               RealFeatures)

        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        factor_analysis = FactorAnalysis()
        factor_analysis.set_target_dim(2)
        embedded = factor_analysis.apply(features)

        matrix = embedded.get_feature_matrix()
        gram = numpy.dot(matrix, matrix.T)
        is_positive = numpy.linalg.det(gram) > 0
    except ImportError:
        print('No Eigen3 available')
    else:
        return is_positive
示例#12
0
def converter_tdistributedstochasticneighborembedding_modular(
        data_fname, seed=1):
    """Embed CSV features into two dimensions with t-SNE.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.
        seed: Value passed to ``Math_init_random`` before the features are
            loaded, for reproducible results.

    Returns:
        The object returned by the converter's ``apply`` call, or ``None``
        after printing a notice when an ``ImportError`` is raised.
    """
    try:
        from modshogun import RealFeatures, TDistributedStochasticNeighborEmbedding
        from modshogun import Math_init_random, CSVFile

        # Seed first so the run is reproducible.
        Math_init_random(seed)
        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        tsne = TDistributedStochasticNeighborEmbedding()
        tsne.set_target_dim(2)
        embedded = tsne.apply(features)
    except ImportError:
        print('No Eigen3 available')
    else:
        return embedded
示例#13
0
def converter_locallylinearembedding_modular(data_fname, k):
    """Apply Locally Linear Embedding to features read from a CSV file.

    If ``LocallyLinearEmbedding`` itself cannot be imported, a message is
    printed and ``exit(0)`` is called.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.
        k: Value forwarded to the converter's ``set_k``.

    Returns:
        The ``RealFeatures`` object built from ``data_fname`` (the result of
        the converter's ``apply`` call is not captured), or ``None`` after
        printing a notice when an ``ImportError`` reaches the outer handler.
    """
    try:
        from modshogun import CSVFile, RealFeatures
        try:
            from modshogun import LocallyLinearEmbedding
        except ImportError:
            print("LocallyLinearEmbedding not available")
            exit(0)

        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        lle = LocallyLinearEmbedding()
        lle.set_target_dim(1)
        lle.set_k(k)
        lle.apply(features)
    except ImportError:
        print('No Eigen3 available')
    else:
        return features
示例#14
0
def converter_stochasticproximityembedding_modular(data_fname, k):
    """Run Stochastic Proximity Embedding with local, then global, strategy.

    Args:
        data_fname: Path handed to ``CSVFile`` to load the features.
        k: Value forwarded to the converter's ``set_k``.

    Returns:
        The ``RealFeatures`` object built from ``data_fname`` (the results of
        both ``embed`` calls are not captured), or ``None`` after printing a
        notice when an ``ImportError`` is raised.
    """
    try:
        from modshogun import (CSVFile, RealFeatures, SPE_GLOBAL, SPE_LOCAL,
                               StochasticProximityEmbedding)

        csv_source = CSVFile(data_fname)
        features = RealFeatures(csv_source)

        spe = StochasticProximityEmbedding()
        spe.set_target_dim(1)
        spe.set_nupdates(40)

        # First pass: local strategy with the given neighbourhood size.
        spe.set_k(k)
        spe.set_strategy(SPE_LOCAL)
        spe.embed(features)

        # Second pass: global strategy on the same converter.
        spe.set_strategy(SPE_GLOBAL)
        spe.embed(features)
    except ImportError:
        print('No Eigen3 available')
    else:
        return features
示例#15
0
def metric_lmnn_statistics(
        k=3,
        fname_features='../../data/fm_train_multiclass_digits.dat.gz',
        fname_labels='../../data/label_train_multiclass_digits.dat'):
    """Train LMNN on a labelled data set and plot the training objective.

    Args:
        k: Third argument of the ``LMNN`` constructor.
        fname_features: Path passed to ``load_compressed_features``; the
            transpose of its result is wrapped in ``RealFeatures``.
        fname_labels: Path passed to ``CSVFile`` for the multiclass labels.

    Returns:
        ``None``. A matplotlib window with the objective curve is shown. If
        the required modules cannot be imported, a message is printed and
        the function returns early.

    Raises:
        AssertionError: If the number of feature vectors differs from the
            number of labels.
    """
    try:
        from modshogun import LMNN, CSVFile, RealFeatures, MulticlassLabels, MSG_DEBUG
        import matplotlib.pyplot as pyplot
    except ImportError:
        # print() with a single argument behaves the same on Python 2 and 3.
        print('Error importing modshogun or other required modules. Please, verify their installation.')
        return

    features = RealFeatures(load_compressed_features(fname_features).T)
    labels = MulticlassLabels(CSVFile(fname_labels))

    #	print('number of examples = %d' % features.get_num_vectors())
    #	print('number of features = %d' % features.get_num_features())

    assert (features.get_num_vectors() == labels.get_num_labels())

    # train LMNN
    lmnn = LMNN(features, labels, k)
    lmnn.set_correction(100)
    #	lmnn.io.set_loglevel(MSG_DEBUG)
    print('Training LMNN, this will take about two minutes...')
    lmnn.train()
    print('Training done!')

    # plot objective obtained during training
    statistics = lmnn.get_statistics()

    pyplot.plot(statistics.obj.get())
    pyplot.grid(True)
    pyplot.xlabel('Iterations')
    pyplot.ylabel('LMNN objective')
    pyplot.title(
        'LMNN objective during training for the multiclass digits data set')

    pyplot.show()
示例#16
0
#!/usr/bin/python

from modshogun import CSVFile, RealFeatures, RescaleFeatures
from scipy.linalg import solve_triangular, cholesky, sqrtm, inv
import matplotlib.pyplot as pyplot
import numpy

# Script: load the wine features, rescale them, centre the data and show
# the covariance matrix of the centred data in the first of two subplots.

# load wine features
features = RealFeatures(CSVFile('../data/fm_wine.dat'))

print('%d vectors with %d features.' %
      (features.get_num_vectors(), features.get_num_features()))
# NOTE(review): numpy.mean is applied directly to the RealFeatures object
# here and below - confirm this yields the intended per-feature means.
print('original features mean = ' + str(numpy.mean(features, axis=1)))

# rescale the features to [0,1]
feature_rescaling = RescaleFeatures()
feature_rescaling.init(features)
features.add_preprocessor(feature_rescaling)
features.apply_preprocessor()

print('mean after rescaling = ' + str(numpy.mean(features, axis=1)))

# remove mean from data
data = features.get_feature_matrix()
data = data.T
data -= numpy.mean(data, axis=0)
# print() call form: the statement form is a SyntaxError on Python 3 and was
# inconsistent with the other print calls in this script.
print(numpy.mean(data, axis=0))

fig, axarr = pyplot.subplots(1, 2)
axarr[0].matshow(numpy.cov(data.T))
示例#17
0
def features_io_modular(fm_train_real, label_train_twoclass):
    """Save and reload features and labels through several Shogun file types.

    Sparse features, dense features and multiclass labels are written to
    files in the current working directory (binary, LibSVM, CSV and HDF5
    variants), loaded back into freshly constructed objects, and the files
    are deleted at the end.

    Args:
        fm_train_real: Data passed to both ``SparseRealFeatures`` and
            ``RealFeatures``.
        label_train_twoclass: Not referenced by the body; the labels written
            are a fixed array ``[0.0, 1.0, 2.0, 3.0]``.

    Returns:
        ``(feats, feats2, lab, lab2)``: the dense features built from
        ``fm_train_real``, the dense features object that was loaded into,
        the fixed multiclass labels, and the labels object that was loaded
        into.
    """
    import numpy
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    from modshogun import GaussianKernel
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    # --- sparse features: write binary + LibSVM, then read both back ---
    feats = SparseRealFeatures(fm_train_real)
    feats2 = SparseRealFeatures()

    # NOTE(review): every rebinding of `f` drops the previous file object;
    # presumably that is what closes/flushes a writer before the same file
    # is read back - confirm before changing the variable reuse.
    f = BinaryFile("fm_train_sparsereal.bin", "w")
    feats.save(f)

    f = LibSVMFile("fm_train_sparsereal.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_sparsereal.bin")
    feats2.load(f)

    f = LibSVMFile("fm_train_sparsereal.ascii")
    feats2.load(f)

    # --- dense features: `feats` and `feats2` are rebound to dense objects ---
    feats = RealFeatures(fm_train_real)
    feats2 = RealFeatures()

    f = BinaryFile("fm_train_real.bin", "w")
    feats.save(f)

    f = HDF5File("fm_train_real.h5", "w", "/data/doubles")
    feats.save(f)

    f = CSVFile("fm_train_real.ascii", "w")
    feats.save(f)

    f = BinaryFile("fm_train_real.bin")
    feats2.load(f)
    #print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

    f = CSVFile("fm_train_real.ascii")
    feats2.load(f)
    #print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

    # --- labels: a fixed four-element array, written in three formats ---
    lab = MulticlassLabels(numpy.array([0.0, 1.0, 2.0, 3.0]))
    lab2 = MulticlassLabels()
    f = CSVFile("label_train_twoclass.ascii", "w")
    lab.save(f)

    f = BinaryFile("label_train_twoclass.bin", "w")
    lab.save(f)

    f = HDF5File("label_train_real.h5", "w", "/data/labels")
    lab.save(f)

    f = CSVFile("label_train_twoclass.ascii")
    lab2.load(f)

    f = BinaryFile("label_train_twoclass.bin")
    lab2.load(f)

    # HDF5 files written above are read back last, for features then labels.
    f = HDF5File("fm_train_real.h5", "r", "/data/doubles")
    feats2.load(f)
    #print(feats2.get_feature_matrix())
    f = HDF5File("label_train_real.h5", "r", "/data/labels")
    lab2.load(f)
    #print(lab2.get_labels())

    #clean up
    # The loop variable reuses the name `f`, which also rebinds it away from
    # the last HDF5File object before the files are unlinked.
    import os
    for f in [
            'fm_train_sparsereal.bin', 'fm_train_sparsereal.ascii',
            'fm_train_real.bin', 'fm_train_real.h5', 'fm_train_real.ascii',
            'label_train_real.h5', 'label_train_twoclass.ascii',
            'label_train_twoclass.bin'
    ]:
        os.unlink(f)
    return feats, feats2, lab, lab2
示例#18
0
    knn.train()

    test_features, test_labels = testdat.features, testdat.labels

    predicted_labels = knn.apply(test_features)
    evaluator = MulticlassAccuracy()
    acc = evaluator.evaluate(predicted_labels, test_labels)
    err = 1 - acc

    return err


# Driver: load the ape gut features and labels, truncate the feature matrix,
# then visualise the data and run diagonal LMNN on it.
features_file = '../data/fm_ape_gut.txt'
labels_file = '../data/label_ape_gut.txt'

features = RealFeatures(CSVFile(features_file))
labels = MulticlassLabels(CSVFile(labels_file))

# reduce the number of features to use so that the training is faster but still
# the results of feature selection are significant
# (keeps only the first 500 rows of the feature matrix)
fm = features.get_feature_matrix()
features = RealFeatures(fm[:500, :])

# Sanity check: one label per feature vector.
assert (features.get_num_vectors() == labels.get_num_labels())

print('Number of examples = %d, number of features = %d.' %
      (features.get_num_vectors(), features.get_num_features()))

# Both helpers are defined outside the visible part of this file.
visualize_tdsne(features, labels)
lmnn = diagonal_lmnn(features, labels, max_iter=1200)