def features_dense_io_modular():
    """Load dense real-valued features from a space-delimited file.

    Returns:
        The ``RealFeatures`` object after ``load`` has been called on it with
        ``../data/fm_train_real.dat`` opened in read mode.
    """
    from modshogun import CSVFile, RealFeatures

    dense = RealFeatures()

    # The data file separates values with single spaces rather than commas.
    source = CSVFile("../data/fm_train_real.dat", "r")
    source.set_delimiter(" ")

    dense.load(source)
    return dense
示例#2
0
def classifier_gpbtsvm_modular(train_fname=traindat,
                               test_fname=testdat,
                               label_fname=label_traindat,
                               width=2.1,
                               C=1,
                               epsilon=1e-5):
    """Train a GPBTSVM with a Gaussian kernel and apply it to test features.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        label_fname: Path handed to ``CSVFile`` for the binary training labels.
        width: Third argument of the ``GaussianKernel`` constructor.
        C: First argument of the ``GPBTSVM`` constructor.
        epsilon: Value passed to ``svm.set_epsilon``.

    Returns:
        ``(predictions, svm, predictions.get_labels())``, or ``None`` when
        ``GPBTSVM`` cannot be imported from ``modshogun``.
    """
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import GaussianKernel
    from modshogun import CSVFile
    try:
        from modshogun import GPBTSVM
    except ImportError:
        print("GPBTSVM not available")
        # Return rather than call exit(0): terminating the interpreter from
        # inside a function would also kill whatever code called it.
        return

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))
    labels = BinaryLabels(CSVFile(label_fname))
    kernel = GaussianKernel(feats_train, feats_train, width)

    svm = GPBTSVM(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.train()

    predictions = svm.apply(feats_test)
    return predictions, svm, predictions.get_labels()
def distance_manhattenword_modular(train_fname=traindna,
                                   test_fname=testdna,
                                   order=3,
                                   gap=0,
                                   reverse=False):
    """Compute ManhattanWordDistance matrices on word features built from DNA.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training strings.
        test_fname: Path handed to ``CSVFile`` for the test strings.
        order: Word order; ``obtain_from_char`` receives ``order - 1`` and
            ``order`` as its second and third arguments.
        gap: Forwarded to ``obtain_from_char``.
        reverse: Forwarded to ``obtain_from_char``.

    Returns:
        ``(dm_train, dm_test)``: the distance matrix obtained for
        (train, train) and the one obtained after re-initialising the
        distance with (train, test).
    """
    from modshogun import StringCharFeatures, StringWordFeatures, DNA
    from modshogun import SortWordString, ManhattanWordDistance, CSVFile

    def _word_features(fname):
        # Read the character strings, then derive word features from them.
        chars = StringCharFeatures(CSVFile(fname), DNA)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    train_words = _word_features(train_fname)
    sorter = SortWordString()
    sorter.init(train_words)
    train_words.add_preprocessor(sorter)
    train_words.apply_preprocessor()

    # The test features reuse the preprocessor initialised on the train set.
    test_words = _word_features(test_fname)
    test_words.add_preprocessor(sorter)
    test_words.apply_preprocessor()

    metric = ManhattanWordDistance(train_words, train_words)
    train_matrix = metric.get_distance_matrix()

    metric.init(train_words, test_words)
    test_matrix = metric.get_distance_matrix()
    return train_matrix, test_matrix
def features_dense_io_modular():
    """Read a space-delimited data file into a ``RealFeatures`` object.

    Returns:
        The ``RealFeatures`` instance after loading
        ``../data/fm_train_real.dat`` through a ``CSVFile`` opened with "r".
    """
    from modshogun import CSVFile, RealFeatures

    loaded = RealFeatures()

    # Switch the reader's delimiter to a single space before loading.
    reader = CSVFile("../data/fm_train_real.dat", "r")
    reader.set_delimiter(" ")

    loaded.load(reader)
    return loaded
示例#5
0
def metric_lmnn_modular(train_fname=traindat,
                        test_fname=testdat,
                        label_train_fname=label_traindat,
                        k=3):
    """Train LMNN, then classify test features with KNN on its distance.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        label_train_fname: Path handed to ``CSVFile`` for the training labels.
        k: Passed both to the ``LMNN`` constructor and to the ``KNN``
            constructor.

    Returns:
        ``(lmnn, output)`` where ``output`` is ``get_labels()`` of the KNN
        result on the test features, or ``None`` (silently) when the
        ``modshogun`` import fails.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, LMNN, KNN, CSVFile
    except ImportError:
        return

    # Wrap the input files into Shogun feature and label objects.
    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = MulticlassLabels(CSVFile(label_train_fname))

    # Train LMNN and take the distance object it provides.
    lmnn = LMNN(train_features, train_labels, k)
    lmnn.train()
    learned_distance = lmnn.get_distance()

    # Classify the test features with KNN using that distance.
    classifier = KNN(k, learned_distance, train_labels)
    classifier.train()
    predicted = classifier.apply(test_features)

    return lmnn, predicted.get_labels()
def multiclass_randomforest_modular(train=traindat,
                                    test=testdat,
                                    labels=label_traindat,
                                    ft=feattypes):
    """Train a RandomForest and classify the test features with it.

    Args:
        train: Path handed to ``CSVFile`` for the training features.
        test: Path handed to ``CSVFile`` for the test features.
        labels: Path handed to ``CSVFile`` for the training labels.
        ft: Value passed to ``set_feature_types``.

    Returns:
        ``(rand_forest, output)`` where ``output`` is ``get_labels()`` of the
        ``apply_multiclass`` result, or ``None`` after printing a message
        when the ``modshogun`` import fails.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, CSVFile, RandomForest, MajorityVote
    except ImportError:
        print("Could not import Shogun modules")
        return

    # Wrap the input files into Shogun feature and label objects.
    train_features = RealFeatures(CSVFile(train))
    test_features = RealFeatures(CSVFile(test))
    label_object = MulticlassLabels(CSVFile(labels))

    # Build and train the forest; 20 and 1 are passed positionally to the
    # constructor (NOTE(review): meaning not visible here — confirm in the API).
    forest = RandomForest(train_features, label_object, 20, 1)
    forest.set_feature_types(ft)
    forest.set_combination_rule(MajorityVote())
    forest.train()

    # Classify the test data.
    predicted = forest.apply_multiclass(test_features)

    return forest, predicted.get_labels()
示例#7
0
def classifier_svmocas_modular(train_fname=traindat,
                               test_fname=testdat,
                               label_fname=label_traindat,
                               C=0.9,
                               epsilon=1e-5,
                               num_threads=1):
    """Train an SVMOcas classifier and apply it to test features.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        label_fname: Path handed to ``CSVFile`` for the binary training labels.
        C: First argument of the ``SVMOcas`` constructor.
        epsilon: Value passed to ``set_epsilon``.
        num_threads: Value passed to ``parallel.set_num_threads``.

    Returns:
        ``(predictions, svm, predictions.get_labels())``, or ``None`` after
        printing a message when ``SVMOcas`` cannot be imported.
    """
    from modshogun import RealFeatures, BinaryLabels
    from modshogun import CSVFile
    try:
        from modshogun import SVMOcas
    except ImportError:
        print("SVMOcas not available")
        return

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    classifier = SVMOcas(C, train_features, train_labels)
    classifier.set_epsilon(epsilon)
    classifier.parallel.set_num_threads(num_threads)
    classifier.set_bias_enabled(False)
    classifier.train()

    # The getter results are discarded; the calls are kept so that both
    # accessors are still invoked after training.
    classifier.get_bias()
    classifier.get_w()

    result = classifier.apply(test_features)
    return result, classifier, result.get_labels()
def labels_io_modular():
    """Load regression labels from a space-delimited file.

    Returns:
        The ``RegressionLabels`` object after ``load`` has been called on it
        with ``../data/label_train_regression.dat`` opened in read mode.
    """
    from modshogun import CSVFile, RegressionLabels

    targets = RegressionLabels()

    # The label file separates values with single spaces.
    source = CSVFile("../data/label_train_regression.dat", "r")
    source.set_delimiter(" ")

    targets.load(source)
    return targets
示例#9
0
def labels_io_modular():
    """Read a space-delimited label file into a ``RegressionLabels`` object.

    Returns:
        The ``RegressionLabels`` instance after loading
        ``../data/label_train_regression.dat`` through a ``CSVFile`` opened
        with "r".
    """
    from modshogun import CSVFile, RegressionLabels

    loaded = RegressionLabels()

    # Switch the reader's delimiter to a single space before loading.
    reader = CSVFile("../data/label_train_regression.dat", "r")
    reader.set_delimiter(" ")

    loaded.load(reader)
    return loaded
示例#10
0
def classifier_gaussiannaivebayes_modular(train_fname=traindat, test_fname=testdat, label_train_fname=label_traindat):
    """Train GaussianNaiveBayes and apply it to the test features.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        label_train_fname: Path handed to ``CSVFile`` for the training labels.

    Returns:
        ``(gnb, gnb_train, output)``: the classifier, the return value of
        its ``train()`` call, and ``get_labels()`` of the ``apply`` result.
    """
    from modshogun import RealFeatures, MulticlassLabels, GaussianNaiveBayes, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = MulticlassLabels(CSVFile(label_train_fname))

    classifier = GaussianNaiveBayes(train_features, train_labels)
    train_result = classifier.train()
    predicted = classifier.apply(test_features)
    return classifier, train_result, predicted.get_labels()
示例#11
0
def distance_chisquare_modular(train_fname=traindat, test_fname=testdat):
    """Compute ChiSquareDistance matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.

    Returns:
        ``(distance, dm_train, dm_test)``: the distance object and the two
        matrices it produced, in that order.
    """
    from modshogun import RealFeatures, ChiSquareDistance, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    metric = ChiSquareDistance(train_features, train_features)
    train_matrix = metric.get_distance_matrix()

    # Re-initialise the same object with the (train, test) pairing.
    metric.init(train_features, test_features)
    test_matrix = metric.get_distance_matrix()
    return metric, train_matrix, test_matrix
示例#12
0
def kernel_chi2_modular(train_fname=traindat, test_fname=testdat, width=1.4, size_cache=10):
    """Compute Chi2Kernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        width: Third argument of the ``Chi2Kernel`` constructor.
        size_cache: Fourth argument of the ``Chi2Kernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, Chi2Kernel, CSVFile, NormOne

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    kernel = Chi2Kernel(train_features, train_features, width, size_cache)
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
def distance_braycurtis_modular(train_fname=traindat, test_fname=testdat):
    """Compute BrayCurtisDistance matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.

    Returns:
        ``(distance, dm_train, dm_test)``: the distance object and the two
        matrices it produced, in that order.
    """
    from modshogun import RealFeatures, BrayCurtisDistance, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    metric = BrayCurtisDistance(train_features, train_features)
    train_matrix = metric.get_distance_matrix()

    # Re-initialise the same object with the (train, test) pairing.
    metric.init(train_features, test_features)
    test_matrix = metric.get_distance_matrix()
    return metric, train_matrix, test_matrix
示例#14
0
def kernel_cauchy_modular(train_fname=traindat, test_fname=testdat, sigma=1.0):
    """Compute CauchyKernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        sigma: Third argument of the ``CauchyKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, CauchyKernel, CSVFile, EuclideanDistance

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    # The kernel is given a EuclideanDistance built on the training features.
    euclidean = EuclideanDistance(train_features, train_features)
    kernel = CauchyKernel(train_features, train_features, sigma, euclidean)
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
示例#15
0
def kernel_linear_byte_modular(train_fname=traindat, test_fname=testdat):
    """Evaluate a LinearKernel on ByteFeatures for both feature pairings.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.

    Returns:
        The kernel object, left initialised with (train, test). Both kernel
        matrices are requested but not returned.
    """
    from modshogun import LinearKernel, ByteFeatures, CSVFile

    train_features = ByteFeatures(CSVFile(train_fname))
    test_features = ByteFeatures(CSVFile(test_fname))

    kernel = LinearKernel(train_features, train_features)
    kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    kernel.get_kernel_matrix()
    return kernel
示例#16
0
def kernel_auc_modular(train_fname=traindat,
                       label_fname=label_traindat,
                       width=1.7):
    """Set up an AUCKernel on top of a Gaussian sub-kernel.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        label_fname: Path handed to ``CSVFile`` for the binary labels given
            to ``setup_auc_maximization``.
        width: Third argument of the ``GaussianKernel`` constructor.

    Returns:
        The ``AUCKernel`` object. Its kernel matrix is requested once but
        not returned.
    """
    from modshogun import GaussianKernel, AUCKernel, RealFeatures
    from modshogun import BinaryLabels, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    gaussian = GaussianKernel(train_features, train_features, width)

    auc_kernel = AUCKernel(0, gaussian)
    binary_labels = BinaryLabels(CSVFile(label_fname))
    auc_kernel.setup_auc_maximization(binary_labels)
    auc_kernel.get_kernel_matrix()
    return auc_kernel
示例#17
0
def distance_minkowski_modular(train_fname=traindat, test_fname=testdat, k=3):
    """Compute MinkowskiMetric matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        k: Third argument of the ``MinkowskiMetric`` constructor.

    Returns:
        ``(distance, dm_train, dm_test)``: the distance object and the two
        matrices it produced, in that order.
    """
    from modshogun import RealFeatures, MinkowskiMetric, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    metric = MinkowskiMetric(train_features, train_features, k)
    train_matrix = metric.get_distance_matrix()

    # Re-initialise the same object with the (train, test) pairing.
    metric.init(train_features, test_features)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix
def distance_manhatten_modular(train_fname=traindat, test_fname=testdat):
    """Compute ManhattanMetric matrices for (train, train) and (train, test).

    ``train_fname`` defaults to the module-level ``traindat`` so that the
    signature matches the other distance examples in this file; callers that
    pass it explicitly are unaffected.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.

    Returns:
        ``(distance, dm_train, dm_test)``: the distance object and the two
        matrices it produced, in that order.
    """
    from modshogun import RealFeatures, ManhattanMetric, CSVFile

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))

    distance = ManhattanMetric(feats_train, feats_train)
    dm_train = distance.get_distance_matrix()

    # Re-initialise the same object with the (train, test) pairing.
    distance.init(feats_train, feats_test)
    dm_test = distance.get_distance_matrix()

    return distance, dm_train, dm_test
示例#19
0
def kernel_exponential_modular(train_fname=traindat, test_fname=testdat, tau_coef=1.0):
    """Compute ExponentialKernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        tau_coef: Third argument of the ``ExponentialKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, ExponentialKernel, EuclideanDistance, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    # The kernel is given a EuclideanDistance built on the training features;
    # the trailing 10 is passed positionally as the fifth constructor argument.
    euclidean = EuclideanDistance(train_features, train_features)
    kernel = ExponentialKernel(train_features, train_features, tau_coef, euclidean, 10)
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
示例#20
0
def kernel_gaussian_modular(train_fname=traindat,
                            test_fname=testdat,
                            width=1.3):
    """Compute GaussianKernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        width: Third argument of the ``GaussianKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, GaussianKernel, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    kernel = GaussianKernel(train_features, train_features, width)
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
示例#21
0
def kernel_rationalquadratic_modular(train_fname=traindat, test_fname=testdat, shift_coef=1.0):
    """Compute RationalQuadraticKernel matrices for both feature pairings.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        shift_coef: Third argument of the ``RationalQuadraticKernel``
            constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, RationalQuadraticKernel, EuclideanDistance, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    # The kernel is given a EuclideanDistance built on the training features.
    euclidean = EuclideanDistance(train_features, train_features)
    kernel = RationalQuadraticKernel(train_features, train_features, shift_coef, euclidean)
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
def distance_geodesic_modular(train_fname=traindat, test_fname=testdat):
    """Compute GeodesicMetric matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.

    Returns:
        ``(distance, dm_train, dm_test)``: the distance object and the two
        matrices it produced, in that order.
    """
    from modshogun import RealFeatures, GeodesicMetric, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    metric = GeodesicMetric(train_features, train_features)
    train_matrix = metric.get_distance_matrix()

    # Re-initialise the same object with the (train, test) pairing.
    metric.init(train_features, test_features)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix
示例#23
0
def kernel_power_modular(train_fname=traindat, test_fname=testdat, degree=2.0):
    """Compute PowerKernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        degree: Third argument of the ``PowerKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, PowerKernel, EuclideanDistance, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    # The kernel is given a EuclideanDistance built on the training features.
    euclidean = EuclideanDistance(train_features, train_features)
    kernel = PowerKernel(train_features, train_features, degree, euclidean)
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
def distance_normsquared_modular(train_fname=traindat, test_fname=testdat):
    """Compute EuclideanDistance matrices with the square root disabled.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.

    Returns:
        ``(distance, dm_train, dm_test)``: the distance object and the
        matrices it produced for (train, train) and (train, test).
    """
    from modshogun import RealFeatures, EuclideanDistance, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    metric = EuclideanDistance(train_features, train_features)
    # Switch off the square root before any matrix is requested.
    metric.set_disable_sqrt(True)
    train_matrix = metric.get_distance_matrix()

    # Re-initialise the same object with the (train, test) pairing.
    metric.init(train_features, test_features)
    test_matrix = metric.get_distance_matrix()

    return metric, train_matrix, test_matrix
示例#25
0
def kernel_distance_modular(train_fname=traindat,
                            test_fname=testdat,
                            width=1.7):
    """Compute DistanceKernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        width: Third argument of the ``DistanceKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, DistanceKernel, EuclideanDistance, CSVFile

    feats_train = RealFeatures(CSVFile(train_fname))
    feats_test = RealFeatures(CSVFile(test_fname))

    distance = EuclideanDistance()
    # Construct the kernel on (train, train) so that km_train really is the
    # training matrix; building it on (train, test) would make km_train a
    # second copy of the test pairing.
    kernel = DistanceKernel(feats_train, feats_train, width, distance)
    km_train = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
示例#26
0
def kernel_anova_modular(train_fname=traindat,
                         test_fname=testdat,
                         cardinality=2,
                         size_cache=10):
    """Compute ANOVAKernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        cardinality: Third argument of the ``ANOVAKernel`` constructor.
        size_cache: Fourth argument of the ``ANOVAKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import ANOVAKernel, RealFeatures, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    kernel = ANOVAKernel(train_features, train_features, cardinality, size_cache)
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
示例#27
0
def kernel_linear_modular(train_fname=traindat, test_fname=testdat, scale=1.2):
    """Compute normalised LinearKernel matrices for both feature pairings.

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        scale: Argument of the ``AvgDiagKernelNormalizer`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, LinearKernel, AvgDiagKernelNormalizer, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    # The normalizer is attached before the kernel sees any features.
    kernel = LinearKernel()
    normalizer = AvgDiagKernelNormalizer(scale)
    kernel.set_normalizer(normalizer)

    kernel.init(train_features, train_features)
    train_matrix = kernel.get_kernel_matrix()

    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
示例#28
0
def kernel_sigmoid_modular(train_fname=traindat,
                           test_fname=testdat,
                           size_cache=10,
                           gamma=1.2,
                           coef0=1.3):
    """Compute SigmoidKernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        size_cache: Third argument of the ``SigmoidKernel`` constructor.
        gamma: Fourth argument of the ``SigmoidKernel`` constructor.
        coef0: Fifth argument of the ``SigmoidKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, SigmoidKernel, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    kernel = SigmoidKernel(train_features, train_features, size_cache, gamma, coef0)
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel
def features_string_file_modular(directory, fname):
    """Load RAWBYTE string features from a directory and then from a file.

    Args:
        directory: Passed to ``load_from_directory``.
        fname: Path handed to ``CSVFile``; the resulting file object is then
            passed to ``load`` on the same feature object.

    Returns:
        ``(f.get_features(), f)`` for the ``StringCharFeatures`` object ``f``.
    """
    from modshogun import StringCharFeatures, RAWBYTE
    from modshogun import CSVFile

    strings = StringCharFeatures(RAWBYTE)

    # First load: from the directory.
    strings.load_from_directory(directory)

    # Statistics such as get_max_vector_length(), get_num_vectors(),
    # get_vector_length(0), get_feature(0, i) and get_feature_vector(0)
    # could be printed at this point.

    # Second load: from a file (one string per line).
    strings.load(CSVFile(fname))

    # A FASTA file could be read instead with strings.load_fasta('fasta.fa').
    return strings.get_features(), strings
示例#30
0
def converter_multidimensionalscaling_modular(data_fname):
    """Embed features with MultidimensionalScaling and compare distances.

    Args:
        data_fname: Path handed to ``CSVFile`` for the input features.

    Returns:
        The result of comparing the relative difference between the Euclidean
        distance matrices after and before the embedding against ``1e-6``, or
        ``None`` after printing a message when an ``ImportError`` is raised
        anywhere in the body.
    """
    try:
        import numpy
        from modshogun import RealFeatures, MultidimensionalScaling, EuclideanDistance, CSVFile

        original = RealFeatures(CSVFile(data_fname))

        before = EuclideanDistance()
        before.init(original, original)

        # Target dimension 2, landmark mode switched off.
        mds = MultidimensionalScaling()
        mds.set_target_dim(2)
        mds.set_landmark(False)
        embedded = mds.apply(original)

        after = EuclideanDistance()
        after.init(embedded, embedded)

        matrix_after = after.get_distance_matrix()
        matrix_before = before.get_distance_matrix()

        difference_norm = numpy.linalg.norm(matrix_after - matrix_before)
        relative_error = difference_norm / numpy.linalg.norm(matrix_before)
        return relative_error < 1e-6
    except ImportError:
        print('No Eigen3 available')
def multiclass_c45classifiertree_modular(train=traindat,
                                         test=testdat,
                                         labels=label_traindat,
                                         ft=feattypes):
    """Train, prune and apply a C4.5 classifier tree.

    The training data is split by a random permutation into a validation
    part (used for pruning) and a training part (used to build the tree).

    Args:
        train: Path handed to ``CSVFile`` for the training features.
        test: Path handed to ``CSVFile`` for the test features.
        labels: Path handed to ``CSVFile`` for the training labels.
        ft: Value passed to ``set_feature_types``.

    Returns:
        ``(c, output, output_certainty)``: the tree, ``get_labels()`` of the
        ``apply_multiclass`` result and ``get_certainty_vector()``; or
        ``None`` after printing a message when the imports fail.
    """
    try:
        from modshogun import RealFeatures, MulticlassLabels, CSVFile, C45ClassifierTree
        from numpy import random, int32
    except ImportError:
        print("Could not import Shogun and/or numpy modules")
        return

    # wrap features and labels into Shogun objects
    feats_train = RealFeatures(CSVFile(train))
    feats_test = RealFeatures(CSVFile(test))
    train_labels = MulticlassLabels(CSVFile(labels))

    # divide train dataset into training and validation subsets in the ratio 2/3 to 1/3
    subset = int32(random.permutation(feats_train.get_num_vectors()))
    # Floor division keeps the slice bounds integral; true division yields a
    # float under Python 3, which is rejected as a slice index.
    third = subset.size // 3
    # NOTE(review): both slices start one past the natural boundary, so the
    # entries at positions 0 and `third` land in neither subset — confirm
    # whether that is intended.
    vsubset = subset[1:third]
    trsubset = subset[1 + third:subset.size]

    # C4.5 Tree formation using training subset
    train_labels.add_subset(trsubset)
    feats_train.add_subset(trsubset)

    c = C45ClassifierTree()
    c.set_labels(train_labels)
    c.set_feature_types(ft)
    c.train(feats_train)

    train_labels.remove_subset()
    feats_train.remove_subset()

    # prune tree using validation subset
    train_labels.add_subset(vsubset)
    feats_train.add_subset(vsubset)

    c.prune_tree(feats_train, train_labels)

    train_labels.remove_subset()
    feats_train.remove_subset()

    # Classify test data
    output = c.apply_multiclass(feats_test).get_labels()
    output_certainty = c.get_certainty_vector()

    return c, output, output_certainty
示例#32
0
def kernel_poly_modular(train_fname=traindat,
                        test_fname=testdat,
                        degree=4,
                        inhomogene=False,
                        use_normalization=True):
    """Compute PolyKernel matrices for (train, train) and (train, test).

    Args:
        train_fname: Path handed to ``CSVFile`` for the training features.
        test_fname: Path handed to ``CSVFile`` for the test features.
        degree: Third argument of the ``PolyKernel`` constructor.
        inhomogene: Fourth argument of the ``PolyKernel`` constructor.
        use_normalization: Fifth argument of the ``PolyKernel`` constructor.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import RealFeatures, PolyKernel, CSVFile

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))

    kernel = PolyKernel(
        train_features, train_features, degree, inhomogene, use_normalization
    )
    train_matrix = kernel.get_kernel_matrix()

    # Re-initialise the same kernel with the (train, test) pairing.
    kernel.init(train_features, test_features)
    return train_matrix, kernel.get_kernel_matrix(), kernel