示例#1
0
def feature_function():
    """Demonstrate the basic ``RealFeatures`` accessors.

    Builds a ``RealFeatures`` object from a random 3x3 NumPy array, prints
    its matrix, the feature vector at index 1 and both of its dimensions,
    then loads a second ``RealFeatures`` from ``csv/feature.csv`` and prints
    that matrix as well.

    Takes no arguments and returns nothing; all output goes to stdout.
    """
    from modshogun import RealFeatures
    from modshogun import CSVFile
    import numpy as np

    # 3x3 random matrix
    feat_arr = np.random.rand(3, 3)

    # Initialize RealFeatures from the NumPy array.
    features = RealFeatures(feat_arr)

    # Read the matrix back. The accessor is called without arguments, the
    # same way it is used for the CSV-backed features at the end of this
    # function.
    print(features.get_feature_matrix())

    # Feature vector stored at index 1.
    print(features.get_feature_vector(1))

    # Number of features reported by the container.
    print(features.get_num_features())

    # Number of vectors reported by the container.
    print(features.get_num_vectors())

    feats_from_csv = RealFeatures(CSVFile("csv/feature.csv"))
    # Single-argument print() behaves the same on Python 2 and 3; the two
    # spaces after "is" keep the text a comma-separated Python 2 print
    # statement would have produced.
    print("csv is  %s" % (feats_from_csv.get_feature_matrix(),))
def classifier_featureblock_logistic_regression(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Train a feature-block logistic regression and return its output labels.

    The training matrix and the label vector are each concatenated with
    themselves through ``hstack`` before being wrapped in Shogun containers.
    The feature index range is then split into two halves, one
    ``IndexBlock`` per half, and both are handed to the learner as a group.

    Args:
        fm_train: training feature matrix. It is now actually used; reading
            the module-level ``traindat`` directly would ignore any matrix
            supplied by the caller.
        fm_test: accepted for interface compatibility; not used here.
        label_train: training labels matching ``fm_train``.

    Returns:
        The result of ``mtlr.apply().get_labels()``. ``apply()`` is called
        without arguments (presumably scoring the training features --
        confirm against the Shogun API).

    Note:
        ``hstack`` is not imported in this function and must be available
        at module scope. If ``FeatureBlockLogisticRegression`` cannot be
        imported, a message is printed and the interpreter exits with
        status 0.
    """
    from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup
    try:
        from modshogun import FeatureBlockLogisticRegression
    except ImportError:
        print("FeatureBlockLogisticRegression not available")
        exit(0)

    features = RealFeatures(hstack((fm_train, fm_train)))
    labels = BinaryLabels(hstack((label_train, label_train)))

    # Two index blocks: [0, n/2) and [n/2, n).
    n_features = features.get_num_features()
    block_one = IndexBlock(0, n_features // 2)
    block_two = IndexBlock(n_features // 2, n_features)
    block_group = IndexBlockGroup()
    block_group.add_block(block_one)
    block_group.add_block(block_two)

    # NOTE(review): the meaning of the leading 0.1 is not visible here
    # (presumably a regularization coefficient) -- confirm.
    mtlr = FeatureBlockLogisticRegression(0.1, features, labels, block_group)
    mtlr.set_regularization(1)  # use regularization ratio
    mtlr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlr.train()
    out = mtlr.apply().get_labels()

    return out
示例#3
0
def classifier_featureblock_logistic_regression(fm_train=traindat,
                                                fm_test=testdat,
                                                label_train=label_traindat):
    """Train a feature-block logistic regression and return its output labels.

    The training matrix and the label vector are each concatenated with
    themselves through ``hstack`` before being wrapped in Shogun containers.
    The feature index range is then split into two halves, one
    ``IndexBlock`` per half, and both are handed to the learner as a group.

    Args:
        fm_train: training feature matrix. It is now actually used; reading
            the module-level ``traindat`` directly would ignore any matrix
            supplied by the caller.
        fm_test: accepted for interface compatibility; not used here.
        label_train: training labels matching ``fm_train``.

    Returns:
        The result of ``mtlr.apply().get_labels()``. ``apply()`` is called
        without arguments (presumably scoring the training features --
        confirm against the Shogun API).

    Note:
        ``hstack`` is not imported in this function and must be available
        at module scope. If ``FeatureBlockLogisticRegression`` cannot be
        imported, a message is printed and the interpreter exits with
        status 0.
    """
    from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup
    try:
        from modshogun import FeatureBlockLogisticRegression
    except ImportError:
        print("FeatureBlockLogisticRegression not available")
        exit(0)

    features = RealFeatures(hstack((fm_train, fm_train)))
    labels = BinaryLabels(hstack((label_train, label_train)))

    # Two index blocks: [0, n/2) and [n/2, n).
    n_features = features.get_num_features()
    block_one = IndexBlock(0, n_features // 2)
    block_two = IndexBlock(n_features // 2, n_features)
    block_group = IndexBlockGroup()
    block_group.add_block(block_one)
    block_group.add_block(block_two)

    # NOTE(review): the meaning of the leading 0.1 is not visible here
    # (presumably a regularization coefficient) -- confirm.
    mtlr = FeatureBlockLogisticRegression(0.1, features, labels, block_group)
    mtlr.set_regularization(1)  # use regularization ratio
    mtlr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlr.train()
    out = mtlr.apply().get_labels()

    return out
def neuralnets_simple_modular(train_fname, test_fname,
                              label_fname, C, epsilon):
    """Train a small neural network on CSV data and classify the test set.

    Args:
        train_fname: path of the CSV file holding the training features.
        test_fname: path of the CSV file holding the test features.
        label_fname: path of the CSV file holding the training labels.
        C: not used by this function.
        epsilon: not used by this function.

    Returns:
        A ``(network, predictions)`` tuple, where ``predictions`` is what
        ``network.apply_multiclass`` returns for the test features.
    """
    from modshogun import BinaryLabels, NeuralLayers, NeuralNetwork, RealFeatures
    from modshogun import CSVFile, Math_init_random

    # Called before anything else touches Shogun (presumably seeds its
    # random number generator so runs are repeatable -- confirm).
    Math_init_random(17)

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    # Architecture: input sized to the data -> linear(50) -> softmax(2).
    layer_builder = NeuralLayers()
    input_dim = train_features.get_num_features()
    architecture = layer_builder.input(input_dim).linear(50).softmax(2).done()

    net = NeuralNetwork(architecture)
    net.quick_connect()
    net.initialize_neural_network()

    net.set_labels(train_labels)
    net.train(train_features)

    predictions = net.apply_multiclass(test_features)
    return net, predictions
示例#5
0
def load_data(num_train_samples=7291, m_data_dict=data_dict):
    """Build Shogun train/test features and labels from a data dictionary.

    Args:
        num_train_samples: how many leading training samples to keep. The
            first ``num_train_samples`` entries of row 0 of ``'yTr'`` and
            the first ``num_train_samples`` columns of ``'xTr'`` are used.
        m_data_dict: mapping that provides the arrays ``'xTr'``, ``'yTr'``,
            ``'xTe'`` and ``'yTe'``. The test arrays are used in full.

    Returns:
        ``(train_features, train_labels, test_features, test_labels)``.
        Features are ``RealFeatures`` and labels are ``MulticlassLabels``,
        all built from ``float64`` copies of the input arrays.

    Side effects:
        Prints the label counts, vector counts and data dimension.
    """
    from modshogun import RealFeatures, MulticlassLabels
    import numpy

    train_vec = m_data_dict['yTr'][0][:num_train_samples].astype(numpy.float64)
    train_labels = MulticlassLabels(train_vec)
    test_vec = m_data_dict['yTe'][0].astype(numpy.float64)
    test_labels = MulticlassLabels(test_vec)
    # print() with one pre-built string behaves the same on Python 2 and 3.
    print("#train_labels = " + str(train_labels.get_num_labels()))
    print("#test_labels  = " + str(test_labels.get_num_labels()))

    train_mat = m_data_dict['xTr'][:, :num_train_samples].astype(numpy.float64)
    train_features = RealFeatures(train_mat)
    test_mat = m_data_dict['xTe'].astype(numpy.float64)
    test_features = RealFeatures(test_mat)
    print("#train_vectors = " + str(train_features.get_num_vectors()))
    print("#test_vectors  = " + str(test_features.get_num_vectors()))
    print("data dimension = " + str(test_features.get_num_features()))

    return train_features, train_labels, test_features, test_labels
示例#6
0
def load_data(num_train_samples=7291, m_data_dict=data_dict):
    """Build Shogun train/test features and labels from a data dictionary.

    Args:
        num_train_samples: how many leading training samples to keep. The
            first ``num_train_samples`` entries of row 0 of ``'yTr'`` and
            the first ``num_train_samples`` columns of ``'xTr'`` are used.
        m_data_dict: mapping that provides the arrays ``'xTr'``, ``'yTr'``,
            ``'xTe'`` and ``'yTe'``. The test arrays are used in full.

    Returns:
        ``(train_features, train_labels, test_features, test_labels)``.
        Features are ``RealFeatures`` and labels are ``MulticlassLabels``,
        all built from ``float64`` copies of the input arrays.

    Side effects:
        Prints the label counts, vector counts and data dimension.
    """
    # The whole body uses one indentation style; mixing a space and a tab
    # on a single line is rejected by Python 3 with a TabError.
    from modshogun import RealFeatures, MulticlassLabels
    import numpy

    train_vec = m_data_dict['yTr'][0][:num_train_samples].astype(numpy.float64)
    train_labels = MulticlassLabels(train_vec)
    test_vec = m_data_dict['yTe'][0].astype(numpy.float64)
    test_labels = MulticlassLabels(test_vec)
    # print() with one pre-built string behaves the same on Python 2 and 3.
    print("#train_labels = " + str(train_labels.get_num_labels()))
    print("#test_labels  = " + str(test_labels.get_num_labels()))

    train_mat = m_data_dict['xTr'][:, :num_train_samples].astype(numpy.float64)
    train_features = RealFeatures(train_mat)
    test_mat = m_data_dict['xTe'].astype(numpy.float64)
    test_features = RealFeatures(test_mat)
    print("#train_vectors = " + str(train_features.get_num_vectors()))
    print("#test_vectors  = " + str(test_features.get_num_vectors()))
    print("data dimension = " + str(test_features.get_num_features()))

    return train_features, train_labels, test_features, test_labels
示例#7
0
def neuralnets_simple_modular(train_fname, test_fname, label_fname, C,
                              epsilon):
    """Train a small neural network on CSV data and classify the test set.

    Args:
        train_fname: path of the CSV file holding the training features.
        test_fname: path of the CSV file holding the test features.
        label_fname: path of the CSV file holding the training labels.
        C: not used by this function.
        epsilon: not used by this function.

    Returns:
        A ``(network, predictions)`` tuple, where ``predictions`` is what
        ``network.apply_multiclass`` returns for the test features.
    """
    from modshogun import BinaryLabels, NeuralLayers, NeuralNetwork, RealFeatures
    from modshogun import CSVFile, Math_init_random

    # Called before anything else touches Shogun (presumably seeds its
    # random number generator so runs are repeatable -- confirm).
    Math_init_random(17)

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    # Architecture: input sized to the data -> linear(50) -> softmax(2).
    layer_builder = NeuralLayers()
    input_dim = train_features.get_num_features()
    architecture = layer_builder.input(input_dim).linear(50).softmax(2).done()

    net = NeuralNetwork(architecture)
    net.quick_connect()
    net.initialize_neural_network()

    net.set_labels(train_labels)
    net.train(train_features)

    predictions = net.apply_multiclass(test_features)
    return net, predictions
示例#8
0
		xi = x[y==val]
		axis.scatter(xi[:,0], xi[:,1], s=50, facecolors='none', edgecolors=COLS[idx])

def plot_neighborhood_graph(x, nn, axis):
    """Draw a segment from every point to the neighbour listed in ``nn[1]``.

    Args:
        x: 2-D array; ``x[i, 0]`` and ``x[i, 1]`` are the plotted
            coordinates of point ``i``.
        nn: 2-D index array; ``nn[1, i]`` is the row of ``x`` that point
            ``i`` is connected to.
        axis: object exposing a matplotlib-style ``plot(xs, ys, fmt)``.

    Note:
        The line format is ``COLS[int(y[i])]``. Both ``COLS`` and ``y`` are
        read from module scope, not from the arguments, so they must be
        defined before this function is called.
    """
    # range() exists on both Python 2 and 3; xrange() is Python 2 only.
    for i in range(x.shape[0]):
        neighbour = nn[1, i]
        xs = [x[i, 0], x[neighbour, 0]]
        ys = [x[i, 1], x[neighbour, 1]]
        axis.plot(xs, ys, COLS[int(y[i])])

# Script section: plot the sandwich data with its k-NN graph, then train LMNN.
# A 3x1 grid of subplots is created; only axarr[0] is drawn on in this part.
figure, axarr = pyplot.subplots(3, 1)
# NOTE: x and y become module-level globals; plot_neighborhood_graph reads
# y directly from module scope.
x, y = sandwich_data()

# RealFeatures is built from the transpose of x.
features = RealFeatures(x.T)
labels = MulticlassLabels(y)

print('%d vectors with %d features' % (features.get_num_vectors(), features.get_num_features()))
# Sanity check: one label per feature vector.
assert(features.get_num_vectors() == labels.get_num_labels())

# k-NN over Euclidean distances between the feature set and itself.
distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)

# Draw the raw points and the neighbourhood graph on the first subplot.
plot_data(x, y, axarr[0])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0])
axarr[0].set_aspect('equal')
axarr[0].set_xlim(-6, 4)
axarr[0].set_ylim(-3, 2)

# Train LMNN with the same k as the k-NN above, with the iteration setting
# raised to 10000 via set_maxiter.
lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
示例#9
0
def plot_neighborhood_graph(x, nn, axis):
    """Draw a segment from every point to the neighbour listed in ``nn[1]``.

    Args:
        x: 2-D array; ``x[i, 0]`` and ``x[i, 1]`` are the plotted
            coordinates of point ``i``.
        nn: 2-D index array; ``nn[1, i]`` is the row of ``x`` that point
            ``i`` is connected to.
        axis: object exposing a matplotlib-style ``plot(xs, ys, fmt)``.

    Note:
        The line format is ``COLS[int(y[i])]``. Both ``COLS`` and ``y`` are
        read from module scope, not from the arguments, so they must be
        defined before this function is called.
    """
    # range() exists on both Python 2 and 3; xrange() is Python 2 only.
    for i in range(x.shape[0]):
        neighbour = nn[1, i]
        xs = [x[i, 0], x[neighbour, 0]]
        ys = [x[i, 1], x[neighbour, 1]]
        axis.plot(xs, ys, COLS[int(y[i])])


# Script section: plot the sandwich data with its k-NN graph, then train LMNN.
# A 3x1 grid of subplots is created; only axarr[0] is drawn on in this part.
figure, axarr = pyplot.subplots(3, 1)
# NOTE: x and y become module-level globals; plot_neighborhood_graph reads
# y directly from module scope.
x, y = sandwich_data()

# RealFeatures is built from the transpose of x.
features = RealFeatures(x.T)
labels = MulticlassLabels(y)

print('%d vectors with %d features' %
      (features.get_num_vectors(), features.get_num_features()))
# Sanity check: one label per feature vector.
assert (features.get_num_vectors() == labels.get_num_labels())

# k-NN over Euclidean distances between the feature set and itself.
distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)

# Draw the raw points and the neighbourhood graph on the first subplot.
plot_data(x, y, axarr[0])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0])
axarr[0].set_aspect('equal')
axarr[0].set_xlim(-6, 4)
axarr[0].set_ylim(-3, 2)

# Train LMNN with the same k as the k-NN above, with the iteration setting
# raised to 10000 via set_maxiter.
lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
示例#10
0
#!/usr/bin/python

from modshogun import CSVFile, RealFeatures, RescaleFeatures
from scipy.linalg import solve_triangular, cholesky, sqrtm, inv
import matplotlib.pyplot as pyplot
import numpy

# Script section: load the wine features, rescale them, centre the data and
# show its covariance matrix.

# load wine features
features = RealFeatures(CSVFile('../data/fm_wine.dat'))

print('%d vectors with %d features.' % (features.get_num_vectors(), features.get_num_features()))
# NOTE(review): numpy.mean is handed the RealFeatures object itself rather
# than features.get_feature_matrix() -- confirm this is intended.
print('original features mean = ' + str(numpy.mean(features, axis=1)))

# rescale the features to [0,1]
feature_rescaling = RescaleFeatures()
feature_rescaling.init(features)
features.add_preprocessor(feature_rescaling)
features.apply_preprocessor()

print('mean after rescaling = ' + str(numpy.mean(features, axis=1)))

# remove mean from data
data = features.get_feature_matrix()
# Work on the transpose, then subtract the mean taken along axis 0 in place.
data = data.T
data-= numpy.mean(data, axis=0)
# print() call (not a Python 2 print statement), matching the calls above;
# the printed means should be close to zero after centring.
print(numpy.mean(data, axis=0))

# Left panel: covariance of the centred data (numpy.cov is given data.T).
fig, axarr = pyplot.subplots(1,2)
axarr[0].matshow(numpy.cov(data.T))

#### whiten data
示例#11
0
#!/usr/bin/python

from scipy import io

# Script section: train a diagonal LMNN transform on the preprocessed
# NBData20 training set stored in a MATLAB .mat file.
data_dict = io.loadmat('../data/NBData20_train_preprocessed.mat')

# 'xt' holds the feature data and 'yt' the labels (both read from the file).
xt = data_dict['xt']
yt = data_dict['yt']

import numpy
from modshogun import RealFeatures, MulticlassLabels, LMNN, MSG_DEBUG

# RealFeatures is built from the transpose of xt; singleton dimensions are
# squeezed out of yt before it is wrapped as labels.
features = RealFeatures(xt.T)
labels = MulticlassLabels(numpy.squeeze(yt))

k = 6
lmnn = LMNN(features, labels, k)
# Debug-level logging, diagonal mode enabled, iteration setting 10000.
lmnn.io.set_loglevel(MSG_DEBUG)
lmnn.set_diagonal(True)
lmnn.set_maxiter(10000)
# train() is given an identity matrix whose size is the number of features
# (presumably the initial transform -- confirm against the LMNN API).
lmnn.train(numpy.eye(features.get_num_features()))
示例#12
0
#!/usr/bin/python

from scipy import io

# Script section: train a diagonal LMNN transform on the preprocessed
# NBData20 training set stored in a MATLAB .mat file.
data_dict = io.loadmat('../data/NBData20_train_preprocessed.mat')

# 'xt' holds the feature data and 'yt' the labels (both read from the file).
xt = data_dict['xt']
yt = data_dict['yt']

import numpy
from modshogun import RealFeatures,MulticlassLabels,LMNN,MSG_DEBUG

# RealFeatures is built from the transpose of xt; singleton dimensions are
# squeezed out of yt before it is wrapped as labels.
features = RealFeatures(xt.T)
labels = MulticlassLabels(numpy.squeeze(yt))

k = 6
lmnn = LMNN(features,labels,k)
# Debug-level logging, diagonal mode enabled, iteration setting 10000.
lmnn.io.set_loglevel(MSG_DEBUG)
lmnn.set_diagonal(True)
lmnn.set_maxiter(10000)
# train() is given an identity matrix whose size is the number of features
# (presumably the initial transform -- confirm against the LMNN API).
lmnn.train(numpy.eye(features.get_num_features()))

示例#13
0
#!/usr/bin/python

from modshogun import CSVFile, RealFeatures, RescaleFeatures
from scipy.linalg import solve_triangular, cholesky, sqrtm, inv
import matplotlib.pyplot as pyplot
import numpy

# Script section: load the wine features, rescale them, centre the data and
# show its covariance matrix.

# load wine features
features = RealFeatures(CSVFile('../data/fm_wine.dat'))

print('%d vectors with %d features.' %
      (features.get_num_vectors(), features.get_num_features()))
# NOTE(review): numpy.mean is handed the RealFeatures object itself rather
# than features.get_feature_matrix() -- confirm this is intended.
print('original features mean = ' + str(numpy.mean(features, axis=1)))

# rescale the features to [0,1]
feature_rescaling = RescaleFeatures()
feature_rescaling.init(features)
features.add_preprocessor(feature_rescaling)
features.apply_preprocessor()

print('mean after rescaling = ' + str(numpy.mean(features, axis=1)))

# remove mean from data
data = features.get_feature_matrix()
# Work on the transpose, then subtract the mean taken along axis 0 in place.
data = data.T
data -= numpy.mean(data, axis=0)
# print() call (not a Python 2 print statement), matching the calls above;
# the printed means should be close to zero after centring.
print(numpy.mean(data, axis=0))

# Left panel: covariance of the centred data (numpy.cov is given data.T).
fig, axarr = pyplot.subplots(1, 2)
axarr[0].matshow(numpy.cov(data.T))
示例#14
0
	acc = evaluator.evaluate(predicted_labels, test_labels)
	err = 1-acc

	return err

# Script section: load the ape-gut data, keep a subset of the feature matrix
# rows, train a diagonal LMNN and report how many diagonal entries of the
# learned transform are non-zero.
features_file = '../data/fm_ape_gut.txt'
labels_file = '../data/label_ape_gut.txt'

features = RealFeatures(CSVFile(features_file))
labels = MulticlassLabels(CSVFile(labels_file))

# reduce the number of features to use so that the training is faster but still
# the results of feature selection are significant
fm = features.get_feature_matrix()
# Keep the first 500 rows of the feature matrix and every column.
features = RealFeatures(fm[:500, :])

# Sanity check: one label per feature vector.
assert(features.get_num_vectors() == labels.get_num_labels())

print('Number of examples = %d, number of features = %d.' % (features.get_num_vectors(), features.get_num_features()))

# visualize_tdsne and diagonal_lmnn are helpers defined outside this section.
visualize_tdsne(features, labels)
lmnn = diagonal_lmnn(features, labels, max_iter=1200)

# Take the main diagonal of the learned linear transform and count the
# entries that are not exactly zero.
diagonal_transform = lmnn.get_linear_transform()
diagonal = numpy.diag(diagonal_transform)
print('%d out of %d elements are non-zero' % (numpy.sum(diagonal != 0), diagonal.shape[0]))

# Plot the values stored in the 'obj' field of the training statistics
# (presumably the objective per iteration -- confirm).
statistics = lmnn.get_statistics()
pyplot.plot(statistics.obj.get())
pyplot.show()