def feature_function():
    """Demonstrate basic ``RealFeatures`` usage.

    Builds a ``RealFeatures`` object from a random 3x3 NumPy array, prints
    its feature matrix, the feature vector at index 1 and its two
    dimensions, then builds a second ``RealFeatures`` object from
    ``csv/feature.csv`` and prints that matrix as well.

    Takes no arguments and returns nothing; all results go to stdout.
    """
    from modshogun import RealFeatures
    from modshogun import CSVFile
    import numpy as np

    # 3x3 random matrix
    feat_arr = np.random.rand(3, 3)

    # initialize RealFeatures from the numpy array
    features = RealFeatures(feat_arr)

    # Get the whole feature matrix.  The getter takes no arguments (it is
    # called that way for the CSV features below); the original passed the
    # features object to its own getter.
    print(features.get_feature_matrix())

    # get the selected feature vector (column 1 of the matrix)
    print(features.get_feature_vector(1))

    # Each feature vector is one column of the matrix, so the number of
    # features is the number of rows ...
    print(features.get_num_features())

    # ... and the number of vectors is the number of columns.
    print(features.get_num_vectors())

    feats_from_csv = RealFeatures(CSVFile("csv/feature.csv"))
    # Formatted as a single argument so the output is the same whether
    # print is a statement (Python 2) or a function (Python 3).
    print("%s %s" % ("csv is ", feats_from_csv.get_feature_matrix()))
def classifier_featureblock_logistic_regression(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Train a FeatureBlockLogisticRegression and return its output labels.

    The training matrix and the label vector are each stacked with
    themselves via ``hstack``.  The feature index range is split into two
    halves, each wrapped in an ``IndexBlock`` and collected in an
    ``IndexBlockGroup`` that is handed to the classifier.

    Parameters:
        fm_train: training feature matrix.
        fm_test: accepted for interface compatibility; not used here.
        label_train: training label vector.

    Returns:
        The result of ``mtlr.apply().get_labels()`` after training.

    If ``FeatureBlockLogisticRegression`` cannot be imported, a message is
    printed and the interpreter exits with status 0.
    """
    from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup
    try:
        from modshogun import FeatureBlockLogisticRegression
    except ImportError:
        print("FeatureBlockLogisticRegression not available")
        exit(0)

    # Use the fm_train argument; the original read the module-level
    # `traindat` here, so a caller-supplied matrix was silently ignored.
    features = RealFeatures(hstack((fm_train, fm_train)))
    labels = BinaryLabels(hstack((label_train, label_train)))

    # Two blocks covering the first and second half of the feature indices.
    n_features = features.get_num_features()
    block_one = IndexBlock(0, n_features // 2)
    block_two = IndexBlock(n_features // 2, n_features)
    block_group = IndexBlockGroup()
    block_group.add_block(block_one)
    block_group.add_block(block_two)

    mtlr = FeatureBlockLogisticRegression(0.1, features, labels, block_group)
    mtlr.set_regularization(1)  # use regularization ratio
    mtlr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlr.train()
    out = mtlr.apply().get_labels()

    return out
def classifier_featureblock_logistic_regression(fm_train=traindat, fm_test=testdat, label_train=label_traindat):
    """Fit a two-block FeatureBlockLogisticRegression and return its labels.

    ``fm_train`` and ``label_train`` are each concatenated with themselves
    using ``hstack`` before being wrapped in ``RealFeatures`` and
    ``BinaryLabels``.  The feature indices are divided into two
    ``IndexBlock`` halves grouped in an ``IndexBlockGroup``.

    Parameters:
        fm_train: training feature matrix.
        fm_test: unused; kept so existing callers keep working.
        label_train: training label vector.

    Returns:
        ``mtlr.apply().get_labels()`` of the trained machine.

    When ``FeatureBlockLogisticRegression`` is not importable the function
    prints a notice and calls ``exit(0)``.
    """
    from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup
    try:
        from modshogun import FeatureBlockLogisticRegression
    except ImportError:
        print("FeatureBlockLogisticRegression not available")
        exit(0)

    # Honour the fm_train parameter: the original used the module-level
    # `traindat`, which made the argument a no-op.
    features = RealFeatures(hstack((fm_train, fm_train)))
    labels = BinaryLabels(hstack((label_train, label_train)))

    # Split the feature index range [0, n_features) into two halves.
    n_features = features.get_num_features()
    block_one = IndexBlock(0, n_features // 2)
    block_two = IndexBlock(n_features // 2, n_features)
    block_group = IndexBlockGroup()
    block_group.add_block(block_one)
    block_group.add_block(block_two)

    mtlr = FeatureBlockLogisticRegression(0.1, features, labels, block_group)
    mtlr.set_regularization(1)  # use regularization ratio
    mtlr.set_tolerance(1e-2)  # use 1e-2 tolerance
    mtlr.train()
    out = mtlr.apply().get_labels()

    return out
def neuralnets_simple_modular(train_fname, test_fname, label_fname, C, epsilon):
    """Train a small neural network on CSV data and classify the test set.

    The network is built from an input layer sized by the number of
    training features, followed by ``linear(50)`` and ``softmax(2)``.

    Parameters:
        train_fname: path of the CSV file with the training features.
        test_fname: path of the CSV file with the test features.
        label_fname: path of the CSV file with the training labels.
        C: accepted but not used by this function.
        epsilon: accepted but not used by this function.

    Returns:
        A tuple ``(network, predictions)`` where ``predictions`` is the
        result of ``network.apply_multiclass`` on the test features.
    """
    from modshogun import NeuralLayers, NeuralNetwork, RealFeatures, BinaryLabels
    from modshogun import Math_init_random, CSVFile

    # Called with a constant before any data is loaded (presumably seeds
    # Shogun's random generator -- going by the helper's name).
    Math_init_random(17)

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    # Describe the layer stack first, then hand it to the network.
    builder = NeuralLayers()
    input_dim = train_features.get_num_features()
    layer_stack = builder.input(input_dim).linear(50).softmax(2).done()

    net = NeuralNetwork(layer_stack)
    net.quick_connect()
    net.initialize_neural_network()

    net.set_labels(train_labels)
    net.train(train_features)

    predictions = net.apply_multiclass(test_features)
    return net, predictions
def load_data(num_train_samples=7291, m_data_dict=data_dict):
    """Build Shogun features and labels from a data dictionary.

    Reads the entries ``'yTr'``, ``'yTe'``, ``'xTr'`` and ``'xTe'`` of
    ``m_data_dict``, converts them to ``numpy.float64`` and wraps them in
    ``MulticlassLabels`` / ``RealFeatures``.  Only the first
    ``num_train_samples`` training labels and training columns are kept;
    the test data is used in full.  A short summary is printed.

    Parameters:
        num_train_samples: number of training samples to keep.
        m_data_dict: mapping holding the four arrays named above.

    Returns:
        ``(train_features, train_labels, test_features, test_labels)``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    import numpy

    # Labels: first row of 'yTr' / 'yTe'.
    train_vec = m_data_dict['yTr'][0][:num_train_samples].astype(numpy.float64)
    train_labels = MulticlassLabels(train_vec)
    test_vec = m_data_dict['yTe'][0].astype(numpy.float64)
    test_labels = MulticlassLabels(test_vec)

    # Single-argument print(...) calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("#train_labels = " + str(train_labels.get_num_labels()))
    print("#test_labels  = " + str(test_labels.get_num_labels()))

    # Features: samples are taken along the second axis of 'xTr'.
    train_mat = m_data_dict['xTr'][:, :num_train_samples].astype(numpy.float64)
    train_features = RealFeatures(train_mat)
    test_mat = m_data_dict['xTe'].astype(numpy.float64)
    test_features = RealFeatures(test_mat)

    print("#train_vectors = " + str(train_features.get_num_vectors()))
    print("#test_vectors  = " + str(test_features.get_num_vectors()))
    print("data dimension = " + str(test_features.get_num_features()))

    return train_features, train_labels, test_features, test_labels
def load_data(num_train_samples=7291, m_data_dict=data_dict):
    """Create train/test ``RealFeatures`` and ``MulticlassLabels``.

    The arrays stored under ``'yTr'``, ``'yTe'``, ``'xTr'`` and ``'xTe'``
    in ``m_data_dict`` are cast to ``numpy.float64``.  The training labels
    and the training matrix columns are truncated to
    ``num_train_samples``; the test arrays are taken whole.  The sizes of
    the resulting objects are printed.

    Parameters:
        num_train_samples: how many training samples to keep.
        m_data_dict: mapping that provides the four arrays listed above.

    Returns:
        ``(train_features, train_labels, test_features, test_labels)``.
    """
    from modshogun import RealFeatures, MulticlassLabels
    import numpy

    # Label vectors come from the first row of 'yTr' / 'yTe'.
    train_vec = m_data_dict['yTr'][0][:num_train_samples].astype(numpy.float64)
    train_labels = MulticlassLabels(train_vec)
    test_vec = m_data_dict['yTe'][0].astype(numpy.float64)
    test_labels = MulticlassLabels(test_vec)

    # print(...) with one argument behaves identically in Python 2 and 3;
    # the original print statements were Python-2-only.
    print("#train_labels = " + str(train_labels.get_num_labels()))
    print("#test_labels  = " + str(test_labels.get_num_labels()))

    # Training samples are selected along the second axis of 'xTr'.
    train_mat = m_data_dict['xTr'][:, :num_train_samples].astype(numpy.float64)
    train_features = RealFeatures(train_mat)
    test_mat = m_data_dict['xTe'].astype(numpy.float64)
    test_features = RealFeatures(test_mat)

    print("#train_vectors = " + str(train_features.get_num_vectors()))
    print("#test_vectors  = " + str(test_features.get_num_vectors()))
    print("data dimension = " + str(test_features.get_num_features()))

    return train_features, train_labels, test_features, test_labels
def neuralnets_simple_modular(train_fname, test_fname, label_fname, C, epsilon):
    """Fit a three-layer network on CSV data and predict on the test data.

    Layers: an input layer whose size is the number of training features,
    then ``linear(50)``, then ``softmax(2)``.

    Parameters:
        train_fname: CSV file holding the training features.
        test_fname: CSV file holding the test features.
        label_fname: CSV file holding the training labels.
        C: not used in the body.
        epsilon: not used in the body.

    Returns:
        ``(network, network.apply_multiclass(test features))``.
    """
    from modshogun import NeuralLayers, NeuralNetwork, RealFeatures, BinaryLabels
    from modshogun import Math_init_random, CSVFile

    # Constant argument, issued before loading anything (presumably a
    # random seed, judging by the function name).
    Math_init_random(17)

    train_features = RealFeatures(CSVFile(train_fname))
    test_features = RealFeatures(CSVFile(test_fname))
    train_labels = BinaryLabels(CSVFile(label_fname))

    # Assemble the layer description step by step.
    builder = NeuralLayers()
    n_inputs = train_features.get_num_features()
    architecture = builder.input(n_inputs).linear(50).softmax(2).done()

    net = NeuralNetwork(architecture)
    net.quick_connect()
    net.initialize_neural_network()

    net.set_labels(train_labels)
    net.train(train_features)

    test_output = net.apply_multiclass(test_features)
    return net, test_output
xi = x[y==val] axis.scatter(xi[:,0], xi[:,1], s=50, facecolors='none', edgecolors=COLS[idx]) def plot_neighborhood_graph(x, nn, axis): for i in xrange(x.shape[0]): xs = [x[i,0], x[nn[1,i], 0]] ys = [x[i,1], x[nn[1,i], 1]] axis.plot(xs, ys, COLS[int(y[i])]) figure, axarr = pyplot.subplots(3, 1) x, y = sandwich_data() features = RealFeatures(x.T) labels = MulticlassLabels(y) print('%d vectors with %d features' % (features.get_num_vectors(), features.get_num_features())) assert(features.get_num_vectors() == labels.get_num_labels()) distance = EuclideanDistance(features, features) k = 2 knn = KNN(k, distance, labels) plot_data(x, y, axarr[0]) plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0]) axarr[0].set_aspect('equal') axarr[0].set_xlim(-6, 4) axarr[0].set_ylim(-3, 2) lmnn = LMNN(features, labels, k) lmnn.set_maxiter(10000) lmnn.train()
def plot_neighborhood_graph(x, nn, axis):
    """Connect each point of ``x`` to the neighbour given by ``nn[1, i]``.

    Parameters:
        x: 2-D array; columns 0 and 1 of a row are its plot coordinates.
        nn: 2-D index array; row 1 holds, per point, the index of the row
            of ``x`` it is connected to.
        axis: object providing ``plot(xs, ys, colour)``.

    The colour of each segment is ``COLS[int(y[i])]``.  ``y`` is not a
    parameter: it is looked up in module scope when the function runs.
    """
    # xrange() exists only in Python 2; range() iterates identically and
    # keeps the block consistent with the print(...) calls below.
    for i in range(x.shape[0]):
        xs = [x[i, 0], x[nn[1, i], 0]]
        ys = [x[i, 1], x[nn[1, i], 1]]
        axis.plot(xs, ys, COLS[int(y[i])])


figure, axarr = pyplot.subplots(3, 1)
x, y = sandwich_data()

# RealFeatures receives the transpose of x.
features = RealFeatures(x.T)
labels = MulticlassLabels(y)

print('%d vectors with %d features' % (features.get_num_vectors(), features.get_num_features()))
assert (features.get_num_vectors() == labels.get_num_labels())

distance = EuclideanDistance(features, features)
k = 2
knn = KNN(k, distance, labels)

# First subplot: raw data plus its neighbourhood graph.
plot_data(x, y, axarr[0])
plot_neighborhood_graph(x, knn.nearest_neighbors(), axarr[0])
axarr[0].set_aspect('equal')
axarr[0].set_xlim(-6, 4)
axarr[0].set_ylim(-3, 2)

lmnn = LMNN(features, labels, k)
lmnn.set_maxiter(10000)
lmnn.train()
#!/usr/bin/python
# Loads the wine feature matrix, rescales it with RescaleFeatures, removes
# the mean and shows the covariance of the result in the first subplot.
from modshogun import CSVFile, RealFeatures, RescaleFeatures
from scipy.linalg import solve_triangular, cholesky, sqrtm, inv
import matplotlib.pyplot as pyplot
import numpy

# load wine features
features = RealFeatures(CSVFile('../data/fm_wine.dat'))

print('%d vectors with %d features.' % (features.get_num_vectors(), features.get_num_features()))
# NOTE(review): numpy.mean is applied to the RealFeatures object itself,
# which presumably relies on it exposing its matrix to NumPy -- confirm.
print('original features mean = ' + str(numpy.mean(features, axis=1)))

# rescale the features to [0,1]
feature_rescaling = RescaleFeatures()
feature_rescaling.init(features)
features.add_preprocessor(feature_rescaling)
features.apply_preprocessor()
print('mean after rescaling = ' + str(numpy.mean(features, axis=1)))

# remove mean from data
data = features.get_feature_matrix()
data = data.T
data -= numpy.mean(data, axis=0)
# Was a Python-2-only print statement; the call form prints the same value
# and matches every other print(...) in this script.
print(numpy.mean(data, axis=0))

fig, axarr = pyplot.subplots(1, 2)
axarr[0].matshow(numpy.cov(data.T))

#### whiten data
#!/usr/bin/python
# Trains a diagonal LMNN on the preprocessed NBData20 training set, with
# debug-level logging enabled and an identity initial transform.
import numpy
from scipy import io
from modshogun import RealFeatures, MulticlassLabels, LMNN, MSG_DEBUG

# Load the .mat file and pick out the data ('xt') and label ('yt') arrays.
data_dict = io.loadmat('../data/NBData20_train_preprocessed.mat')
xt = data_dict['xt']
yt = data_dict['yt']

# The data matrix is transposed for RealFeatures; labels are squeezed first.
features = RealFeatures(xt.T)
labels = MulticlassLabels(numpy.squeeze(yt))

k = 6
lmnn = LMNN(features, labels, k)
lmnn.io.set_loglevel(MSG_DEBUG)
lmnn.set_diagonal(True)
lmnn.set_maxiter(10000)

# Start training from the identity matrix of matching dimension.
initial_transform = numpy.eye(features.get_num_features())
lmnn.train(initial_transform)
#!/usr/bin/python
# Diagonal LMNN training on the preprocessed NBData20 training data; the
# optimisation is started from an identity transform and logs at debug level.
import numpy
from scipy import io
from modshogun import RealFeatures, MulticlassLabels, LMNN, MSG_DEBUG

# Read the arrays stored under 'xt' (data) and 'yt' (labels).
data_dict = io.loadmat('../data/NBData20_train_preprocessed.mat')
xt = data_dict['xt']
yt = data_dict['yt']

# RealFeatures gets the transposed data; the label array is squeezed.
features = RealFeatures(xt.T)
labels = MulticlassLabels(numpy.squeeze(yt))

k = 6
lmnn = LMNN(features, labels, k)
lmnn.io.set_loglevel(MSG_DEBUG)
lmnn.set_diagonal(True)
lmnn.set_maxiter(10000)

# Identity matrix sized by the number of features as the starting point.
num_features = features.get_num_features()
lmnn.train(numpy.eye(num_features))
#!/usr/bin/python
# Loads the wine features, applies RescaleFeatures, centres the data and
# displays its covariance matrix in the first of two subplots.
from modshogun import CSVFile, RealFeatures, RescaleFeatures
from scipy.linalg import solve_triangular, cholesky, sqrtm, inv
import matplotlib.pyplot as pyplot
import numpy

# load wine features
features = RealFeatures(CSVFile('../data/fm_wine.dat'))

print('%d vectors with %d features.' % (features.get_num_vectors(), features.get_num_features()))
# NOTE(review): the mean is taken over the RealFeatures object directly;
# presumably it is convertible to a NumPy array -- confirm.
print('original features mean = ' + str(numpy.mean(features, axis=1)))

# rescale the features to [0,1]
feature_rescaling = RescaleFeatures()
feature_rescaling.init(features)
features.add_preprocessor(feature_rescaling)
features.apply_preprocessor()
print('mean after rescaling = ' + str(numpy.mean(features, axis=1)))

# remove mean from data
data = features.get_feature_matrix()
data = data.T
data -= numpy.mean(data, axis=0)
# Converted from a Python-2-only print statement so the script uses the
# print(...) call form throughout; the printed value is unchanged.
print(numpy.mean(data, axis=0))

fig, axarr = pyplot.subplots(1, 2)
axarr[0].matshow(numpy.cov(data.T))
acc = evaluator.evaluate(predicted_labels, test_labels) err = 1-acc return err features_file = '../data/fm_ape_gut.txt' labels_file = '../data/label_ape_gut.txt' features = RealFeatures(CSVFile(features_file)) labels = MulticlassLabels(CSVFile(labels_file)) # reduce the number of features to use so that the training is faster but still # the results of feature selection are significant fm = features.get_feature_matrix() features = RealFeatures(fm[:500, :]) assert(features.get_num_vectors() == labels.get_num_labels()) print('Number of examples = %d, number of features = %d.' % (features.get_num_vectors(), features.get_num_features())) visualize_tdsne(features, labels) lmnn = diagonal_lmnn(features, labels, max_iter=1200) diagonal_transform = lmnn.get_linear_transform() diagonal = numpy.diag(diagonal_transform) print('%d out of %d elements are non-zero' % (numpy.sum(diagonal != 0), diagonal.shape[0])) statistics = lmnn.get_statistics() pyplot.plot(statistics.obj.get()) pyplot.show()