def feature_function():
    from modshogun import RealFeatures
    from modshogun import CSVFile
    import numpy as np

    # 3x3 random matrix
    feat_arr = np.random.rand(3, 3)

    # initialize RealFeatures from the numpy array
    features = RealFeatures(feat_arr)

    # get the feature matrix back as a numpy array
    print(features.get_feature_matrix())

    # get a selected column (feature vector) of the matrix
    print(features.get_feature_vector(1))

    # get the number of rows (features per vector)
    print(features.get_num_features())

    # get the number of columns (feature vectors)
    print(features.get_num_vectors())

    # initialize RealFeatures directly from a CSV file
    feats_from_csv = RealFeatures(CSVFile("csv/feature.csv"))
    print("csv is " + str(feats_from_csv.get_feature_matrix()))
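# The call above assumes a file csv/feature.csv already exists. A minimal,
# hypothetical setup (the file name and layout are assumptions, not Shogun
# API): CSVFile parses plain comma-separated numbers, so a matrix written by
# numpy.savetxt with delimiter=',' can be read back in.
import os
import numpy as np

if not os.path.isdir('csv'):
    os.makedirs('csv')
np.savetxt('csv/feature.csv', np.random.rand(3, 3), delimiter=',')

feature_function()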
from numpy import array, float64, all
from modshogun import RealFeatures, LongIntFeatures, ByteFeatures

def features_dense_modular(A=matrixA, B=matrixB, C=matrixC):
    a = RealFeatures(A)
    b = LongIntFeatures(B)
    c = ByteFeatures(C)

    # or 16-bit wide ...
    #feat1 = f.ShortFeatures(N.zeros((10,5),N.short))
    #feat2 = f.WordFeatures(N.zeros((10,5),N.uint16))

    # print(some statistics about a)

    # overwrite the first feature vector; for the asserts below to hold,
    # these values must equal the first column of A
    a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=float64), 0)

    # get matrices
    a_out = a.get_feature_matrix()
    b_out = b.get_feature_matrix()
    c_out = c.get_feature_matrix()

    assert(all(a_out == A))
    assert(all(b_out == B))
    assert(all(c_out == C))
    return a_out, b_out, c_out, a, b, c
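# The default arguments matrixA, matrixB and matrixC must already be defined
# at module level when the def statement is evaluated. A minimal sketch; the
# exact values are an assumption, except that the first column of matrixA has
# to equal [1, 4, 0, 0, 0, 9], because set_feature_vector overwrites that
# column and the assert then compares the result against A.
from numpy import array, float64, int64, uint8

matrixA = array([[1, 2, 3],
                 [4, 0, 0],
                 [0, 0, 0],
                 [0, 5, 0],
                 [0, 0, 6],
                 [9, 9, 9]], dtype=float64)
# LongIntFeatures expects 64-bit integers, ByteFeatures 8-bit unsigned ones
matrixB = matrixA.astype(int64)
matrixC = matrixA.astype(uint8)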
from numpy import array, float64, all
from modshogun import RealFeatures

def features_dense_real_modular(A=matrix):
    # dense features of type Real
    a = RealFeatures(A)

    # print(some statistics about a)
    #print(a.get_num_vectors())
    #print(a.get_num_features())

    # inspect the first feature vector, then overwrite it; for the assert
    # below to hold, the new values must equal the first column of A
    #print(a.get_feature_vector(0))
    a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=float64), 0)

    # get matrix
    a_out = a.get_feature_matrix()

    assert(all(a_out == A))
    return a_out
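# As above, the default argument matrix needs a module-level definition with
# the same first-column constraint. Assuming the matrixA sketched earlier is
# in scope, one hypothetical way to satisfy it:
matrix = matrixA  # first column is already [1, 4, 0, 0, 0, 9]

a_out = features_dense_real_modular(matrix)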
import numpy
from matplotlib import pyplot
from modshogun import RealFeatures, CSVFile, RescaleFeatures

# load wine features
features = RealFeatures(CSVFile('../data/fm_wine.dat'))

print('%d vectors with %d features.'
      % (features.get_num_vectors(), features.get_num_features()))
print('original features mean = '
      + str(numpy.mean(features.get_feature_matrix(), axis=1)))

# rescale the features to [0,1]
feature_rescaling = RescaleFeatures()
feature_rescaling.init(features)
features.add_preprocessor(feature_rescaling)
features.apply_preprocessor()

print('mean after rescaling = '
      + str(numpy.mean(features.get_feature_matrix(), axis=1)))

# remove the mean from the data; after transposing, each row is a sample
# and each column a variable
data = features.get_feature_matrix()
data = data.T
data -= numpy.mean(data, axis=0)
print(numpy.mean(data, axis=0))

fig, axarr = pyplot.subplots(1, 2)
axarr[0].matshow(numpy.cov(data.T))

#### whiten data
'''
this method to whiten the data didn't really work out
L = cholesky(numpy.cov(data.T))
data = solve_triangular(L, data.T, lower=True).T
'''

# covariance matrix
M = numpy.cov(data.T)
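# The Cholesky attempt is left commented out as in the source. For reference,
# a standard alternative (an assumption, not something the source commits to)
# is ZCA whitening through the eigendecomposition of the covariance matrix M;
# this assumes M has full rank, so that all eigenvalues are positive.
d, E = numpy.linalg.eigh(M)                          # M = E * diag(d) * E^T
W = E.dot(numpy.diag(1.0 / numpy.sqrt(d))).dot(E.T)  # whitening transform
white_data = data.dot(W)
# sanity check: the whitened data has (approximately) identity covariance
print(numpy.allclose(numpy.cov(white_data.T), numpy.eye(M.shape[0]), atol=1e-6))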
    # tail of a KNN error-evaluation helper; its signature precedes this excerpt
    predicted_labels = knn.apply(test_features)

    evaluator = MulticlassAccuracy()
    acc = evaluator.evaluate(predicted_labels, test_labels)
    err = 1 - acc

    return err


features_file = '../data/fm_ape_gut.txt'
labels_file = '../data/label_ape_gut.txt'

features = RealFeatures(CSVFile(features_file))
labels = MulticlassLabels(CSVFile(labels_file))

# reduce the number of features so that training is faster while the results
# of feature selection remain significant
fm = features.get_feature_matrix()
features = RealFeatures(fm[:500, :])

assert(features.get_num_vectors() == labels.get_num_labels())

print('Number of examples = %d, number of features = %d.'
      % (features.get_num_vectors(), features.get_num_features()))

visualize_tdsne(features, labels)

lmnn = diagonal_lmnn(features, labels, max_iter=1200)

diagonal_transform = lmnn.get_linear_transform()
diagonal = numpy.diag(diagonal_transform)
print('%d out of %d elements are non-zero.'
      % (numpy.sum(diagonal != 0), diagonal.shape[0]))

statistics = lmnn.get_statistics()
pyplot.plot(statistics.obj.get())
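# diagonal_lmnn and visualize_tdsne are helpers defined elsewhere in the
# source. For orientation, a hypothetical reconstruction of diagonal_lmnn
# using Shogun's LMNN class; the body and the default choice of k are
# assumptions, only the LMNN calls themselves are real API.
from modshogun import LMNN

def diagonal_lmnn(features, labels, max_iter=10000, k=3):
    lmnn = LMNN(features, labels, k)  # k target neighbours per example
    lmnn.set_maxiter(max_iter)
    lmnn.set_diagonal(True)           # constrain the learned transform to be diagonal
    lmnn.train()
    return lmnn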