Example #1
def feature_function():
    
    from modshogun import RealFeatures
    from modshogun import CSVFile
    import numpy as np

    # 3x3 random matrix
    feat_arr = np.random.rand(3, 3)

    # initialize RealFeatures from a numpy array
    features = RealFeatures(feat_arr)

    # get the whole feature matrix as a numpy array
    print features.get_feature_matrix()

    # get the feature vector (column) at index 1
    print features.get_feature_vector(1)

    # get the number of features (rows of the matrix)
    print features.get_num_features()

    # get the number of feature vectors (columns of the matrix)
    print features.get_num_vectors()
    
    feats_from_csv = RealFeatures(CSVFile("csv/feature.csv"))
    print "csv is ", feats_from_csv.get_feature_matrix()
Example #2
def features_dense_modular(A=matrixA, B=matrixB, C=matrixC):

    # construct dense features of type Real, LongInt and Byte
    a = RealFeatures(A)
    b = LongIntFeatures(B)
    c = ByteFeatures(C)

    # or 16bit wide ...
    #feat1 = f.ShortFeatures(N.zeros((10,5), N.short))
    #feat2 = f.WordFeatures(N.zeros((10,5), N.uint16))

    # print some statistics about a

    # get the first feature vector and set it
    a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=float64), 0)

    # get the feature matrices back as numpy arrays
    a_out = a.get_feature_matrix()
    b_out = b.get_feature_matrix()
    c_out = c.get_feature_matrix()

    # the matrices read back must match the input arrays
    assert (a_out == A).all()
    assert (b_out == B).all()
    assert (c_out == C).all()
    return a_out, b_out, c_out, a, b, c
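The defaults matrixA, matrixB and matrixC are module-level arrays that are not shown in this snippet; they must exist before the def line runs. A possible preamble with hypothetical values (dtypes chosen to match RealFeatures, LongIntFeatures and ByteFeatures, and column 0 equal to the vector written by set_feature_vector so the asserts stay true):

    from numpy import array, float64, int64, uint8
    from modshogun import RealFeatures, LongIntFeatures, ByteFeatures

    matrixA = array([[1, 2, 3], [4, 0, 0], [0, 0, 0],
                     [0, 5, 0], [0, 0, 6], [9, 9, 9]], dtype=float64)
    matrixB = array([[1, 2, 3], [4, 0, 0], [0, 0, 0],
                     [0, 5, 0], [0, 0, 6], [9, 9, 9]], dtype=int64)
    matrixC = array([[1, 2, 3], [4, 0, 0], [0, 0, 0],
                     [0, 5, 0], [0, 0, 6], [9, 9, 9]], dtype=uint8)

    a_out, b_out, c_out, a, b, c = features_dense_modular(matrixA, matrixB, matrixC)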
Example #3
def features_dense_real_modular(A=matrix):

    # construct dense features of type Real
    a = RealFeatures(A)

    # print some statistics about a
    #print(a.get_num_vectors())
    #print(a.get_num_features())

    # get the first feature vector and set it
    #print(a.get_feature_vector(0))
    a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=float64), 0)

    # get the feature matrix back as a numpy array
    a_out = a.get_feature_matrix()

    # the matrix read back must match the input array
    assert (a_out == A).all()
    return a_out
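As in the previous example, the default argument matrix is defined elsewhere and must precede the function definition. A hypothetical value (column 0 chosen so the final assert still holds) and a call:

    from numpy import array, float64
    from modshogun import RealFeatures

    matrix = array([[1, 2, 3], [4, 0, 0], [0, 0, 0],
                    [0, 5, 0], [0, 0, 6], [9, 9, 9]], dtype=float64)

    a_out = features_dense_real_modular(matrix)
    print(a_out)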
Example #4
def features_dense_real_modular(A=matrix):

    # construct dense features of type Real
    a = RealFeatures(A)

    # print some statistics about a
    #print(a.get_num_vectors())
    #print(a.get_num_features())

    # get first feature vector and set it
    #print(a.get_feature_vector(0))
    a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=float64), 0)

    # get matrix
    a_out = a.get_feature_matrix()

    assert (a_out == A).all()
    return a_out
Example #5
import numpy
from matplotlib import pyplot
from modshogun import RealFeatures, CSVFile, RescaleFeatures

# load wine features
features = RealFeatures(CSVFile('../data/fm_wine.dat'))

print('%d vectors with %d features.' % (features.get_num_vectors(), features.get_num_features()))
print('original features mean = ' + str(numpy.mean(features.get_feature_matrix(), axis=1)))

# rescale the features to [0,1]
feature_rescaling = RescaleFeatures()
feature_rescaling.init(features)
features.add_preprocessor(feature_rescaling)
features.apply_preprocessor()

print('mean after rescaling = ' + str(numpy.mean(features.get_feature_matrix(), axis=1)))

# remove mean from data
data = features.get_feature_matrix()
data = data.T
data -= numpy.mean(data, axis=0)
print(numpy.mean(data, axis=0))

fig, axarr = pyplot.subplots(1,2)
axarr[0].matshow(numpy.cov(data.T))

#### whiten data

''' this method to whiten the data didn't really work out
L = cholesky(numpy.cov(data.T))
data = solve_triangular(L, data.T, lower=True).T
'''

# covariance matrix
M = numpy.cov(data.T)
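The commented-out Cholesky approach above was abandoned; one standard alternative, sketched here under the assumption that plain PCA whitening is acceptable, uses the eigendecomposition of the covariance matrix M:

# eigendecomposition-based (PCA) whitening of the mean-removed data
eigvals, eigvecs = numpy.linalg.eigh(M)
data_white = data.dot(eigvecs) / numpy.sqrt(eigvals + 1e-12)

# the covariance of the whitened data should be close to the identity
print(numpy.allclose(numpy.cov(data_white.T), numpy.eye(data.shape[1])))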
Example #6
    evaluator = MulticlassAccuracy()
    acc = evaluator.evaluate(predicted_labels, test_labels)
    err = 1 - acc

    return err


features_file = '../data/fm_ape_gut.txt'
labels_file = '../data/label_ape_gut.txt'

features = RealFeatures(CSVFile(features_file))
labels = MulticlassLabels(CSVFile(labels_file))

# use fewer features so that training is faster, while keeping enough of them
# for the feature-selection results to stay meaningful
fm = features.get_feature_matrix()
features = RealFeatures(fm[:500, :])

assert (features.get_num_vectors() == labels.get_num_labels())

print('Number of examples = %d, number of features = %d.' %
      (features.get_num_vectors(), features.get_num_features()))

visualize_tdsne(features, labels)
lmnn = diagonal_lmnn(features, labels, max_iter=1200)

diagonal_transform = lmnn.get_linear_transform()
diagonal = numpy.diag(diagonal_transform)
print('%d out of %d elements are non-zero' %
      (numpy.sum(diagonal != 0), diagonal.shape[0]))
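The helper diagonal_lmnn is not shown in this snippet. A plausible sketch of it, assuming it simply runs Shogun's LMNN with the diagonal constraint enabled (the name, signature and the choice of k target neighbours are guesses):

from modshogun import LMNN

def diagonal_lmnn(features, labels, max_iter=1000, k=3):
    # LMNN restricted to a diagonal transform acts as feature weighting:
    # features whose diagonal entry is driven to zero are effectively dropped
    lmnn = LMNN(features, labels, k)
    lmnn.set_maxiter(max_iter)
    lmnn.set_diagonal(True)
    lmnn.train()
    return lmnn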
Example #7
import numpy
from matplotlib import pyplot
from modshogun import RealFeatures, CSVFile, RescaleFeatures

# load wine features
features = RealFeatures(CSVFile('../data/fm_wine.dat'))

print('%d vectors with %d features.' %
      (features.get_num_vectors(), features.get_num_features()))
print('original features mean = ' + str(numpy.mean(features.get_feature_matrix(), axis=1)))

# rescale the features to [0,1]
feature_rescaling = RescaleFeatures()
feature_rescaling.init(features)
features.add_preprocessor(feature_rescaling)
features.apply_preprocessor()

print('mean after rescaling = ' + str(numpy.mean(features.get_feature_matrix(), axis=1)))

# remove mean from data
data = features.get_feature_matrix()
data = data.T
data -= numpy.mean(data, axis=0)
print(numpy.mean(data, axis=0))

fig, axarr = pyplot.subplots(1, 2)
axarr[0].matshow(numpy.cov(data.T))

#### whiten data
''' this method to whiten the data didn't really work out
L = cholesky(numpy.cov(data.T))
data = solve_triangular(L, data.T, lower=True).T
'''

# covariance matrix
M = numpy.cov(data.T)
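Only the first of the two subplots created above is filled. Assuming the second panel was meant for the covariance after whitening, a sketch using the eigendecomposition of M (the same idea as in Example #5, not the rejected Cholesky route):

eigvals, eigvecs = numpy.linalg.eigh(M)
data_white = data.dot(eigvecs) / numpy.sqrt(eigvals + 1e-12)

# the whitened covariance should look like the identity matrix
axarr[1].matshow(numpy.cov(data_white.T))
pyplot.show()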
Example #8
	predicted_labels = knn.apply(test_features)
	evaluator = MulticlassAccuracy()
	acc = evaluator.evaluate(predicted_labels, test_labels)
	err = 1-acc

	return err

features_file = '../data/fm_ape_gut.txt'
labels_file = '../data/label_ape_gut.txt'

features = RealFeatures(CSVFile(features_file))
labels = MulticlassLabels(CSVFile(labels_file))

# use fewer features so that training is faster, while keeping enough of them
# for the feature-selection results to stay meaningful
fm = features.get_feature_matrix()
features = RealFeatures(fm[:500, :])

assert(features.get_num_vectors() == labels.get_num_labels())

print('Number of examples = %d, number of features = %d.' % (features.get_num_vectors(), features.get_num_features()))

visualize_tdsne(features, labels)
lmnn = diagonal_lmnn(features, labels, max_iter=1200)

diagonal_transform = lmnn.get_linear_transform()
diagonal = numpy.diag(diagonal_transform)
print('%d out of %d elements are non-zero' % (numpy.sum(diagonal != 0), diagonal.shape[0]))

statistics = lmnn.get_statistics()
pyplot.plot(statistics.obj.get())
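The error-computation function at the top of this example (and of Example #6) is cut off before the classifier is built. A hypothetical reconstruction of such a helper with Shogun's KNN and EuclideanDistance (the name, signature and k are guesses):

from modshogun import KNN, EuclideanDistance, MulticlassAccuracy

def knn_error(train_features, train_labels, test_features, test_labels, k=3):
	# train a k-nearest-neighbours classifier and return its test error
	knn = KNN(k, EuclideanDistance(train_features, train_features), train_labels)
	knn.train()

	predicted_labels = knn.apply(test_features)
	evaluator = MulticlassAccuracy()
	acc = evaluator.evaluate(predicted_labels, test_labels)
	err = 1 - acc

	return err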