def features_sparse_modular(A):
    """Check dense <-> sparse conversions of Shogun sparse features.

    Sparse Shogun features are built from the dense matrix ``A`` and from
    its scipy CSC representation. After every conversion the dense matrix
    read back is asserted to equal ``A`` element-wise. Nothing is returned.
    """
    from scipy.sparse import csc_matrix
    from modshogun import SparseRealFeatures
    # NOTE(review): `array` and `float64` are imported but not used below.
    from numpy import array, float64, all

    def equals_input(dense):
        # `all` is numpy's reduction here, not the builtin.
        return all(dense == A)

    # Sparse (CSC) representation of the dense input. Types other than
    # float64 also work, but need a recent scipy.sparse.
    sparse_input = csc_matrix(A)

    # Sparse Shogun features built from the dense matrix.
    shogun_feats = SparseRealFeatures(A)
    assert equals_input(shogun_feats.get_full_feature_matrix())

    # Same object, contents replaced from the scipy sparse matrix.
    shogun_feats.set_sparse_feature_matrix(sparse_input)
    assert equals_input(shogun_feats.get_full_feature_matrix())

    # Fresh object constructed directly from the scipy sparse matrix.
    shogun_feats = SparseRealFeatures(sparse_input)
    assert equals_input(shogun_feats.get_full_feature_matrix())

    # Pull the sparse data back out of Shogun into a scipy CSC matrix.
    recovered = csc_matrix(shogun_feats.get_sparse_feature_matrix())
    assert equals_input(recovered.todense())
def features_read_svmlight_format_modular(fname):
    """Read features and labels from ``fname`` and write them back out.

    The file is opened through ``LibSVMFile``; the loaded features and
    labels are then saved to "testwrite.light" in the current directory.
    Nothing is returned.
    """
    # NOTE(review): `os` is imported but not used in this function.
    import os
    from modshogun import LibSVMFile
    from modshogun import SparseRealFeatures

    sparse_feats = SparseRealFeatures()
    loaded_labels = sparse_feats.load_with_labels(LibSVMFile(fname))

    sparse_feats.save_with_labels(
        LibSVMFile("testwrite.light", "w"),
        loaded_labels,
    )
def features_read_svmlight_format_modular(fname):
    """Load a ``LibSVMFile`` data set and re-save it as 'testwrite.light'.

    Features and labels are read together from ``fname`` and written
    together to the output file. The function has no return value.
    """
    # NOTE(review): `os` is imported but not used in this function.
    import os
    from modshogun import SparseRealFeatures, LibSVMFile

    source = LibSVMFile(fname)
    feats = SparseRealFeatures()
    label_values = feats.load_with_labels(source)

    feats.save_with_labels(LibSVMFile('testwrite.light', 'w'), label_values)
def load_sparse_data(filename, dimension=None):
    """Load sparse features and binary labels from ``filename``.

    Args:
        filename: Path handed to ``LibSVMFile`` for reading.
        dimension: Optional feature count. When not ``None`` it is applied
            to the loaded features with ``set_num_features``.

    Returns:
        A dict with the loaded ``SparseRealFeatures`` under 'data' and the
        ``BinaryLabels`` built from the file's labels under 'labels'.
    """
    # Read the file the caller asked for. The previous code ignored
    # `filename` and always opened the module-level `args.dataset`.
    input_file = LibSVMFile(filename)

    sparse_feats = SparseRealFeatures()
    label_array = sparse_feats.load_with_labels(input_file)
    labels = BinaryLabels(label_array)

    if dimension is not None:
        sparse_feats.set_num_features(dimension)

    return {'data': sparse_feats, 'labels': labels}
def load_sparse_data(filename, dimension=None):
    """Read a sparse data set with binary labels from ``filename``.

    Args:
        filename: Path opened via ``LibSVMFile``.
        dimension: If given (not ``None``), the number of features of the
            loaded data is set to this value with ``set_num_features``.

    Returns:
        ``{'data': SparseRealFeatures, 'labels': BinaryLabels}``.
    """
    # Use the `filename` argument; the earlier version read the global
    # `args.dataset` instead, so the parameter had no effect.
    sparse_feats = SparseRealFeatures()
    label_array = sparse_feats.load_with_labels(LibSVMFile(filename))
    labels = BinaryLabels(label_array)

    if dimension is not None:
        sparse_feats.set_num_features(dimension)

    return {'data': sparse_feats, 'labels': labels}
def kernel_sparse_gaussian_modular(fm_train_real=traindat, fm_test_real=testdat, width=1.1):
    """Compute Gaussian kernel matrices on sparse features.

    Both inputs are wrapped in ``SparseRealFeatures``. The kernel is first
    built on (train, train), then re-initialised on (train, test).

    Returns:
        ``(km_train, km_test, kernel)`` - the two kernel matrices and the
        ``GaussianKernel`` object, left initialised on (train, test).
    """
    from modshogun import GaussianKernel
    from modshogun import SparseRealFeatures

    sparse_train = SparseRealFeatures(fm_train_real)
    sparse_test = SparseRealFeatures(fm_test_real)

    gaussian = GaussianKernel(sparse_train, sparse_train, width)
    train_matrix = gaussian.get_kernel_matrix()

    gaussian.init(sparse_train, sparse_test)
    test_matrix = gaussian.get_kernel_matrix()

    return train_matrix, test_matrix, gaussian
def kernel_sparse_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.1):
    """Compute normalised linear kernel matrices on sparse features.

    A ``LinearKernel`` with an ``AvgDiagKernelNormalizer(scale)`` is
    initialised on (train, train) and afterwards on (train, test); the
    kernel matrix is read after each initialisation.

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import AvgDiagKernelNormalizer, LinearKernel
    from modshogun import SparseRealFeatures

    sparse_train = SparseRealFeatures(fm_train_real)
    sparse_test = SparseRealFeatures(fm_test_real)

    linear = LinearKernel()
    normalizer = AvgDiagKernelNormalizer(scale)
    linear.set_normalizer(normalizer)

    linear.init(sparse_train, sparse_train)
    train_matrix = linear.get_kernel_matrix()

    linear.init(sparse_train, sparse_test)
    test_matrix = linear.get_kernel_matrix()

    return train_matrix, test_matrix, linear
def kernel_sparse_poly_modular(fm_train_real=traindat, fm_test_real=testdat,
                               size_cache=10, degree=3, inhomogene=True):
    """Compute polynomial kernel matrices on sparse features.

    The ``PolyKernel`` is constructed on (train, train) with the arguments
    ``size_cache, inhomogene, degree`` (in that order), then re-initialised
    on (train, test).

    Returns:
        ``(km_train, km_test, kernel)``.
    """
    from modshogun import PolyKernel
    from modshogun import SparseRealFeatures

    sparse_train = SparseRealFeatures(fm_train_real)
    sparse_test = SparseRealFeatures(fm_test_real)

    poly = PolyKernel(sparse_train, sparse_train, size_cache, inhomogene, degree)
    train_matrix = poly.get_kernel_matrix()

    poly.init(sparse_train, sparse_test)
    test_matrix = poly.get_kernel_matrix()

    return train_matrix, test_matrix, poly
def compute_output_plot_isolines(classifier, kernel=None, train=None, sparse=False, pos=None, neg=None, regression=False):
    """Evaluate ``classifier`` on a 100 x 100 grid.

    Grid range: when both ``pos`` and ``neg`` are given, each axis runs
    from the minimum of ``1.2 * neg[row, :]`` to the maximum of
    ``1.2 * pos[row, :]`` (row 0 for the first axis, row 1 for the second);
    otherwise both axes run from -5 to 5.

    The grid points are wrapped in ``RealFeatures`` (converted to
    ``SparseRealFeatures`` when ``sparse`` is true). If ``kernel`` and
    ``train`` are both truthy the kernel is initialised on (train, grid);
    otherwise the grid is set as the classifier's features.

    Returns:
        ``(x, y, z)`` - the two mesh grids and the classifier output
        reshaped to the grid. The output is taken from ``get_labels()``
        when ``regression`` is true and from ``get_values()`` otherwise.
    """
    size = 100

    if pos is None or neg is None:
        x1 = linspace(-5, 5, size)
        x2 = linspace(-5, 5, size)
    else:
        x1_max = max(1.2 * pos[0, :])
        x1_min = min(1.2 * neg[0, :])
        x2_min = min(1.2 * neg[1, :])
        x2_max = max(1.2 * pos[1, :])
        x1 = linspace(x1_min, x1_max, size)
        x2 = linspace(x2_min, x2_max, size)

    x, y = meshgrid(x1, x2)

    # One grid point per column: first row holds x, second row holds y.
    grid_points = RealFeatures(array((ravel(x), ravel(y))))

    test = grid_points
    if sparse:
        test = SparseRealFeatures()
        test.obtain_from_simple(grid_points)

    if kernel and train:
        kernel.init(train, test)
    else:
        classifier.set_features(test)

    prediction = classifier.apply()
    if regression:
        outputs = prediction.get_labels()
    else:
        outputs = prediction.get_values()

    z = outputs.reshape((size, size))
    return x, y, z
def compute_output_plot_isolines(
    classifier,
    kernel=None,
    train=None,
    sparse=False,
    pos=None,
    neg=None,
    regression=False,
):
    """Compute classifier outputs over a square grid.

    The grid has ``size = 100`` points per axis. With both ``pos`` and
    ``neg`` supplied, the lower bounds come from ``min(1.2 * neg[i, :])``
    and the upper bounds from ``max(1.2 * pos[i, :])`` for rows ``i`` = 0
    and 1; without them each axis spans [-5, 5].

    The flattened grid is turned into ``RealFeatures`` and, if ``sparse``
    is set, converted with ``SparseRealFeatures.obtain_from_simple``. When
    ``kernel`` and ``train`` are both truthy, ``kernel.init(train, test)``
    is called; otherwise ``classifier.set_features(test)`` is called.

    Returns:
        Tuple ``(x, y, z)``: mesh grids ``x`` and ``y`` plus ``z``, the
        result of ``classifier.apply()`` (``get_labels()`` for regression,
        ``get_values()`` otherwise) reshaped to ``(size, size)``.
    """
    size = 100
    use_sample_bounds = pos is not None and neg is not None

    if use_sample_bounds:
        x1_max = max(1.2 * pos[0, :])
        x1_min = min(1.2 * neg[0, :])
        x2_min = min(1.2 * neg[1, :])
        x2_max = max(1.2 * pos[1, :])
        x1 = linspace(x1_min, x1_max, size)
        x2 = linspace(x2_min, x2_max, size)
    else:
        x1 = linspace(-5, 5, size)
        x2 = linspace(-5, 5, size)

    x, y = meshgrid(x1, x2)
    flat_grid = array((ravel(x), ravel(y)))
    dense = RealFeatures(flat_grid)

    if not sparse:
        test = dense
    else:
        test = SparseRealFeatures()
        test.obtain_from_simple(dense)

    if kernel and train:
        kernel.init(train, test)
    else:
        classifier.set_features(test)

    applied = classifier.apply()
    raw = applied.get_labels() if regression else applied.get_values()

    return x, y, raw.reshape((size, size))
def distance_sparseeuclidean_modular(train_fname=traindat, test_fname=testdat):
    """Compute sparse Euclidean distance matrices from two CSV files.

    Each file is read through ``CSVFile`` into ``RealFeatures`` and then
    converted to ``SparseRealFeatures``. The distance is built on
    (train, train) and afterwards re-initialised on (train, test).

    Returns:
        ``(distance, dm_train, dm_test)`` - the ``SparseEuclideanDistance``
        object followed by the two distance matrices.
    """
    from modshogun import CSVFile, RealFeatures
    from modshogun import SparseEuclideanDistance, SparseRealFeatures

    dense_train = RealFeatures(CSVFile(train_fname))
    sparse_train = SparseRealFeatures()
    sparse_train.obtain_from_simple(dense_train)

    dense_test = RealFeatures(CSVFile(test_fname))
    sparse_test = SparseRealFeatures()
    sparse_test.obtain_from_simple(dense_test)

    metric = SparseEuclideanDistance(sparse_train, sparse_train)
    train_distances = metric.get_distance_matrix()

    metric.init(sparse_train, sparse_test)
    test_distances = metric.get_distance_matrix()

    return metric, train_distances, test_distances
def distance_sparseeuclidean_modular(train_fname=traindat, test_fname=testdat):
    """Build train/train and train/test sparse Euclidean distance matrices.

    The CSV files named by ``train_fname`` and ``test_fname`` are loaded
    as ``RealFeatures`` and converted to ``SparseRealFeatures`` via
    ``obtain_from_simple``.

    Returns:
        ``(distance, dm_train, dm_test)``; ``distance`` is left initialised
        on (train, test).
    """
    from modshogun import RealFeatures, SparseRealFeatures, SparseEuclideanDistance, CSVFile

    # Training data: CSV -> dense -> sparse.
    train_dense = RealFeatures(CSVFile(train_fname))
    train_sparse = SparseRealFeatures()
    train_sparse.obtain_from_simple(train_dense)

    # Test data: CSV -> dense -> sparse.
    test_dense = RealFeatures(CSVFile(test_fname))
    test_sparse = SparseRealFeatures()
    test_sparse.obtain_from_simple(test_dense)

    euclid = SparseEuclideanDistance(train_sparse, train_sparse)
    within_train = euclid.get_distance_matrix()

    euclid.init(train_sparse, test_sparse)
    train_vs_test = euclid.get_distance_matrix()

    return euclid, within_train, train_vs_test
def features_io_modular(fm_train_real, label_train_twoclass):
    """Save and reload Shogun features and labels in several file formats.

    Sparse features, dense features and a fixed set of multiclass labels
    are written under "tmp/" (binary, LibSVM/CSV ascii, HDF5) and loaded
    back into second objects. No comparison of the reloaded data is made.
    All files written are deleted before returning.

    ``label_train_twoclass`` is accepted but not used.

    Returns:
        ``(feats, feats2, lab, lab2)`` - the dense ``RealFeatures`` that
        were saved, the ``RealFeatures`` they were loaded into, the saved
        ``MulticlassLabels`` and the loaded ``MulticlassLabels``.
    """
    import os
    import numpy
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    # NOTE(review): GaussianKernel is imported but not used below.
    from modshogun import GaussianKernel
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    sparse_bin = "tmp/fm_train_sparsereal.bin"
    sparse_ascii = "tmp/fm_train_sparsereal.ascii"
    dense_bin = "tmp/fm_train_real.bin"
    dense_h5 = "tmp/fm_train_real.h5"
    dense_ascii = "tmp/fm_train_real.ascii"
    label_ascii = "tmp/label_train_twoclass.ascii"
    label_bin = "tmp/label_train_twoclass.bin"
    label_h5 = "tmp/label_train_real.h5"

    # A single name is rebound for every file object, in the same order as
    # before. Presumably dropping the previous reference is what releases
    # a writer before its file is read back - keep this pattern.

    # --- sparse features -------------------------------------------------
    feats = SparseRealFeatures(fm_train_real)
    feats2 = SparseRealFeatures()

    handle = BinaryFile(sparse_bin, "w")
    feats.save(handle)
    handle = LibSVMFile(sparse_ascii, "w")
    feats.save(handle)

    handle = BinaryFile(sparse_bin)
    feats2.load(handle)
    handle = LibSVMFile(sparse_ascii)
    feats2.load(handle)

    # --- dense features --------------------------------------------------
    feats = RealFeatures(fm_train_real)
    feats2 = RealFeatures()

    handle = BinaryFile(dense_bin, "w")
    feats.save(handle)
    handle = HDF5File(dense_h5, "w", "/data/doubles")
    feats.save(handle)
    handle = CSVFile(dense_ascii, "w")
    feats.save(handle)

    handle = BinaryFile(dense_bin)
    feats2.load(handle)
    handle = CSVFile(dense_ascii)
    feats2.load(handle)

    # --- labels ----------------------------------------------------------
    lab = MulticlassLabels(numpy.array([0.0, 1.0, 2.0, 3.0]))
    lab2 = MulticlassLabels()

    handle = CSVFile(label_ascii, "w")
    lab.save(handle)
    handle = BinaryFile(label_bin, "w")
    lab.save(handle)
    handle = HDF5File(label_h5, "w", "/data/labels")
    lab.save(handle)

    handle = CSVFile(label_ascii)
    lab2.load(handle)
    handle = BinaryFile(label_bin)
    lab2.load(handle)

    # --- HDF5 read-back --------------------------------------------------
    handle = HDF5File(dense_h5, "r", "/data/doubles")
    feats2.load(handle)
    handle = HDF5File(label_h5, "r", "/data/labels")
    lab2.load(handle)

    # Drop the last file object before deleting the files.
    del handle

    # clean up
    written = [
        sparse_bin, sparse_ascii,
        dense_bin, dense_h5, dense_ascii,
        label_h5,
        label_ascii, label_bin,
    ]
    for path in written:
        os.unlink(path)

    return feats, feats2, lab, lab2
def get_features_and_labels(input_file):
    """Load sparse features and multiclass labels from ``input_file``.

    ``input_file`` is passed straight to
    ``SparseRealFeatures.load_with_labels``; the labels it returns are
    wrapped in ``MulticlassLabels``.

    Returns:
        ``(feats, labels)``.
    """
    sparse_feats = SparseRealFeatures()
    raw_labels = sparse_feats.load_with_labels(input_file)
    return sparse_feats, MulticlassLabels(raw_labels)
def features_io_modular(fm_train_real, label_train_twoclass):
    """Write Shogun features/labels to disk in several formats and reload.

    Files are created in the current working directory (binary, LibSVM or
    CSV ascii, and HDF5), read back into second objects, and removed again
    at the end. The reloaded data is not compared with the original.

    ``label_train_twoclass`` is accepted but not used.

    Returns:
        ``(feats, feats2, lab, lab2)``: the saved and reloaded dense
        ``RealFeatures`` followed by the saved and reloaded
        ``MulticlassLabels``.
    """
    import numpy
    import os
    from modshogun import SparseRealFeatures, RealFeatures, MulticlassLabels
    # NOTE(review): GaussianKernel is imported but not used below.
    from modshogun import GaussianKernel
    from modshogun import LibSVMFile, CSVFile, BinaryFile, HDF5File

    # The file object is always held in the one name `fobj` and rebound in
    # the original order. Presumably a writer is released when its last
    # reference goes away, which has to happen before the file is reread.

    # Sparse features: binary + LibSVM ascii.
    feats = SparseRealFeatures(fm_train_real)
    feats2 = SparseRealFeatures()
    fobj = BinaryFile("fm_train_sparsereal.bin", "w")
    feats.save(fobj)
    fobj = LibSVMFile("fm_train_sparsereal.ascii", "w")
    feats.save(fobj)
    fobj = BinaryFile("fm_train_sparsereal.bin")
    feats2.load(fobj)
    fobj = LibSVMFile("fm_train_sparsereal.ascii")
    feats2.load(fobj)

    # Dense features: binary + HDF5 + CSV ascii.
    feats = RealFeatures(fm_train_real)
    feats2 = RealFeatures()
    fobj = BinaryFile("fm_train_real.bin", "w")
    feats.save(fobj)
    fobj = HDF5File("fm_train_real.h5", "w", "/data/doubles")
    feats.save(fobj)
    fobj = CSVFile("fm_train_real.ascii", "w")
    feats.save(fobj)
    fobj = BinaryFile("fm_train_real.bin")
    feats2.load(fobj)
    fobj = CSVFile("fm_train_real.ascii")
    feats2.load(fobj)

    # Labels: CSV ascii + binary + HDF5.
    label_values = numpy.array([0.0, 1.0, 2.0, 3.0])
    lab = MulticlassLabels(label_values)
    lab2 = MulticlassLabels()
    fobj = CSVFile("label_train_twoclass.ascii", "w")
    lab.save(fobj)
    fobj = BinaryFile("label_train_twoclass.bin", "w")
    lab.save(fobj)
    fobj = HDF5File("label_train_real.h5", "w", "/data/labels")
    lab.save(fobj)
    fobj = CSVFile("label_train_twoclass.ascii")
    lab2.load(fobj)
    fobj = BinaryFile("label_train_twoclass.bin")
    lab2.load(fobj)

    # HDF5 files are read back last.
    fobj = HDF5File("fm_train_real.h5", "r", "/data/doubles")
    feats2.load(fobj)
    fobj = HDF5File("label_train_real.h5", "r", "/data/labels")
    lab2.load(fobj)

    # Let go of the last file object, then remove everything written above.
    fobj = None
    leftovers = (
        'fm_train_sparsereal.bin',
        'fm_train_sparsereal.ascii',
        'fm_train_real.bin',
        'fm_train_real.h5',
        'fm_train_real.ascii',
        'label_train_real.h5',
        'label_train_twoclass.ascii',
        'label_train_twoclass.bin',
    )
    for name in leftovers:
        os.unlink(name)

    return feats, feats2, lab, lab2