def kernel_linear_byte_modular(fm_train_byte=traindat, fm_test_byte=testdat):
    """Demo: linear kernel on 8-bit (byte) features.

    Builds the train/train kernel matrix, re-initialises the same kernel
    object on train/test, and returns the kernel object.
    """
    from shogun.Kernel import LinearKernel
    from shogun.Features import ByteFeatures

    # Wrap the raw byte matrices in shogun feature objects.
    train_feats = ByteFeatures(fm_train_byte)
    test_feats = ByteFeatures(fm_test_byte)

    # Constructing with (lhs, rhs) initialises the kernel immediately.
    kernel = LinearKernel(train_feats, train_feats)
    km_train = kernel.get_kernel_matrix()

    # Re-point the right-hand side at the test features.
    kernel.init(train_feats, test_feats)
    km_test = kernel.get_kernel_matrix()
    return kernel
def kernel_sparse_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.1):
    """Demo: linear kernel on sparse real-valued features.

    An average-diagonal normalizer with the given scale is attached before
    initialisation.  Returns (train matrix, test matrix, kernel object).
    """
    from shogun.Features import SparseRealFeatures
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

    train = SparseRealFeatures(fm_train_real)
    test = SparseRealFeatures(fm_test_real)

    kernel = LinearKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Train/train kernel matrix first ...
    kernel.init(train, train)
    km_train = kernel.get_kernel_matrix()

    # ... then train/test on the same kernel object.
    kernel.init(train, test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
def kernel_linear_word_modular(fm_train_word=traindat, fm_test_word=testdat, scale=1.2):
    """Demo: linear kernel over word (16-bit) features with an
    average-diagonal normalizer; returns the kernel object, left
    initialised on train/test."""
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import WordFeatures

    words_train = WordFeatures(fm_train_word)
    words_test = WordFeatures(fm_test_word)

    kern = LinearKernel(words_train, words_train)
    kern.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Evaluate on train/train, then on train/test.
    kern.init(words_train, words_train)
    km_train = kern.get_kernel_matrix()
    kern.init(words_train, words_test)
    km_test = kern.get_kernel_matrix()
    return kern
def kernel_director_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.2):
    # Benchmarks a native LinearKernel against a Python-side
    # DirectorLinearKernel on the same training data and returns both
    # kernel matrices (Python 2 syntax).
    # NOTE(review): RealFeatures and DirectorLinearKernel are not imported or
    # defined in this snippet -- presumably provided elsewhere in the file;
    # verify before use.
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from modshogun import Time
    feats_train = RealFeatures(fm_train_real)
    feats_train.io.set_loglevel(0)
    # single thread so the two timings below are comparable
    feats_train.parallel.set_num_threads(1)
    feats_test = RealFeatures(fm_test_real)  # created but not used below
    kernel = LinearKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    kernel.init(feats_train, feats_train)
    dkernel = DirectorLinearKernel()
    dkernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    dkernel.init(feats_train, feats_train)
    print "km_train"
    t = Time()
    km_train = kernel.get_kernel_matrix()
    t1 = t.cur_time_diff(True)  # elapsed time of the native kernel
    print "dkm_train"
    t = Time()
    dkm_train = dkernel.get_kernel_matrix()
    t2 = t.cur_time_diff(True)  # elapsed time of the director kernel
    print "km_train", km_train
    print "dkm_train", dkm_train
    return km_train, dkm_train
def kernel_sparse_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.1):
    """Linear kernel on sparse real-valued features.

    A scale-`scale` average-diagonal normalizer is attached before the
    kernel is initialised; the train/train and train/test kernel matrices
    are returned together with the kernel object.
    """
    from shogun.Features import SparseRealFeatures
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

    lhs = SparseRealFeatures(fm_train_real)
    rhs = SparseRealFeatures(fm_test_real)

    lin = LinearKernel()
    lin.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Compute both matrices with the same kernel object, re-initialised
    # for each (left, right) feature pair.
    matrices = []
    for left, right in ((lhs, lhs), (lhs, rhs)):
        lin.init(left, right)
        matrices.append(lin.get_kernel_matrix())
    return matrices[0], matrices[1], lin
def create_kernel(kname, features, kparam=None):
    """Construct a kernel of the named type over *features*.

    Parameters
    ----------
    kname : str
        One of 'gauss', 'linear' or 'poly'.
    features : shogun Features
        Used as both left- and right-hand side of the kernel.
    kparam : optional
        Width for 'gauss', degree for 'poly'; unused for 'linear'.

    Returns
    -------
    The constructed shogun kernel.

    Raises
    ------
    ValueError
        For an unknown *kname*.  Previously an unknown name fell through
        every branch and the return statement raised an opaque
        NameError on the unbound local ``kernel``.
    """
    if kname == 'gauss':
        kernel = GaussianKernel(features, features, kparam)
    elif kname == 'linear':
        kernel = LinearKernel(features, features)
    elif kname == 'poly':
        # inhomogeneous, unnormalised polynomial kernel
        kernel = PolyKernel(features, features, kparam, True, False)
    else:
        raise ValueError('Unknown kernel %s' % kname)
    return kernel
def linear (): print 'Linear' from shogun.Features import RealFeatures from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) scale=1.2 kernel=LinearKernel() kernel.set_normalizer(AvgDiagKernelNormalizer(scale)) kernel.init(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix()
def kernel_linear_word_modular(fm_train_word=traindat, fm_test_word=testdat, scale=1.2):
    """Demo of LinearKernel on WordFeatures.

    The kernel is normalised by an AvgDiagKernelNormalizer with the given
    scale, evaluated on train/train and then on train/test, and returned.
    """
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import WordFeatures

    train = WordFeatures(fm_train_word)
    test = WordFeatures(fm_test_word)

    k = LinearKernel(train, train)
    k.set_normalizer(AvgDiagKernelNormalizer(scale))

    # Same kernel object, two right-hand sides; the matrices themselves
    # are computed but not returned (as in the original demo).
    for rhs in (train, test):
        k.init(train, rhs)
        k.get_kernel_matrix()
    return k
def kernel_director_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2):
    # Times a native LinearKernel and a Python DirectorLinearKernel on
    # identical data, prints both matrices, and returns them (Python 2).
    # NOTE(review): RealFeatures and DirectorLinearKernel are neither
    # imported nor defined here -- presumably supplied by surrounding code;
    # confirm before relying on this snippet.
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from modshogun import Time
    feats_train=RealFeatures(fm_train_real)
    feats_train.io.set_loglevel(0)
    # one thread keeps the native-vs-director timing comparison fair
    feats_train.parallel.set_num_threads(1)
    feats_test=RealFeatures(fm_test_real)  # unused below
    kernel=LinearKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    kernel.init(feats_train, feats_train)
    dkernel=DirectorLinearKernel()
    dkernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    dkernel.init(feats_train, feats_train)
    print "km_train"
    t=Time()
    km_train=kernel.get_kernel_matrix()
    t1=t.cur_time_diff(True)  # native-kernel wall time
    print "dkm_train"
    t=Time()
    dkm_train=dkernel.get_kernel_matrix()
    t2=t.cur_time_diff(True)  # director-kernel wall time
    print "km_train", km_train
    print "dkm_train", dkm_train
    return km_train, dkm_train
def create_kernel(kname, kparam, feats_train):
    """Call the corresponding constructor for the kernel.

    Parameters
    ----------
    kname : str
        Kernel type: 'gauss', 'linear', 'poly', 'wd', 'spec', 'cumspec',
        'spec2', 'cumspec2', 'localalign' or 'localimprove'.
    kparam : dict
        Kernel-specific parameters ('width', 'scale', 'degree',
        'seqlength', 'shift', 'inhomogene', 'normal', 'length',
        'indeg', 'outdeg' -- as required by the chosen kernel).
    feats_train : shogun Features (dict of them for 'spec2'/'cumspec2')
        Training features the kernel is constructed on.

    Returns
    -------
    The constructed shogun kernel, with a 32 MB cache.

    Raises
    ------
    ValueError
        For an unknown kernel name.  (The original printed a message and
        then crashed with NameError on the unbound ``kernel``; its
        ``return`` statement was also split from its value.)
    """
    if kname == 'gauss':
        kernel = GaussianKernel(feats_train, feats_train, kparam['width'])
    elif kname == 'linear':
        kernel = LinearKernel(feats_train, feats_train)
        kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale']))
    elif kname == 'poly':
        kernel = PolyKernel(feats_train, feats_train, kparam['degree'],
                            kparam['inhomogene'], kparam['normal'])
    elif kname == 'wd':
        kernel = WeightedDegreePositionStringKernel(feats_train, feats_train,
                                                    kparam['degree'])
        # normalise by sequence length so kernel values stay comparable
        kernel.set_normalizer(
            AvgDiagKernelNormalizer(float(kparam['seqlength'])))
        # one identical shift per position
        kernel.set_shifts(kparam['shift'] *
                          numpy.ones(kparam['seqlength'], dtype=numpy.int32))
        #kernel=WeightedDegreeStringKernel(feats_train, feats_train, kparam['degree'])
    elif kname == 'spec':
        kernel = CommUlongStringKernel(feats_train, feats_train)
    elif kname == 'cumspec':
        kernel = WeightedCommWordStringKernel(feats_train, feats_train)
        kernel.set_weights(numpy.ones(kparam['degree']))
    elif kname == 'spec2':
        # two spectrum kernels (one per feature set 'f0'/'f1') combined
        kernel = CombinedKernel()
        k0 = CommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = CommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'cumspec2':
        kernel = CombinedKernel()
        k0 = WeightedCommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.set_weights(numpy.ones(kparam['degree']))
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = WeightedCommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.set_weights(numpy.ones(kparam['degree']))
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'localalign':
        kernel = LocalAlignmentStringKernel(feats_train, feats_train)
    elif kname == 'localimprove':
        kernel = LocalityImprovedStringKernel(feats_train, feats_train,
                                              kparam['length'],
                                              kparam['indeg'], kparam['outdeg'])
    else:
        raise ValueError('Unknown kernel %s' % kname)
    kernel.set_cache_size(32)
    return kernel
def create_kernel(examples, param): """ kernel factory @param examples: list/array of examples @type examples: list @param param: parameter object @type param: Parameter @return subclass of shogun Kernel object @rtype: Kernel """ # first create feature object of correct type feat = create_features(examples, param) kernel = None if param.kernel == "WeightedDegreeStringKernel": kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree) kernel.set_cache_size(200) elif param.kernel == "LinearKernel": kernel = LinearKernel(feat, feat) elif param.kernel == "PolyKernel": kernel = PolyKernel(feat, feat, 1, False) elif param.kernel == "GaussianKernel": kernel = GaussianKernel(feat, feat, param.sigma) elif param.kernel == "WeightedDegreeRBFKernel": size_cache = 200 nof_properties = 20 sigma = param.base_similarity kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree, nof_properties, size_cache) elif param.kernel == "Promoter": kernel = create_promoter_kernel(examples, param.flags) else: raise Exception, "Unknown kernel type." if hasattr(param, "flags") and param.flags.has_key("cache_size"): kernel.set_cache_size(param.flags["cache_size"]) if param.flags.has_key("debug"): kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG) return kernel
def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
                           width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):
    """Train a multi-class MKL classifier over a combined kernel
    (Gaussian + linear + degree-2 polynomial) and return its predictions
    on the test data."""
    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMultiClass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    # One slot per base kernel; each slot gets its own copy of the
    # train/test features.
    for subkernel in (GaussianKernel(10, width), LinearKernel(), PolyKernel(10, 2)):
        feats_train.append_feature_obj(RealFeatures(fm_train_real))
        feats_test.append_feature_obj(RealFeatures(fm_test_real))
        kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)
    labels = Labels(label_train_multiclass)

    mkl = MKLMultiClass(C, kernel, labels)
    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)
    mkl.train()

    # Re-initialise on train/test and classify.
    kernel.init(feats_train, feats_test)
    return mkl.apply().get_labels()
def converter_kernellocallylinearembedding_modular(data, k):
    """Run kernel LLE (target dimension 1, neighbourhood size *k*) over a
    linear kernel on dense real features; returns the features object."""
    from shogun.Features import RealFeatures
    from shogun.Converter import KernelLocallyLinearEmbedding
    from shogun.Kernel import LinearKernel

    feats = RealFeatures(data)
    embedder = KernelLocallyLinearEmbedding(LinearKernel())
    embedder.set_target_dim(1)
    embedder.set_k(k)
    embedder.apply(feats)
    return feats
def kernel_director_linear_modular(fm_train_real=traindat, fm_test_real=testdat, scale=1.2):
    # Compares a native LinearKernel with an equivalent kernel implemented in
    # Python via shogun's director (SWIG cross-language polymorphism) support;
    # returns (native matrix, director matrix).
    # NOTE(review): numpy and RealFeatures are not imported in this snippet --
    # presumably provided by the surrounding file; verify.
    try:
        from shogun.Kernel import DirectorKernel
    except ImportError:
        # director classes are an optional SWIG build feature
        print "recompile shogun with --enable-swig-directors"
        return

    class DirectorLinearKernel(DirectorKernel):
        # Pure-Python linear kernel: dot product of the two feature vectors.
        def __init__(self):
            DirectorKernel.__init__(self, True)

        def kernel_function(self, idx_a, idx_b):
            seq1 = self.get_lhs().get_feature_vector(idx_a)
            seq2 = self.get_rhs().get_feature_vector(idx_b)
            return numpy.dot(seq1, seq2)

    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from modshogun import Time
    feats_train = RealFeatures(fm_train_real)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    # single thread for comparable timings
    feats_train.parallel.set_num_threads(1)
    feats_test = RealFeatures(fm_test_real)  # unused below
    kernel = LinearKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    kernel.init(feats_train, feats_train)
    dkernel = DirectorLinearKernel()
    dkernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    dkernel.init(feats_train, feats_train)
    #print "km_train"
    t = Time()
    km_train = kernel.get_kernel_matrix()
    #t1=t.cur_time_diff(True)
    #print "dkm_train"
    t = Time()
    dkm_train = dkernel.get_kernel_matrix()
    #t2=t.cur_time_diff(True)
    #print "km_train", km_train
    #print "dkm_train", dkm_train
    return km_train, dkm_train
def converter_kernellocallylinearembedding_modular(data, k):
    """Embed *data* into one dimension with kernel LLE over a linear
    kernel; returns the features object, or prints a message and returns
    None when shogun was built without Eigen3 support."""
    try:
        from shogun.Features import RealFeatures
        from shogun.Converter import KernelLocallyLinearEmbedding
        from shogun.Kernel import LinearKernel

        feats = RealFeatures(data)
        lle = KernelLocallyLinearEmbedding(LinearKernel())
        lle.set_target_dim(1)
        lle.set_k(k)
        lle.apply(feats)
        return feats
    except ImportError:
        print('No Eigen3 available')
def kernel_linear_byte_modular(fm_train_byte=traindat, fm_test_byte=testdat):
    """Compute a linear kernel over byte features.

    The kernel is set up on (train, train), then re-initialised on
    (train, test); both matrices are computed and the kernel object is
    returned.
    """
    from shogun.Kernel import LinearKernel
    from shogun.Features import ByteFeatures

    lhs = ByteFeatures(fm_train_byte)
    rhs = ByteFeatures(fm_test_byte)

    lin = LinearKernel(lhs, lhs)
    km_train = lin.get_kernel_matrix()
    lin.init(lhs, rhs)
    km_test = lin.get_kernel_matrix()
    return lin
def create_empty_kernel(param): """ kernel factory @param param: parameter object @type param: Parameter @return subclass of shogun Kernel object @rtype: Kernel """ kernel = None if param.kernel == "WeightedDegreeStringKernel": kernel = WeightedDegreeStringKernel(param.wdk_degree) elif param.kernel == "LinearKernel": kernel = LinearKernel() elif param.kernel == "PolyKernel": kernel = PolyKernel(10, 1, False) elif param.kernel == "GaussianKernel": kernel = GaussianKernel(10, param.sigma) elif param.kernel == "WeightedDegreeRBFKernel": size_cache = 50 nof_properties = 5 #20 sigma = param.transform kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree, nof_properties) else: raise Exception, "Unknown kernel type:" + param.kernel if hasattr(param, "flags") and param.flags.has_key("cache_size"): kernel.set_cache_size(param.flags["cache_size"]) if param.flags.has_key("debug"): kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG) return kernel
def kernel_director_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2):
    # Builds a Python DirectorKernel subclass computing a plain dot product
    # and checks it against the native LinearKernel; returns both matrices.
    # NOTE(review): numpy and RealFeatures are not imported here -- presumably
    # supplied by the enclosing file; confirm before use.
    try:
        from shogun.Kernel import DirectorKernel
    except ImportError:
        # directors require a special SWIG build
        print "recompile shogun with --enable-swig-directors"
        return

    class DirectorLinearKernel(DirectorKernel):
        # Python-side linear kernel over the attached lhs/rhs features.
        def __init__(self):
            DirectorKernel.__init__(self, True)

        def kernel_function(self, idx_a, idx_b):
            seq1 = self.get_lhs().get_feature_vector(idx_a)
            seq2 = self.get_rhs().get_feature_vector(idx_b)
            return numpy.dot(seq1, seq2)

    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from modshogun import Time
    feats_train=RealFeatures(fm_train_real)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    # one thread keeps timings comparable
    feats_train.parallel.set_num_threads(1)
    feats_test=RealFeatures(fm_test_real)  # unused below
    kernel=LinearKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    kernel.init(feats_train, feats_train)
    dkernel=DirectorLinearKernel()
    dkernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    dkernel.init(feats_train, feats_train)
    #print "km_train"
    t=Time()
    km_train=kernel.get_kernel_matrix()
    #t1=t.cur_time_diff(True)
    #print "dkm_train"
    t=Time()
    dkm_train=dkernel.get_kernel_matrix()
    #t2=t.cur_time_diff(True)
    #print "km_train", km_train
    #print "dkm_train", dkm_train
    return km_train, dkm_train
def get_presvm(B=2.0):
    # Trains a LibLinear "pre-SVM" on hard-coded toy data, then a
    # DomainAdaptationSVMLinear (adaptation weight B) on a second toy domain,
    # saves/reloads the classifiers via helper.save/helper.load, and checks
    # that the reloaded classifier reproduces the original predictions
    # (Python 2 syntax).
    # NOTE(review): numpy, RealFeatures, Labels, LinearKernel, LibLinear,
    # DomainAdaptationSVMLinear and helper are not imported in this snippet --
    # presumably provided by the surrounding file; verify.

    # source-domain examples (9-dimensional) for the pre-SVM
    examples_presvm = [numpy.array([ 2.1788894 , 3.89163458, 5.55086917, 6.4022742 , 3.14964751, -0.4622959 , 5.38538904, 5.9962938 , 6.29690849]),
                       numpy.array([ 2.1788894 , 3.89163458, 5.55086917, 6.4022742 , 3.14964751, -0.4622959 , 5.38538904, 5.9962938 , 6.29690849]),
                       numpy.array([ 0.93099452, 0.38871617, 1.57968949, 1.25672527, -0.8123137 , 0.20786586, 1.378121 , 1.15598866, 0.80265343]),
                       numpy.array([ 0.68705535, 0.15144113, -0.81306157, -0.7664577 , 1.16452945, -0.2712956 , 0.483094 , -0.16302007, -0.39094812]),
                       numpy.array([-0.71374437, -0.16851719, 1.43826895, 0.95961166, -0.2360497 , -0.30425755, 1.63157052, 1.15990427, 0.63801465]),
                       numpy.array([ 0.68705535, 0.15144113, -0.81306157, -0.7664577 , 1.16452945, -0.2712956 , 0.483094 , -0.16302007, -0.39094812]),
                       numpy.array([-0.71374437, -0.16851719, 1.43826895, 0.95961166, -0.2360497 , -0.30425755, 1.63157052, 1.15990427, 0.63801465]),
                       numpy.array([-0.98028302, -0.23974489, 2.1687206 , 1.99338824, -0.67070205, -0.33167281, 1.3500379 , 1.34915685, 1.13747975]),
                       numpy.array([ 0.67109612, 0.12662017, -0.48254886, -0.49091898, 1.31522237, -0.34108933, 0.57832179, -0.01992828, -0.26581628]),
                       numpy.array([ 0.3193611 , 0.44903416, 3.62187778, 4.1490827 , 1.58832961, 1.95583397, 1.36836023, 1.92521945, 2.41114998])]
    labels_presvm = [-1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0]

    # target-domain training examples for the adaptation SVM
    examples = [numpy.array([-0.49144487, -0.19932263, -0.00408188, -0.21262012, 0.14621013, -0.50415481, 0.32317317, -0.00317602, -0.21422637]),
                numpy.array([ 0.0511817 , -0.04226666, -0.30454651, -0.38759116, 0.31639514, 0.32558471, 0.49364473, 0.04515591, -0.06963456]),
                numpy.array([-0.30324369, -0.11909251, -0.03210278, -0.2779561 , 1.31488853, -0.33165365, 0.60176018, -0.00384946, -0.15603975]),
                numpy.array([ 0.59282756, -0.0039991 , -0.26028983, -0.26722552, 1.63314995, -0.51199338, 0.33340685, -0.0170519 , -0.19211039]),
                numpy.array([-0.18338766, -0.07783465, 0.42019824, 0.201753 , 2.01160098, 0.33326111, 0.75591909, 0.36631525, 0.1761829 ]),
                numpy.array([ 0.10273793, -0.02189574, 0.91092358, 0.74827973, 0.51882902, -0.1286531 , 0.64463658, 0.67468349, 0.55587266]),
                numpy.array([-0.09727099, -0.13413522, 0.18771062, 0.19411594, 1.48547364, -0.43169608, 0.55064534, 0.24331473, 0.10878847]),
                numpy.array([-0.22494375, -0.15492964, 0.28017737, 0.29794467, 0.96403895, 0.43880289, 0.08053425, 0.07456818, 0.12102371]),
                numpy.array([-0.18161417, -0.17692039, 0.19554942, -0.00785625, 1.38315115, -0.05923183, -0.05723568, -0.15463646, -0.24249483]),
                numpy.array([-0.36538359, -0.20040061, -0.38384388, -0.40206556, -0.25040256, 0.94205875, 0.40162798, 0.00327328, -0.24107393])]
    labels = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0]

    # held-out examples used only for comparing the two classifiers
    examples_test = [numpy.array([-0.45159799, -0.11401394, 1.28574573, 1.09144306, 0.92253119, -0.47230164, 0.77032486, 0.83047366, 0.74768906]),
                     numpy.array([ 0.42613105, 0.0092778 , -0.78640296, -0.71632445, 0.41154244, 0.88380309, 0.19475759, -0.14195876, -0.30479425]),
                     numpy.array([-0.09727099, -0.13413522, 0.18771062, 0.19411594, 1.48547364, -0.43169608, 0.55064534, 0.24331473, 0.10878847]),
                     numpy.array([ 0.11558796, -0.08867647, -0.26432074, -0.30924546, -1.08243017, -0.1339607 , -0.1956124 , -0.2428358 , -0.25761213]),
                     numpy.array([ 1.23679696, 0.18753081, -0.25593329, -0.12051991, 0.64976989, -0.17184101, 0.14951337, 0.01988587, -0.0356698 ]),
                     numpy.array([ 1.03355002, 0.05316195, -0.97905368, -0.75482121, 0.28673776, 2.27142733, 0.02654739, -0.31109851, -0.44555277]),
                     numpy.array([-0.53662325, -0.21434756, -0.12105795, -0.27531257, 0.66947047, 0.05474302, -0.00717455, -0.17700575, -0.22253444]),
                     numpy.array([ 0.11272632, -0.12674826, -0.49736457, -0.51445609, 0.88518932, -0.51558669, -0.12000557, -0.32973613, -0.38488736]),
                     numpy.array([ 0.8372111 , 0.06972199, -1.00454229, -0.79869642, 1.19376333, -0.40160273, -0.25122157, -0.46417918, -0.50234858]),
                     numpy.array([-0.36325018, -0.12206184, 0.10525247, -0.15663416, 1.03616948, -0.51699463, 0.59566286, 0.35363369, 0.10545559])]

    #############################################
    # compute pre-svm
    #############################################

    # create real-valued features as first step
    # (shogun expects one example per column, hence the transpose)
    examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
    examples_presvm = numpy.transpose(examples_presvm)

    feat_presvm = RealFeatures(examples_presvm)
    lab_presvm = Labels(numpy.array(labels_presvm))
    wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

    presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
    presvm_liblinear.set_max_iterations(10000)
    presvm_liblinear.set_bias_enabled(False)
    presvm_liblinear.train()

    #return presvm_liblinear
    #def get_da_svm(presvm_liblinear):

    #############################################
    # compute linear term manually
    #############################################

    examples = numpy.array(examples, dtype=numpy.float64)
    examples = numpy.transpose(examples)

    feat = RealFeatures(examples)
    lab = Labels(numpy.array(labels))

    # adaptation SVM built on top of the pre-trained classifier
    dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B)
    dasvm_liblinear.set_bias_enabled(False)
    dasvm_liblinear.train()

    # round-trip the pre-SVM through disk and train a second adaptation SVM
    # on the reloaded copy
    helper.save("/tmp/svm", presvm_liblinear)
    presvm_pickle = helper.load("/tmp/svm")

    dasvm_pickle = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_pickle, B)
    dasvm_pickle.set_bias_enabled(False)
    dasvm_pickle.train()

    helper.save("/tmp/dasvm", dasvm_liblinear)
    dasvm_pickle2 = helper.load("/tmp/dasvm")  # loaded but not compared below

    #############################################
    # load test data
    #############################################
    examples_test = numpy.array(examples_test, dtype=numpy.float64)
    examples_test = numpy.transpose(examples_test)
    feat_test = RealFeatures(examples_test)

    # check if pickled and unpickled classifiers behave the same
    out1 = dasvm_liblinear.classify(feat_test).get_labels()
    out2 = dasvm_pickle.classify(feat_test).get_labels()

    # compare outputs; mismatching pairs are printed rather than raised
    for i in xrange(len(out1)):
        try:
            assert(abs(out1[i]-out2[i])<= 0.001)
        except:
            print "(%.5f, %.5f)" % (out1[i], out2[i])

    print "classification agrees."
def RunKPCAShogun(q):
    # Runs shogun Kernel PCA on self.dataset (CSV), timing the modelling
    # phase, and posts the elapsed time -- or -1 on any failure -- to the
    # multiprocessing queue *q*.
    # NOTE(review): this reads like a nested function of a benchmark method:
    # self, options, Timer, Log, np, re and the shogun classes are not
    # defined in this snippet -- verify against the enclosing file.
    totalTimer = Timer()
    try:
        # Load input dataset.
        Log.Info("Loading dataset", self.verbose)
        data = np.genfromtxt(self.dataset, delimiter=',')
        # transpose: shogun RealFeatures expects one example per column
        dataFeat = RealFeatures(data.T)

        with totalTimer:
            # Get the new dimensionality, if it is necessary.
            dimension = re.search('-d (\d+)', options)
            if not dimension:
                # default: keep the existing dimensionality
                d = data.shape[1]
            else:
                d = int(dimension.group(1))
                if (d > data.shape[1]):
                    Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
                              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
                    q.put(-1)
                    return -1

            # Get the kernel type and make sure it is valid.
            kernel = re.search("-k ([^\s]+)", options)
            if not kernel:
                Log.Fatal("Choose kernel type, valid choices are 'linear'," +
                          " 'hyptan', 'polynomial' and 'gaussian'.")
                q.put(-1)
                return -1
            elif kernel.group(1) == "polynomial":
                # optional polynomial degree, defaults to 1
                degree = re.search('-D (\d+)', options)
                degree = 1 if not degree else int(degree.group(1))
                kernel = PolyKernel(dataFeat, dataFeat, degree, True)
            elif kernel.group(1) == "gaussian":
                kernel = GaussianKernel(dataFeat, dataFeat, 2.0)
            elif kernel.group(1) == "linear":
                kernel = LinearKernel(dataFeat, dataFeat)
            elif kernel.group(1) == "hyptan":
                kernel = SigmoidKernel(dataFeat, dataFeat, 2, 1.0, 1.0)
            else:
                Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
                          "choices are 'linear', 'hyptan', 'polynomial' and 'gaussian'.")
                q.put(-1)
                return -1

            # Perform Kernel Principal Components Analysis.
            model = KernelPCA(kernel)
            model.set_target_dim(d)
            model.init(dataFeat)
            model.apply_to_feature_matrix(dataFeat)
    except Exception as e:
        # any failure is reported to the caller as -1
        q.put(-1)
        return -1

    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
# NOTE(review): orphaned fragment -- this line begins midway through a list
# literal whose opening (and whatever definition encloses it) is not present
# in this file; it cannot parse or run as-is and is kept verbatim for reference.
-0.36325018, -0.12206184, 0.10525247, -0.15663416, 1.03616948, -0.51699463, 0.59566286, 0.35363369, 0.10545559 ]) ]
#############################################
# compute pre-svm
#############################################
# create real-valued features as first step
examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
examples_presvm = numpy.transpose(examples_presvm)
feat_presvm = RealFeatures(examples_presvm)
lab_presvm = Labels(numpy.array(labels_presvm))
wdk_presvm = LinearKernel(feat_presvm, feat_presvm)
# train a linear pre-SVM with LibLinear (no bias term)
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_max_iterations(10000)
presvm_liblinear.set_bias_enabled(False)
presvm_liblinear.train()
# train the same problem with LibSVM on the precomputed linear kernel
presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm)
#presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm)
#presvm_libsvm.io.set_loglevel(MSG_DEBUG)
presvm_libsvm.set_bias_enabled(False)
presvm_libsvm.train()
my_w = presvm_liblinear.get_w()
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps, target_obj):
    """
    implementation using multitask kernel

    Trains an SVM whose base kernel (string or linear, chosen by the input
    type) is reweighted across tasks by a MultitaskKernelNormalizer built
    from the task-similarity matrix M; returns (objectives, train_times).
    NOTE(review): numpy and the shogun names used below are not imported in
    this snippet -- presumably provided by the surrounding file; verify.
    """
    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)
    print "task_sim:", tsm
    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects: string data gets a weighted-degree kernel,
    # numeric data a linear kernel
    if type(xt[0]) == numpy.string_:
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)
    lab = Labels(lt)

    # set up normalizer with pairwise task similarities from M
    normalizer = MultitaskKernelNormalizer(tt.tolist())
    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])
    print "num of unique tasks: ", normalizer.get_num_unique_tasks(task_indicator)

    # set up kernel
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()
    svm.set_epsilon(eps)
    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often do we like to compute objective etc
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)
    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    #SET THREADS TO 1
    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    # shogun reports dual objectives negated
    objectives = [-obj for obj in svm.get_dual_objectives()]

    # disabled diagnostic branch: recover the primal objective from the
    # dual solution.
    # NOTE(review): the exact nesting of this tail inside "if False:" cannot
    # be confirmed from the mangled source -- verify against the original.
    if False:
        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()
        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt, task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times
# Script fragment (Python 2): retrains a single LibSVM object three times on
# freshly generated random data, swapping in a new linear kernel each round.
# NOTE(review): LibSVM, RealFeatures, Labels, LinearKernel, random, num_feats,
# num_vec, size_cache and scale are not defined in this fragment -- presumably
# set up earlier in the file; verify.
C = 0.017
epsilon = 1e-5
tube_epsilon = 1e-2
svm = LibSVM()
svm.set_C(C, C)
svm.set_epsilon(epsilon)
svm.set_tube_epsilon(tube_epsilon)

for i in xrange(3):
    data_train = random.rand(num_feats, num_vec)
    data_test = random.rand(num_feats, num_vec)
    feats_train = RealFeatures(data_train)
    feats_test = RealFeatures(data_test)
    # random +/-1 labels
    labels = Labels(random.rand(num_vec).round() * 2 - 1)
    svm.set_kernel(LinearKernel(size_cache, scale))
    svm.set_labels(labels)
    kernel = svm.get_kernel()
    print "kernel cache size: %s" % (kernel.get_cache_size())
    kernel.init(feats_test, feats_test)
    svm.train()
    kernel.init(feats_train, feats_test)
    print svm.apply().get_labels()
    #kernel.remove_lhs_and_rhs()
#import pdb
#pdb.set_trace()