def kernel_linear_byte_modular(fm_train_byte=traindat,fm_test_byte=testdat):
	from shogun.Kernel import LinearKernel
	from shogun.Features import ByteFeatures

	feats_train=ByteFeatures(fm_train_byte)
	feats_test=ByteFeatures(fm_test_byte)

	kernel=LinearKernel(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return kernel
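
A minimal usage sketch for the byte example above, assuming the legacy shogun modular interface; the random uint8 matrices are hypothetical stand-ins for the traindat/testdat globals the snippet expects:

import numpy
traindat = numpy.random.randint(0, 256, (10, 40)).astype(numpy.uint8)
testdat = numpy.random.randint(0, 256, (10, 25)).astype(numpy.uint8)
kernel = kernel_linear_byte_modular(traindat, testdat)
print kernel.get_kernel_matrix().shape    # (40, 25) after the test init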
Example #2
def kernel_sparse_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.1):
	from shogun.Features import SparseRealFeatures
	from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

	feats_train=SparseRealFeatures(fm_train_real)
	feats_test=SparseRealFeatures(fm_test_real)

	kernel=LinearKernel()
	kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	kernel.init(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
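
For intuition, a numpy sketch of what AvgDiagKernelNormalizer is doing here, under the assumption that it divides every kernel entry by the given constant (or by the average training diagonal when the constant is 0.0):

import numpy

def avg_diag_normalize(km, km_train, scale=1.1):
    # assumption: scale == 0.0 means "use the mean of the training diagonal"
    if scale == 0.0:
        scale = numpy.mean(numpy.diag(km_train))
    return km / scale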
Example #3
def kernel_linear_word_modular (fm_train_word=traindat,fm_test_word=testdat,scale=1.2):
	
	from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
	from shogun.Features import WordFeatures

	feats_train=WordFeatures(fm_train_word)
	feats_test=WordFeatures(fm_test_word)

	kernel=LinearKernel(feats_train, feats_train)
	kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	kernel.init(feats_train, feats_train)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return kernel
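
Usage mirrors the byte example; the uint16 matrices here are an assumption based on what WordFeatures holds:

import numpy
traindat = numpy.random.randint(0, 2 ** 16, (10, 40)).astype(numpy.uint16)
testdat = numpy.random.randint(0, 2 ** 16, (10, 25)).astype(numpy.uint16)
kernel = kernel_linear_word_modular(traindat, testdat, scale=1.2)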
Example #4
def kernel_director_linear_modular(fm_train_real=traindat,
                                   fm_test_real=testdat,
                                   scale=1.2):

    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import RealFeatures
    from modshogun import Time

    feats_train = RealFeatures(fm_train_real)
    feats_train.io.set_loglevel(0)
    feats_train.parallel.set_num_threads(1)
    feats_test = RealFeatures(fm_test_real)

    kernel = LinearKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    kernel.init(feats_train, feats_train)

    # DirectorLinearKernel is the user-defined DirectorKernel subclass
    # defined in Example #14 below.
    dkernel = DirectorLinearKernel()
    dkernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    dkernel.init(feats_train, feats_train)

    print "km_train"
    t = Time()
    km_train = kernel.get_kernel_matrix()
    t1 = t.cur_time_diff(True)

    print "dkm_train"
    t = Time()
    dkm_train = dkernel.get_kernel_matrix()
    t2 = t.cur_time_diff(True)

    print "km_train", km_train
    print "dkm_train", dkm_train

    return km_train, dkm_train
Example #6
def create_kernel(kname, features, kparam=None):
    from shogun.Kernel import GaussianKernel, LinearKernel, PolyKernel

    if kname == 'gauss':
        kernel = GaussianKernel(features, features, kparam)
    elif kname == 'linear':
        kernel = LinearKernel(features, features)
    elif kname == 'poly':
        kernel = PolyKernel(features, features, kparam, True, False)
    else:
        raise ValueError('unknown kernel %s' % kname)

    return kernel
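
A hedged usage sketch; here kparam is the Gaussian width, per the call inside the factory:

import numpy
from shogun.Features import RealFeatures

feats = RealFeatures(numpy.random.rand(5, 30))
kernel = create_kernel('gauss', feats, kparam=1.5)
km = kernel.get_kernel_matrix()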
def linear ():
	print 'Linear'
	from shogun.Features import RealFeatures
	from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	scale=1.2

	kernel=LinearKernel()
	kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	kernel.init(feats_train, feats_train)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
Example #10
def create_kernel(kname, kparam, feats_train):
    """Call the corresponding constructor for the kernel"""

    if kname == 'gauss':
        kernel = GaussianKernel(feats_train, feats_train, kparam['width'])
    elif kname == 'linear':
        kernel = LinearKernel(feats_train, feats_train)
        kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale']))
    elif kname == 'poly':
        kernel = PolyKernel(feats_train, feats_train, kparam['degree'],
                            kparam['inhomogene'], kparam['normal'])
    elif kname == 'wd':
        kernel = WeightedDegreePositionStringKernel(feats_train, feats_train,
                                                    kparam['degree'])
        kernel.set_normalizer(
            AvgDiagKernelNormalizer(float(kparam['seqlength'])))
        kernel.set_shifts(kparam['shift'] *
                          numpy.ones(kparam['seqlength'], dtype=numpy.int32))
        #kernel=WeightedDegreeStringKernel(feats_train, feats_train, kparam['degree'])
    elif kname == 'spec':
        kernel = CommUlongStringKernel(feats_train, feats_train)
    elif kname == 'cumspec':
        kernel = WeightedCommWordStringKernel(feats_train, feats_train)
        kernel.set_weights(numpy.ones(kparam['degree']))
    elif kname == 'spec2':
        kernel = CombinedKernel()
        k0 = CommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = CommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'cumspec2':
        kernel = CombinedKernel()
        k0 = WeightedCommWordStringKernel(feats_train['f0'], feats_train['f0'])
        k0.set_weights(numpy.ones(kparam['degree']))
        k0.io.disable_progress()
        kernel.append_kernel(k0)
        k1 = WeightedCommWordStringKernel(feats_train['f1'], feats_train['f1'])
        k1.set_weights(numpy.ones(kparam['degree']))
        k1.io.disable_progress()
        kernel.append_kernel(k1)
    elif kname == 'localalign':
        kernel = LocalAlignmentStringKernel(feats_train, feats_train)
    elif kname == 'localimprove':
        kernel = LocalityImprovedStringKernel(feats_train, feats_train, kparam['length'],\
                                              kparam['indeg'], kparam['outdeg'])
    else:
        raise ValueError('Unknown kernel %s' % kname)

    kernel.set_cache_size(32)
    return kernel
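
A sketch of driving this factory via its kparam dict, using the linear branch (the simplest one); the factory assumes the kernel classes and AvgDiagKernelNormalizer are already imported at module level:

import numpy
from shogun.Features import RealFeatures

feats_train = RealFeatures(numpy.random.rand(5, 30))
kernel = create_kernel('linear', {'scale': 1.0}, feats_train)
km = kernel.get_kernel_matrix()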
def create_kernel(examples, param):
    """
    kernel factory
    
    @param examples: list/array of examples
    @type examples: list
    @param param: parameter object
    @type param: Parameter
    
    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    # first create feature object of correct type
    feat = create_features(examples, param)

    kernel = None

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(feat, feat, param.wdk_degree)
        kernel.set_cache_size(200)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel(feat, feat)

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(feat, feat, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(feat, feat, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 200
        nof_properties = 20
        sigma = param.base_similarity
        kernel = WeightedDegreeRBFKernel(feat, feat, sigma, param.wdk_degree,
                                         nof_properties, size_cache)

    elif param.kernel == "Promoter":
        kernel = create_promoter_kernel(examples, param.flags)

    else:
        raise Exception, "Unknown kernel type."

    if hasattr(param, "flags") and param.flags.has_key("cache_size"):
        kernel.set_cache_size(param.flags["cache_size"])

    if param.flags.has_key("debug"):
        kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass,
                           width, C, epsilon, num_threads, mkl_epsilon,
                           mkl_norm):

    from shogun.Features import CombinedFeatures, RealFeatures, Labels
    from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel, PolyKernel
    from shogun.Classifier import MKLMultiClass

    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    feats_test = CombinedFeatures()

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = GaussianKernel(10, width)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = LinearKernel()
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    subkfeats_train = RealFeatures(fm_train_real)
    subkfeats_test = RealFeatures(fm_test_real)
    subkernel = PolyKernel(10, 2)
    feats_train.append_feature_obj(subkfeats_train)
    feats_test.append_feature_obj(subkfeats_test)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)

    labels = Labels(label_train_multiclass)

    mkl = MKLMultiClass(C, kernel, labels)

    mkl.set_epsilon(epsilon)
    mkl.parallel.set_num_threads(num_threads)
    mkl.set_mkl_epsilon(mkl_epsilon)
    mkl.set_mkl_norm(mkl_norm)

    mkl.train()

    kernel.init(feats_train, feats_test)

    out = mkl.apply().get_labels()
    return out
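
A minimal invocation sketch with synthetic data; the hyperparameter values are illustrative assumptions, not tuned:

import numpy
fm_train = numpy.random.rand(5, 60)
fm_test = numpy.random.rand(5, 20)
labels = numpy.array([i % 3 for i in xrange(60)], dtype=numpy.float64)
out = mkl_multiclass_modular(fm_train, fm_test, labels, width=2.1, C=1.0,
                             epsilon=1e-5, num_threads=1, mkl_epsilon=0.001,
                             mkl_norm=1.5)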
def converter_kernellocallylinearembedding_modular(data, k):
    from shogun.Features import RealFeatures
    from shogun.Converter import KernelLocallyLinearEmbedding
    from shogun.Kernel import LinearKernel

    features = RealFeatures(data)

    kernel = LinearKernel()

    converter = KernelLocallyLinearEmbedding(kernel)
    converter.set_target_dim(1)
    converter.set_k(k)
    converter.apply(features)

    return features
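
A quick call sketch; with the target dimension fixed at 1 the converter returns a one-dimensional embedding of the input:

import numpy
data = numpy.random.rand(3, 20)
embedded = converter_kernellocallylinearembedding_modular(data, k=5)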
Example #14
def kernel_director_linear_modular(fm_train_real=traindat,
                                   fm_test_real=testdat,
                                   scale=1.2):
    import numpy

    try:
        from shogun.Kernel import DirectorKernel
    except ImportError:
        print "recompile shogun with --enable-swig-directors"
        return

    class DirectorLinearKernel(DirectorKernel):
        def __init__(self):
            DirectorKernel.__init__(self, True)

        def kernel_function(self, idx_a, idx_b):
            seq1 = self.get_lhs().get_feature_vector(idx_a)
            seq2 = self.get_rhs().get_feature_vector(idx_b)
            return numpy.dot(seq1, seq2)

    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
    from shogun.Features import RealFeatures
    from modshogun import Time

    feats_train = RealFeatures(fm_train_real)
    #feats_train.io.set_loglevel(MSG_DEBUG)
    feats_train.parallel.set_num_threads(1)
    feats_test = RealFeatures(fm_test_real)

    kernel = LinearKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    kernel.init(feats_train, feats_train)

    dkernel = DirectorLinearKernel()
    dkernel.set_normalizer(AvgDiagKernelNormalizer(scale))
    dkernel.init(feats_train, feats_train)

    #print  "km_train"
    t = Time()
    km_train = kernel.get_kernel_matrix()
    #t1=t.cur_time_diff(True)

    #print  "dkm_train"
    t = Time()
    dkm_train = dkernel.get_kernel_matrix()
    #t2=t.cur_time_diff(True)

    #print "km_train", km_train
    #print "dkm_train", dkm_train

    return km_train, dkm_train
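
Since the director kernel computes the same scaled dot product as the built-in LinearKernel, the two matrices should agree; a check sketch, assuming traindat/testdat are real-valued feature matrices:

import numpy
km_train, dkm_train = kernel_director_linear_modular(traindat, testdat)
assert numpy.allclose(km_train, dkm_train, atol=1e-8)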
Example #15
def converter_kernellocallylinearembedding_modular(data, k):
    try:
        from shogun.Features import RealFeatures
        from shogun.Converter import KernelLocallyLinearEmbedding
        from shogun.Kernel import LinearKernel

        features = RealFeatures(data)

        kernel = LinearKernel()

        converter = KernelLocallyLinearEmbedding(kernel)
        converter.set_target_dim(1)
        converter.set_k(k)
        converter.apply(features)

        return features
    except ImportError:
        print('No Eigen3 available')
def create_empty_kernel(param):
    """
    kernel factory
    
    @param param: parameter object
    @type param: Parameter
    
    @return subclass of shogun Kernel object
    @rtype: Kernel
    """

    kernel = None

    if param.kernel == "WeightedDegreeStringKernel":
        kernel = WeightedDegreeStringKernel(param.wdk_degree)

    elif param.kernel == "LinearKernel":
        kernel = LinearKernel()

    elif param.kernel == "PolyKernel":
        kernel = PolyKernel(10, 1, False)

    elif param.kernel == "GaussianKernel":
        kernel = GaussianKernel(10, param.sigma)

    elif param.kernel == "WeightedDegreeRBFKernel":
        size_cache = 50
        nof_properties = 5  #20
        sigma = param.transform
        kernel = WeightedDegreeRBFKernel(size_cache, sigma, param.wdk_degree,
                                         nof_properties)

    else:

        raise Exception, "Unknown kernel type:" + param.kernel

    if hasattr(param, "flags") and param.flags.has_key("cache_size"):
        kernel.set_cache_size(param.flags["cache_size"])

    if param.flags.has_key("debug"):
        kernel.io.set_loglevel(shogun.Kernel.MSG_DEBUG)

    return kernel
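
A duck-typed Parameter stand-in is enough to exercise this factory; the sketch below assumes only the attributes the function actually reads:

class Param(object):
    pass

param = Param()
param.kernel = "GaussianKernel"
param.sigma = 1.0
param.flags = {}
kernel = create_empty_kernel(param)   # GaussianKernel(10, 1.0), no features yet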
Example #19
def get_presvm(B=2.0):

    examples_presvm = [numpy.array([ 2.1788894 ,  3.89163458,  5.55086917,  6.4022742 ,  3.14964751, -0.4622959 ,  5.38538904,  5.9962938 ,  6.29690849]),
     numpy.array([ 2.1788894 ,  3.89163458,  5.55086917,  6.4022742 ,  3.14964751,  -0.4622959 ,  5.38538904,  5.9962938 ,  6.29690849]),
     numpy.array([ 0.93099452,  0.38871617,  1.57968949,  1.25672527, -0.8123137 ,   0.20786586,  1.378121  ,  1.15598866,  0.80265343]),
     numpy.array([ 0.68705535,  0.15144113, -0.81306157, -0.7664577 ,  1.16452945,  -0.2712956 ,  0.483094  , -0.16302007, -0.39094812]),
     numpy.array([-0.71374437, -0.16851719,  1.43826895,  0.95961166, -0.2360497 ,  -0.30425755,  1.63157052,  1.15990427,  0.63801465]),
     numpy.array([ 0.68705535,  0.15144113, -0.81306157, -0.7664577 ,  1.16452945, -0.2712956 ,  0.483094  , -0.16302007, -0.39094812]),
     numpy.array([-0.71374437, -0.16851719,  1.43826895,  0.95961166, -0.2360497 , -0.30425755,  1.63157052,  1.15990427,  0.63801465]),
     numpy.array([-0.98028302, -0.23974489,  2.1687206 ,  1.99338824, -0.67070205, -0.33167281,  1.3500379 ,  1.34915685,  1.13747975]),
     numpy.array([ 0.67109612,  0.12662017, -0.48254886, -0.49091898,  1.31522237, -0.34108933,  0.57832179, -0.01992828, -0.26581628]),
     numpy.array([ 0.3193611 ,  0.44903416,  3.62187778,  4.1490827 ,  1.58832961,  1.95583397,  1.36836023,  1.92521945,  2.41114998])]
    labels_presvm = [-1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0]

    examples = [numpy.array([-0.49144487, -0.19932263, -0.00408188, -0.21262012,  0.14621013, -0.50415481,  0.32317317, -0.00317602, -0.21422637]), 
     numpy.array([ 0.0511817 , -0.04226666, -0.30454651, -0.38759116,  0.31639514,  0.32558471,  0.49364473,  0.04515591, -0.06963456]),
     numpy.array([-0.30324369, -0.11909251, -0.03210278, -0.2779561 ,  1.31488853, -0.33165365,  0.60176018, -0.00384946, -0.15603975]),
     numpy.array([ 0.59282756, -0.0039991 , -0.26028983, -0.26722552,  1.63314995, -0.51199338,  0.33340685, -0.0170519 , -0.19211039]),
     numpy.array([-0.18338766, -0.07783465,  0.42019824,  0.201753  ,  2.01160098,  0.33326111,  0.75591909,  0.36631525,  0.1761829 ]),
     numpy.array([ 0.10273793, -0.02189574,  0.91092358,  0.74827973,  0.51882902, -0.1286531 ,  0.64463658,  0.67468349,  0.55587266]),
     numpy.array([-0.09727099, -0.13413522,  0.18771062,  0.19411594,  1.48547364, -0.43169608,  0.55064534,  0.24331473,  0.10878847]),
     numpy.array([-0.22494375, -0.15492964,  0.28017737,  0.29794467,  0.96403895,  0.43880289,  0.08053425,  0.07456818,  0.12102371]),
     numpy.array([-0.18161417, -0.17692039,  0.19554942, -0.00785625,  1.38315115, -0.05923183, -0.05723568, -0.15463646, -0.24249483]),
     numpy.array([-0.36538359, -0.20040061, -0.38384388, -0.40206556, -0.25040256,  0.94205875,  0.40162798,  0.00327328, -0.24107393])]

    labels = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0]

    examples_test = [numpy.array([-0.45159799, -0.11401394,  1.28574573,  1.09144306,  0.92253119,  -0.47230164,  0.77032486,  0.83047366,  0.74768906]),
     numpy.array([ 0.42613105,  0.0092778 , -0.78640296, -0.71632445,  0.41154244,   0.88380309,  0.19475759, -0.14195876, -0.30479425]),
     numpy.array([-0.09727099, -0.13413522,  0.18771062,  0.19411594,  1.48547364,  -0.43169608,  0.55064534,  0.24331473,  0.10878847]),
     numpy.array([ 0.11558796, -0.08867647, -0.26432074, -0.30924546, -1.08243017,  -0.1339607 , -0.1956124 , -0.2428358 , -0.25761213]),
     numpy.array([ 1.23679696,  0.18753081, -0.25593329, -0.12051991,  0.64976989,  -0.17184101,  0.14951337,  0.01988587, -0.0356698 ]),
     numpy.array([ 1.03355002,  0.05316195, -0.97905368, -0.75482121,  0.28673776,   2.27142733,  0.02654739, -0.31109851, -0.44555277]),
     numpy.array([-0.53662325, -0.21434756, -0.12105795, -0.27531257,  0.66947047,   0.05474302, -0.00717455, -0.17700575, -0.22253444]),
     numpy.array([ 0.11272632, -0.12674826, -0.49736457, -0.51445609,  0.88518932,  -0.51558669, -0.12000557, -0.32973613, -0.38488736]),
     numpy.array([ 0.8372111 ,  0.06972199, -1.00454229, -0.79869642,  1.19376333,  -0.40160273, -0.25122157, -0.46417918, -0.50234858]),
     numpy.array([-0.36325018, -0.12206184,  0.10525247, -0.15663416,  1.03616948,  -0.51699463,  0.59566286,  0.35363369,  0.10545559])]


    #############################################
    #    compute pre-svm
    #############################################


    # create real-valued features as first step
    examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
    examples_presvm = numpy.transpose(examples_presvm)

    feat_presvm = RealFeatures(examples_presvm)
    lab_presvm = Labels(numpy.array(labels_presvm))
    wdk_presvm = LinearKernel(feat_presvm, feat_presvm)



    presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
    presvm_liblinear.set_max_iterations(10000)
    presvm_liblinear.set_bias_enabled(False)
    presvm_liblinear.train()


    #return presvm_liblinear


    #def get_da_svm(presvm_liblinear):


    #############################################
    #    compute linear term manually
    #############################################

    examples = numpy.array(examples, dtype=numpy.float64)
    examples = numpy.transpose(examples)

    feat = RealFeatures(examples)
    lab = Labels(numpy.array(labels))

    dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B)
    dasvm_liblinear.set_bias_enabled(False)
    dasvm_liblinear.train()

    helper.save("/tmp/svm", presvm_liblinear)
    presvm_pickle = helper.load("/tmp/svm")

    dasvm_pickle = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_pickle, B)
    dasvm_pickle.set_bias_enabled(False)
    dasvm_pickle.train()

    helper.save("/tmp/dasvm", dasvm_liblinear)
    dasvm_pickle2 = helper.load("/tmp/dasvm")

    #############################################
    #    load test data
    #############################################

    examples_test = numpy.array(examples_test, dtype=numpy.float64)
    examples_test = numpy.transpose(examples_test)
    feat_test = RealFeatures(examples_test)

    # check if pickled and unpickled classifiers behave the same
    out1 = dasvm_liblinear.classify(feat_test).get_labels()
    out2 = dasvm_pickle.classify(feat_test).get_labels()

    # compare outputs of the pickled and unpickled classifiers
    for i in xrange(len(out1)):
        try:
            assert(abs(out1[i] - out2[i]) <= 0.001)
        except AssertionError:
            print "(%.5f, %.5f)" % (out1[i], out2[i])

    print "classification agrees."
Example #20
        def RunKPCAShogun(q):
            totalTimer = Timer()

            try:
                # Load input dataset.
                Log.Info("Loading dataset", self.verbose)
                data = np.genfromtxt(self.dataset, delimiter=',')
                dataFeat = RealFeatures(data.T)

                with totalTimer:
                    # Get the new dimensionality, if it is necessary.
                    dimension = re.search('-d (\d+)', options)
                    if not dimension:
                        d = data.shape[1]
                    else:
                        d = int(dimension.group(1))
                        if (d > data.shape[1]):
                            Log.Fatal("New dimensionality (" + str(d) +
                                      ") cannot be greater " +
                                      "than existing dimensionality (" +
                                      str(data.shape[1]) + ")!")
                            q.put(-1)
                            return -1

                    # Get the kernel type and make sure it is valid.
                    kernel = re.search("-k ([^\s]+)", options)
                    if not kernel:
                        Log.Fatal(
                            "Choose kernel type, valid choices are 'linear'," +
                            " 'hyptan', 'polynomial' and 'gaussian'.")
                        q.put(-1)
                        return -1
                    elif kernel.group(1) == "polynomial":
                        degree = re.search('-D (\d+)', options)
                        degree = 1 if not degree else int(degree.group(1))

                        kernel = PolyKernel(dataFeat, dataFeat, degree, True)
                    elif kernel.group(1) == "gaussian":
                        kernel = GaussianKernel(dataFeat, dataFeat, 2.0)
                    elif kernel.group(1) == "linear":
                        kernel = LinearKernel(dataFeat, dataFeat)
                    elif kernel.group(1) == "hyptan":
                        kernel = SigmoidKernel(dataFeat, dataFeat, 2, 1.0, 1.0)
                    else:
                        Log.Fatal(
                            "Invalid kernel type (" + kernel.group(1) +
                            "); valid " +
                            "choices are 'linear', 'hyptan', 'polynomial' and 'gaussian'."
                        )
                        q.put(-1)
                        return -1

                    # Perform Kernel Principal Components Analysis.
                    model = KernelPCA(kernel)
                    model.set_target_dim(d)
                    model.init(dataFeat)
                    model.apply_to_feature_matrix(dataFeat)
            except Exception as e:
                q.put(-1)
                return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
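
Outside the benchmark harness, the KernelPCA flow above reduces to: build a kernel on the data, set the target dimension, init, then project. A stripped-down sketch, assuming KernelPCA lives in shogun.Preprocessor in this legacy interface:

import numpy
from shogun.Features import RealFeatures
from shogun.Kernel import GaussianKernel
from shogun.Preprocessor import KernelPCA

data = numpy.random.rand(4, 50)
feats = RealFeatures(data)
kernel = GaussianKernel(feats, feats, 2.0)
kpca = KernelPCA(kernel)
kpca.set_target_dim(2)
kpca.init(feats)
projected = kpca.apply_to_feature_matrix(feats)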
Example #21
# (truncated fragment: the examples_presvm / labels_presvm data arrays and the
# imports used below are the same as those defined in Example #19 above)

#############################################
#    compute pre-svm
#############################################

# create real-valued features as first step
examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
examples_presvm = numpy.transpose(examples_presvm)

feat_presvm = RealFeatures(examples_presvm)
lab_presvm = Labels(numpy.array(labels_presvm))
wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_max_iterations(10000)
presvm_liblinear.set_bias_enabled(False)
presvm_liblinear.train()

presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm)
#presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm)

#presvm_libsvm.io.set_loglevel(MSG_DEBUG)
presvm_libsvm.set_bias_enabled(False)
presvm_libsvm.train()

my_w = presvm_liblinear.get_w()
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
Example #22
def solver_mtk_shogun(C, all_xt, all_lt, task_indicator, M, L, eps,
                      target_obj):
    """
    implementation using multitask kernel
    """

    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)
    tsm = numpy.array(M)

    print "task_sim:", tsm

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects
    if type(xt[0]) == numpy.string_:
        feat = StringCharFeatures(DNA)
        xt = [str(a) for a in xt]
        feat.set_features(xt)
        base_kernel = WeightedDegreeStringKernel(feat, feat, 8)
    else:
        feat = RealFeatures(xt.T)
        base_kernel = LinearKernel(feat, feat)

    lab = Labels(lt)

    # set up normalizer
    normalizer = MultitaskKernelNormalizer(tt.tolist())

    for i in xrange(num_tasks):
        for j in xrange(num_tasks):
            normalizer.set_task_similarity(i, j, M[i, j])

    print "num of unique tasks: ", normalizer.get_num_unique_tasks(
        task_indicator)

    # set up kernel
    base_kernel.set_cache_size(2000)
    base_kernel.set_normalizer(normalizer)
    base_kernel.init_normalizer()

    # set up svm
    svm = SVMLight()  #LibSVM()

    svm.set_epsilon(eps)
    #print "reducing num threads to one"
    #svm.parallel.set_num_threads(1)
    #print "using one thread"

    # how often do we like to compute objective etc
    svm.set_record_interval(0)
    svm.set_target_objective(target_obj)

    svm.set_linadd_enabled(False)
    svm.set_batch_computation_enabled(False)
    svm.io.set_loglevel(MSG_DEBUG)
    #SET THREADS TO 1

    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # prepare for training
    svm.set_labels(lab)
    svm.set_kernel(base_kernel)

    # train svm
    svm.train()

    train_times = svm.get_training_times()
    objectives = [-obj for obj in svm.get_dual_objectives()]

    if False:

        # get model parameters
        sv_idx = svm.get_support_vectors()
        sparse_alphas = svm.get_alphas()

        assert len(sv_idx) == len(sparse_alphas)

        # compute dense alpha (remove label)
        alphas = numpy.zeros(len(xt))
        for id_sparse, id_dense in enumerate(sv_idx):
            alphas[id_dense] = sparse_alphas[id_sparse] * lt[id_dense]

        # print alphas
        W = alphas_to_w(alphas, xt, lt, task_indicator, M)
        primal_obj = compute_primal_objective(
            W.reshape(W.shape[0] * W.shape[1]), C, all_xt, all_lt,
            task_indicator, L)
        objectives.append(primal_obj)
        train_times.append(train_times[-1] + 100)

    return objectives, train_times
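
The normalizer above implements the multitask kernel K((x,s),(y,t)) = M[s,t] * k(x,y); a numpy sketch of the dense matrix it induces for the linear base kernel, under that assumption:

import numpy

def multitask_linear_kernel(X, tasks, M):
    # X: (n, d) examples, tasks: (n,) integer task ids, M: (T, T) task similarity
    base = numpy.dot(X, X.T)                 # linear base kernel k(x, y)
    return base * M[numpy.ix_(tasks, tasks)] # scale each entry by M[s, t]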
# Standalone fragment; the imports and sizes below are assumptions added so it
# can run on its own.
from numpy import random
from shogun.Features import RealFeatures, Labels
from shogun.Classifier import LibSVM
from shogun.Kernel import LinearKernel

num_feats = 10    # assumed feature dimensionality
num_vec = 100     # assumed number of vectors
size_cache = 10   # assumed kernel cache size
scale = 1.0       # assumed linear-kernel scale

C = 0.017
epsilon = 1e-5
tube_epsilon = 1e-2
svm = LibSVM()
svm.set_C(C, C)
svm.set_epsilon(epsilon)
svm.set_tube_epsilon(tube_epsilon)

for i in xrange(3):
    data_train = random.rand(num_feats, num_vec)
    data_test = random.rand(num_feats, num_vec)
    feats_train = RealFeatures(data_train)
    feats_test = RealFeatures(data_test)
    labels = Labels(random.rand(num_vec).round() * 2 - 1)

    svm.set_kernel(LinearKernel(size_cache, scale))
    svm.set_labels(labels)

    kernel = svm.get_kernel()
    print "kernel cache size: %s" % (kernel.get_cache_size())

    kernel.init(feats_train, feats_train)
    svm.train()

    kernel.init(feats_train, feats_test)
    print svm.apply().get_labels()

    #kernel.remove_lhs_and_rhs()

    #import pdb
    #pdb.set_trace()