def kernel_comm_word_string_modular(
    fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse=False, use_sign=False
):

    from modshogun import CommWordStringKernel
    from modshogun import StringWordFeatures, StringCharFeatures, DNA
    from modshogun import SortWordString

    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_train_dna)
    feats_train = StringWordFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_test_dna)
    feats_test = StringWordFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, use_sign)

    km_train = kernel.get_kernel_matrix()
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
    return km_train, km_test, kernel
示例#2
0
def preprocessor_sortwordstring_modular(fm_train_dna=traindna,
                                        fm_test_dna=testdna,
                                        order=3,
                                        gap=0,
                                        reverse=False,
                                        use_sign=False):

    from modshogun import CommWordStringKernel
    from modshogun import StringCharFeatures, StringWordFeatures, DNA
    from modshogun import SortWordString

    charfeat = StringCharFeatures(fm_train_dna, DNA)
    feats_train = StringWordFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    charfeat = StringCharFeatures(fm_test_dna, DNA)
    feats_test = StringWordFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, use_sign)

    km_train = kernel.get_kernel_matrix()
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def runShogunOneClassSVMDNASpectrumKernel(train_xt, train_lt, test_xt):
	"""
	run svm with spectrum kernel
	"""

    ##################################################
    # set up svr
	charfeat_train = StringCharFeatures(train_xt, DNA)
	feats_train = StringWordFeatures(DNA)
	feats_train.obtain_from_char(charfeat_train, K-1, K, GAP, False)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()
	
	charfeat_test = StringCharFeatures(test_xt, DNA)
	feats_test=StringWordFeatures(DNA)
	feats_test.obtain_from_char(charfeat_test, K-1, K, GAP, False)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()
	
	kernel=CommWordStringKernel(feats_train, feats_train, False)
	kernel.io.set_loglevel(MSG_DEBUG)

    # init kernel
	labels = BinaryLabels(train_lt)
	
	# run svm model
	print "Ready to train!"
	svm=LibSVMOneClass(SVMC, kernel)
	svm.set_epsilon(EPSILON)
	svm.train()


	# predictions
	print "Making predictions!"
	out1DecisionValues = svm.apply(feats_train)
	out1=out1DecisionValues.get_labels()
	kernel.init(feats_train, feats_test)
	out2DecisionValues = svm.apply(feats_test)
	out2=out2DecisionValues.get_labels()


#	predictions = svm.apply(feats_test)
#	return predictions, svm, predictions.get_labels()

	return out1,out2,out1DecisionValues,out2DecisionValues
def runShogunSVRSpectrumKernel(train_xt, train_lt, test_xt, svm_c=1):
	"""
	serialize svr with spectrum kernels
	"""

    ##################################################
    # set up svr
	charfeat_train = StringCharFeatures(train_xt, DNA)
	feats_train = StringWordFeatures(DNA)
	feats_train.obtain_from_char(charfeat_train, K-1, K, GAP, False)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()
	
	charfeat_test = StringCharFeatures(test_xt, DNA)
	feats_test=StringWordFeatures(DNA)
	feats_test.obtain_from_char(charfeat_test, K-1, K, GAP, False)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()
	
	kernel=CommWordStringKernel(feats_train, feats_train, False)
	kernel.io.set_loglevel(MSG_DEBUG)

    # init kernel
	labels = RegressionLabels(train_lt)
	
	# two svr models: epsilon and nu
	print "Ready to train!"
	svr_epsilon=LibSVR(svm_c, SVRPARAM, kernel, labels, LIBSVR_EPSILON_SVR)
	svr_epsilon.io.set_loglevel(MSG_DEBUG)
	svr_epsilon.train()

	# predictions
	print "Making predictions!"
	out1_epsilon=svr_epsilon.apply(feats_train).get_labels()
	kernel.init(feats_train, feats_test)
	out2_epsilon=svr_epsilon.apply(feats_test).get_labels()

	return out1_epsilon,out2_epsilon,kernel
示例#5
0
def runShogunSVRSpectrumKernel(train_xt, train_lt, test_xt, svm_c=1):
    """
	serialize svr with spectrum kernels
	"""

    ##################################################
    # set up svr
    charfeat_train = StringCharFeatures(train_xt, DNA)
    feats_train = StringWordFeatures(DNA)
    feats_train.obtain_from_char(charfeat_train, K - 1, K, GAP, False)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preprocessor(preproc)
    feats_train.apply_preprocessor()

    charfeat_test = StringCharFeatures(test_xt, DNA)
    feats_test = StringWordFeatures(DNA)
    feats_test.obtain_from_char(charfeat_test, K - 1, K, GAP, False)
    feats_test.add_preprocessor(preproc)
    feats_test.apply_preprocessor()

    kernel = CommWordStringKernel(feats_train, feats_train, False)
    kernel.io.set_loglevel(MSG_DEBUG)

    # init kernel
    labels = RegressionLabels(train_lt)

    # two svr models: epsilon and nu
    print "Ready to train!"
    svr_epsilon = LibSVR(svm_c, SVRPARAM, kernel, labels, LIBSVR_EPSILON_SVR)
    svr_epsilon.io.set_loglevel(MSG_DEBUG)
    svr_epsilon.train()

    # predictions
    print "Making predictions!"
    out1_epsilon = svr_epsilon.apply(feats_train).get_labels()
    kernel.init(feats_train, feats_test)
    out2_epsilon = svr_epsilon.apply(feats_test).get_labels()

    return out1_epsilon, out2_epsilon, kernel
def tests_check_commwordkernel_memleak_modular (num, order, gap, reverse):
	import gc
	from modshogun import Alphabet,StringCharFeatures,StringWordFeatures,DNA
	from modshogun import SortWordString, MSG_DEBUG
	from modshogun import CommWordStringKernel, IdentityKernelNormalizer
	from numpy import mat

	POS=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT',
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT',
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT',
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']
	NEG=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT',
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT',
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT',
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT',
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']

	for i in range(10):
		alpha=Alphabet(DNA)
		traindat=StringCharFeatures(alpha)
		traindat.set_features(POS+NEG)
		trainudat=StringWordFeatures(traindat.get_alphabet());
		trainudat.obtain_from_char(traindat, order-1, order, gap, reverse)
		#trainudat.io.set_loglevel(MSG_DEBUG)
		pre = SortWordString()
		#pre.io.set_loglevel(MSG_DEBUG)
		pre.init(trainudat)
		trainudat.add_preprocessor(pre)
		trainudat.apply_preprocessor()
		spec = CommWordStringKernel(10, False)
		spec.set_normalizer(IdentityKernelNormalizer())
		spec.init(trainudat, trainudat)
		K=spec.get_kernel_matrix()

	del POS
	del NEG
	del order
	del gap
	del reverse
	return K
def tests_check_commwordkernel_memleak_modular(num, order, gap, reverse):
    import gc
    from modshogun import Alphabet, StringCharFeatures, StringWordFeatures, DNA
    from modshogun import SortWordString, MSG_DEBUG
    from modshogun import CommWordStringKernel, IdentityKernelNormalizer
    from numpy import mat

    POS = [
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT',
        num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT',
        num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT',
        num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT',
        num * 'TTGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT'
    ]
    NEG = [
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT',
        num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT',
        num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT',
        num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT',
        num * 'TTGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT',
        num * 'ACGT'
    ]

    for i in range(10):
        alpha = Alphabet(DNA)
        traindat = StringCharFeatures(alpha)
        traindat.set_features(POS + NEG)
        trainudat = StringWordFeatures(traindat.get_alphabet())
        trainudat.obtain_from_char(traindat, order - 1, order, gap, reverse)
        #trainudat.io.set_loglevel(MSG_DEBUG)
        pre = SortWordString()
        #pre.io.set_loglevel(MSG_DEBUG)
        pre.init(trainudat)
        trainudat.add_preprocessor(pre)
        trainudat.apply_preprocessor()
        spec = CommWordStringKernel(10, False)
        spec.set_normalizer(IdentityKernelNormalizer())
        spec.init(trainudat, trainudat)
        K = spec.get_kernel_matrix()

    del POS
    del NEG
    del order
    del gap
    del reverse
    return K