def distance_hammingword_modular (fm_train_dna=traindna,fm_test_dna=testdna, fm_test_real=testdat,order=3,gap=0,reverse=False,use_sign=False): from modshogun import StringCharFeatures, StringWordFeatures, DNA from modshogun import SortWordString from modshogun import HammingWordDistance charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preprocessor(preproc) feats_train.apply_preprocessor() charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preprocessor(preproc) feats_test.apply_preprocessor() distance=HammingWordDistance(feats_train, feats_train, use_sign) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test
def kernel_histogram_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,order=3,gap=0,reverse=False): from modshogun import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels from modshogun import HistogramWordStringKernel from modshogun import PluginEstimate#, MSG_DEBUG reverse = reverse charfeat=StringCharFeatures(DNA) #charfeat.io.set_loglevel(MSG_DEBUG) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) pie=PluginEstimate() labels=BinaryLabels(label_train_dna) pie.set_labels(labels) pie.set_features(feats_train) pie.train() kernel=HistogramWordStringKernel(feats_train, feats_train, pie) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) pie.set_features(feats_test) pie.apply().get_labels() km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def distribution_linearhmm_modular (fm_dna=traindna,order=3,gap=0,reverse=False): from modshogun import StringWordFeatures, StringCharFeatures, DNA from modshogun import LinearHMM charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) hmm=LinearHMM(feats) hmm.train() hmm.get_transition_probs() num_examples=feats.get_num_vectors() num_param=hmm.get_num_model_parameters() for i in range(num_examples): for j in range(num_param): hmm.get_log_derivative(j, i) out_likelihood = hmm.get_log_likelihood() out_sample = hmm.get_log_likelihood_sample() return hmm,out_likelihood ,out_sample
def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1): from modshogun import StringCharFeatures, BinaryLabels, DNA from modshogun import WeightedDegreeStringKernel try: from modshogun import SVMLight except ImportError: print('No support for SVMLight available.') return feats_train=StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree=20 kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) labels=BinaryLabels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) svm.apply().get_labels() return kernel
def distribution_linearhmm_modular(fm_dna=traindna, order=3, gap=0, reverse=False): from modshogun import StringWordFeatures, StringCharFeatures, DNA from modshogun import LinearHMM charfeat = StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats = StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order - 1, order, gap, reverse) hmm = LinearHMM(feats) hmm.train() hmm.get_transition_probs() num_examples = feats.get_num_vectors() num_param = hmm.get_num_model_parameters() for i in range(num_examples): for j in range(num_param): hmm.get_log_derivative(j, i) out_likelihood = hmm.get_log_likelihood() out_sample = hmm.get_log_likelihood_sample() return hmm, out_likelihood, out_sample
def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples): from modshogun import StringWordFeatures, StringCharFeatures, CUBE from modshogun import HMM, BW_NORMAL charfeat=StringCharFeatures(CUBE) charfeat.set_features(fm_cube) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) hmm=HMM(feats, N, M, pseudo) hmm.train() hmm.baum_welch_viterbi_train(BW_NORMAL) num_examples=feats.get_num_vectors() num_param=hmm.get_num_model_parameters() for i in range(num_examples): for j in range(num_param): hmm.get_log_derivative(j, i) best_path=0 best_path_state=0 for i in range(num_examples): best_path+=hmm.best_path(i) for j in range(N): best_path_state+=hmm.get_best_path_state(i, j) lik_example = hmm.get_log_likelihood() lik_sample = hmm.get_log_likelihood_sample() return lik_example, lik_sample, hmm
def classifier_svmlight_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, C=1.2, epsilon=1e-5, num_threads=1): from modshogun import StringCharFeatures, BinaryLabels, DNA from modshogun import WeightedDegreeStringKernel try: from modshogun import SVMLight except ImportError: print('No support for SVMLight available.') return feats_train = StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test = StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree = 20 kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree) labels = BinaryLabels(label_train_dna) svm = SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) svm.apply().get_labels() return kernel
def classifier_svmlight_linear_term_modular (fm_train_dna=traindna,fm_test_dna=testdna, \ label_train_dna=label_traindna,degree=3, \ C=10,epsilon=1e-5,num_threads=1): from modshogun import StringCharFeatures, BinaryLabels, DNA from modshogun import WeightedDegreeStringKernel try: from modshogun import SVMLight except ImportError: print("SVMLight is not available") exit(0) feats_train=StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) labels=BinaryLabels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_qpsize(3) svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double)); svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) out = svm.apply().get_labels() return out,kernel
def distance_canberraword_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False): from modshogun import StringCharFeatures, StringWordFeatures, DNA from modshogun import SortWordString from modshogun import CanberraWordDistance charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preprocessor(preproc) feats_train.apply_preprocessor() charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preprocessor(preproc) feats_test.apply_preprocessor() distance=CanberraWordDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test
def classifier_svmlight_linear_term_modular (fm_train_dna=traindna,fm_test_dna=testdna, \ label_train_dna=label_traindna,degree=3, \ C=10,epsilon=1e-5,num_threads=1): from modshogun import StringCharFeatures, BinaryLabels, DNA from modshogun import WeightedDegreeStringKernel try: from modshogun import SVMLight except ImportError: print("SVMLight is not available") exit(0) feats_train = StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test = StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree) labels = BinaryLabels(label_train_dna) svm = SVMLight(C, kernel, labels) svm.set_qpsize(3) svm.set_linear_term( -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double)) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) out = svm.apply().get_labels() return out, kernel
def kernel_comm_word_string_modular(fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse=False, use_sign=False): from modshogun import CommWordStringKernel from modshogun import StringWordFeatures, StringCharFeatures, DNA from modshogun import SortWordString charfeat = StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train = StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse) preproc = SortWordString() preproc.init(feats_train) feats_train.add_preprocessor(preproc) feats_train.apply_preprocessor() charfeat = StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test = StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse) feats_test.add_preprocessor(preproc) feats_test.apply_preprocessor() kernel = CommWordStringKernel(feats_train, feats_train, use_sign) km_train = kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test = kernel.get_kernel_matrix() return km_train, km_test, kernel
def kernel_histogram_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,order=3,ppseudo_count=1,npseudo_count=1): from modshogun import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels from modshogun import HistogramWordStringKernel, AvgDiagKernelNormalizer from modshogun import PluginEstimate#, MSG_DEBUG charfeat=StringCharFeatures(DNA) #charfeat.io.set_loglevel(MSG_DEBUG) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, 0, False) charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, 0, False) pie=PluginEstimate(ppseudo_count,npseudo_count) labels=BinaryLabels(label_train_dna) pie.set_labels(labels) pie.set_features(feats_train) pie.train() kernel=HistogramWordStringKernel(feats_train, feats_train, pie) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) pie.set_features(feats_test) pie.apply().get_labels() km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def kernel_comm_ulong_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, order=3, gap=0, reverse = False): from modshogun import CommUlongStringKernel from modshogun import StringUlongFeatures, StringCharFeatures, DNA from modshogun import SortUlongString charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringUlongFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortUlongString() preproc.init(feats_train) feats_train.add_preprocessor(preproc) feats_train.apply_preprocessor() charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringUlongFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preprocessor(preproc) feats_test.apply_preprocessor() use_sign=False kernel=CommUlongStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel
def get_wd_features(data, feat_type="dna"): """ create feature object for wdk """ if feat_type == "dna": feat = StringCharFeatures(DNA) elif feat_type == "protein": feat = StringCharFeatures(PROTEIN) else: raise Exception("unknown feature type") feat.set_features(data) return feat
def get_wd_features(data, feat_type="protein"): """ create feature object for wdk """ if feat_type == "dna": feat = StringCharFeatures(DNA) elif feat_type == "protein": feat = StringCharFeatures(PROTEIN) else: raise Exception("unknown feature type") feat.set_features(data) return feat
def distribution_ppwm_modular(fm_dna=traindna, order=3): from modshogun import StringByteFeatures, StringCharFeatures, DNA from modshogun import PositionalPWM from numpy import array, e, log, exp charfeat = StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats = StringByteFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order - 1, order, 0, False) L = 20 k = 3 sigma = 1 mu = 4 ppwm = PositionalPWM() ppwm.set_sigma(sigma) ppwm.set_mean(mu) pwm = array([[0.0, 0.5, 0.1, 1.0], [0.0, 0.5, 0.5, 0.0], [1.0, 0.0, 0.4, 0.0], [0.0, 0.0, 0.0, 0.0]]) pwm = array([[0.01, 0.09, 0.1], [0.09, 0.01, 0.1], [0.85, 0.4, 0.1], [0.05, 0.5, 0.7]]) ppwm.set_pwm(log(pwm)) #print(ppwm.get_pwm()) ppwm.compute_w(L) w = ppwm.get_w() #print(w) #from pylab import * #figure(1) #pcolor(exp(w)) #pcolor(w) #colorbar() #figure(2) ppwm.compute_scoring(1) u = ppwm.get_scoring(0) #pcolor(exp(u)) #show() #ppwm=PositionalPWM(feats) #ppwm.train() #out_likelihood = histo.get_log_likelihood() #out_sample = histo.get_log_likelihood_sample() return w, u
def distribution_ppwm_modular(fm_dna=traindna, order=3): from modshogun import StringByteFeatures, StringCharFeatures, DNA from modshogun import PositionalPWM from numpy import array, e, log, exp charfeat = StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats = StringByteFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order - 1, order, 0, False) L = 20 k = 3 sigma = 1 mu = 4 ppwm = PositionalPWM() ppwm.set_sigma(sigma) ppwm.set_mean(mu) pwm = array([[0.0, 0.5, 0.1, 1.0], [0.0, 0.5, 0.5, 0.0], [1.0, 0.0, 0.4, 0.0], [0.0, 0.0, 0.0, 0.0]]) pwm = array([[0.01, 0.09, 0.1], [0.09, 0.01, 0.1], [0.85, 0.4, 0.1], [0.05, 0.5, 0.7]]) ppwm.set_pwm(log(pwm)) # print(ppwm.get_pwm()) ppwm.compute_w(L) w = ppwm.get_w() # print(w) # from pylab import * # figure(1) # pcolor(exp(w)) # pcolor(w) # colorbar() # figure(2) ppwm.compute_scoring(1) u = ppwm.get_scoring(0) # pcolor(exp(u)) # show() # ppwm=PositionalPWM(feats) # ppwm.train() # out_likelihood = histo.get_log_likelihood() # out_sample = histo.get_log_likelihood_sample() return w, u
def classifier_svmlight_batch_linadd_modular( fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads ): from modshogun import StringCharFeatures, BinaryLabels, DNA from modshogun import WeightedDegreeStringKernel, MSG_DEBUG try: from modshogun import SVMLight except ImportError: print("No support for SVMLight available.") return feats_train = StringCharFeatures(DNA) # feats_train.io.set_loglevel(MSG_DEBUG) feats_train.set_features(fm_train_dna) feats_test = StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree = 20 kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree) labels = BinaryLabels(label_train_dna) svm = SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) # print('SVMLight Objective: %f num_sv: %d' % \) # (svm.get_objective(), svm.get_num_support_vectors()) svm.set_batch_computation_enabled(False) svm.set_linadd_enabled(False) svm.apply().get_labels() svm.set_batch_computation_enabled(True) labels = svm.apply().get_labels() return labels, svm
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads): from modshogun import StringCharFeatures, BinaryLabels, DNA from modshogun import WeightedDegreeStringKernel, MSG_DEBUG try: from modshogun import SVMLight except ImportError: print('No support for SVMLight available.') return feats_train = StringCharFeatures(DNA) #feats_train.io.set_loglevel(MSG_DEBUG) feats_train.set_features(fm_train_dna) feats_test = StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree = 20 kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree) labels = BinaryLabels(label_train_dna) svm = SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) #print('SVMLight Objective: %f num_sv: %d' % \) # (svm.get_objective(), svm.get_num_support_vectors()) svm.set_batch_computation_enabled(False) svm.set_linadd_enabled(False) svm.apply().get_labels() svm.set_batch_computation_enabled(True) labels = svm.apply().get_labels() return labels, svm
def features_string_sliding_window_modular (strings): from modshogun import StringCharFeatures, DNA from modshogun import DynamicIntArray f=StringCharFeatures([strings], DNA) # slide a window of length 5 over features # (memory efficient, does not copy strings) f.obtain_by_sliding_window(5,1) #print(f.get_num_vectors()) #print(f.get_vector_length(0)) #print(f.get_vector_length(1)) #print(f.get_features()) # slide a window of length 4 over features # (memory efficient, does not copy strings) f.obtain_by_sliding_window(4,1) #print(f.get_num_vectors()) #print(f.get_vector_length(0)) #print(f.get_vector_length(1)) #print(f.get_features()) # extract string-windows at position 0,6,16,25 of window size 4 # (memory efficient, does not copy strings) f.set_features([s]) positions=DynamicIntArray() positions.append_element(0) positions.append_element(6) positions.append_element(16) positions.append_element(25) f.obtain_by_position_list(4,positions) #print(f.get_features()) # now extract windows of size 8 from same positon list f.obtain_by_position_list(8,positions) #print(f.get_features()) return f
def distribution_histogram_modular(fm_dna=traindna, order=3, gap=0, reverse=False): from modshogun import StringWordFeatures, StringCharFeatures, DNA from modshogun import Histogram charfeat = StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats = StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order - 1, order, gap, reverse) histo = Histogram(feats) histo.train() histo.get_histogram() num_examples = feats.get_num_vectors() num_param = histo.get_num_model_parameters() # for i in xrange(num_examples): # for j in xrange(num_param): # histo.get_log_derivative(j, i) out_likelihood = histo.get_log_likelihood() out_sample = histo.get_log_likelihood_sample() return histo, out_sample, out_likelihood
def distribution_histogram_modular (fm_dna=traindna,order=3,gap=0,reverse=False): from modshogun import StringWordFeatures, StringCharFeatures, DNA from modshogun import Histogram charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) histo=Histogram(feats) histo.train() histo.get_histogram() num_examples=feats.get_num_vectors() num_param=histo.get_num_model_parameters() #for i in xrange(num_examples): # for j in xrange(num_param): # histo.get_log_derivative(j, i) out_likelihood = histo.get_log_likelihood() out_sample = histo.get_log_likelihood_sample() return histo,out_sample,out_likelihood
def tests_check_commwordkernel_memleak_modular(num, order, gap, reverse): import gc from modshogun import Alphabet, StringCharFeatures, StringWordFeatures, DNA from modshogun import SortWordString, MSG_DEBUG from modshogun import CommWordStringKernel, IdentityKernelNormalizer from numpy import mat POS = [ num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT' ] NEG = [ num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'TTGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT', num * 'ACGT' ] for i in range(10): alpha = Alphabet(DNA) traindat = StringCharFeatures(alpha) traindat.set_features(POS + NEG) trainudat = StringWordFeatures(traindat.get_alphabet()) trainudat.obtain_from_char(traindat, order - 1, order, gap, reverse) #trainudat.io.set_loglevel(MSG_DEBUG) pre = SortWordString() #pre.io.set_loglevel(MSG_DEBUG) pre.init(trainudat) trainudat.add_preprocessor(pre) trainudat.apply_preprocessor() spec = CommWordStringKernel(10, False) spec.set_normalizer(IdentityKernelNormalizer()) spec.init(trainudat, trainudat) K = spec.get_kernel_matrix() del POS del NEG del order del gap del reverse return K
def tests_check_commwordkernel_memleak_modular (num, order, gap, reverse): import gc from modshogun import Alphabet,StringCharFeatures,StringWordFeatures,DNA from modshogun import SortWordString, MSG_DEBUG from modshogun import CommWordStringKernel, IdentityKernelNormalizer from numpy import mat POS=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT'] NEG=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT'] for i in range(10): alpha=Alphabet(DNA) traindat=StringCharFeatures(alpha) traindat.set_features(POS+NEG) trainudat=StringWordFeatures(traindat.get_alphabet()); trainudat.obtain_from_char(traindat, order-1, order, gap, reverse) #trainudat.io.set_loglevel(MSG_DEBUG) pre = SortWordString() #pre.io.set_loglevel(MSG_DEBUG) pre.init(trainudat) trainudat.add_preprocessor(pre) trainudat.apply_preprocessor() spec = CommWordStringKernel(10, False) spec.set_normalizer(IdentityKernelNormalizer()) spec.init(trainudat, trainudat) K=spec.get_kernel_matrix() del POS del NEG del order del gap del reverse return K