def classifier_domainadaptationsvm_modular(
        fm_train_dna=traindna, fm_test_dna=testdna,
        label_train_dna=label_traindna,
        label_test_dna=label_testdna,
        fm_train_dna2=traindna2, fm_test_dna2=testdna2,
        label_train_dna2=label_traindna2, label_test_dna2=label_testdna2,
        C=1, degree=3):
    """Train an SVMLight on one DNA data set, then adapt it to a second one.

    An SVMLight with a WeightedDegreeStringKernel is first trained on
    ``fm_train_dna`` / ``label_train_dna``.  A DomainAdaptationSVM is then
    trained on ``fm_train_dna2`` / ``label_train_dna2`` with the first machine
    passed in as the previously obtained solution, and applied to
    ``fm_test_dna2``.

    Previously the second stage rebuilt its features, kernel and labels from
    the first-domain arguments, so the ``*2`` arguments were silently ignored.

    ``fm_test_dna``, ``label_test_dna`` and ``label_test_dna2`` are accepted
    for signature compatibility but are not used by this function.

    @param C: cost constant handed to both SVMLight and DomainAdaptationSVM
    @param degree: degree handed to both WeightedDegreeStringKernel instances
    @return: the result of ``dasvm.apply_binary`` on the second-domain test
        features
    """
    # --- stage 1: plain SVM on the first domain ---------------------------
    feats_train = StringCharFeatures(fm_train_dna, DNA)
    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)
    svm = SVMLight(C, kernel, labels)
    svm.train()

    # --- stage 2: domain-adaptation SVM on the second domain --------------
    feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
    feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
    kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
    labels2 = BinaryLabels(label_train_dna2)

    # We regularize against the previously obtained solution.
    # NOTE(review): the trailing 1.0 is presumably the regularization
    # strength towards `svm` -- confirm against the Shogun class reference.
    dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
    dasvm.train()

    out = dasvm.apply_binary(feats_test2)

    return out  # TODO: also return dasvm
def classifier_svmlight_modular(fm_train_dna=traindat, fm_test_dna=testdat,
                                label_train_dna=label_traindat, C=1.2,
                                epsilon=1e-5, num_threads=1):
    """Train an SVMLight with a degree-20 weighted-degree string kernel.

    The kernel is built on the training strings, the machine is trained, and
    the kernel is then re-initialised with the training features on the left
    and the test features on the right before the machine is applied once.

    @param fm_train_dna: training strings, loaded into DNA StringCharFeatures
    @param fm_test_dna: test strings, loaded into DNA StringCharFeatures
    @param label_train_dna: training labels wrapped in BinaryLabels
    @param C: cost constant passed to SVMLight
    @param epsilon: value passed to ``svm.set_epsilon``
    @param num_threads: value passed to ``svm.parallel.set_num_threads``
    @return: the kernel object (initialised on train/test features), or
        ``None`` when SVMLight cannot be imported
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    # The kernel degree is fixed for this example.
    wd_kernel = WeightedDegreeStringKernel(train_features, train_features, 20)
    train_labels = BinaryLabels(label_train_dna)

    machine = SVMLight(C, wd_kernel, train_labels)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Switch the kernel's right-hand side to the test data and classify;
    # the predicted labels are computed but intentionally not returned.
    wd_kernel.init(train_features, test_features)
    machine.apply().get_labels()

    return wd_kernel
def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,
                                            fm_test_dna=testdna,
                                            label_train_dna=label_traindna,
                                            degree=3, C=10, epsilon=1e-5,
                                            num_threads=1):
    """Train an SVMLight with a custom linear term and classify test strings.

    A WeightedDegreeStringKernel of the given degree is built on the training
    strings.  Before training, the QP size is set to 3 and a fixed linear term
    is installed on the machine.

    @param fm_train_dna: training strings, loaded into DNA StringCharFeatures
    @param fm_test_dna: test strings, loaded into DNA StringCharFeatures
    @param label_train_dna: training labels wrapped in BinaryLabels
    @param degree: degree of the WeightedDegreeStringKernel
    @param C: cost constant passed to SVMLight
    @param epsilon: value passed to ``svm.set_epsilon``
    @param num_threads: value passed to ``svm.parallel.set_num_threads``
    @return: tuple ``(labels, kernel)`` -- predicted labels for the test
        features and the kernel object.  The process exits with status 0 if
        SVMLight cannot be imported.
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight is not available")
        exit(0)

    train_features = StringCharFeatures(DNA)
    train_features.set_features(fm_train_dna)
    test_features = StringCharFeatures(DNA)
    test_features.set_features(fm_test_dna)

    wd_kernel = WeightedDegreeStringKernel(train_features, train_features,
                                           degree)
    train_labels = BinaryLabels(label_train_dna)

    # NOTE(review): the linear term has exactly 10 entries; presumably it must
    # match the number of training examples -- confirm against the caller.
    linear_term = -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6],
                               dtype=numpy.double)

    machine = SVMLight(C, wd_kernel, train_labels)
    machine.set_qpsize(3)
    machine.set_linear_term(linear_term)
    machine.set_epsilon(epsilon)
    machine.parallel.set_num_threads(num_threads)
    machine.train()

    # Re-initialise with the test data on the right-hand side and predict.
    wd_kernel.init(train_features, test_features)
    predicted = machine.apply().get_labels()

    return predicted, wd_kernel
def svm_process(args_tuple):
    """Train one SVMLight model and score it on a held-out set.

    ``args_tuple`` is unpacked as
    ``(X_train, Y_train, X_test, Y_test, d, c)``:

    - ``X_train`` / ``X_test``: feature objects handed to the kernel and to
      ``model.apply``
    - ``Y_train``: labels object handed to ``SVMLight``
    - ``Y_test``: array compared element-wise with the predicted labels
      (its ``.size`` is used as the denominator of the accuracy)
    - ``d``: degree passed to ``WeightedDegreePositionStringKernel``
    - ``c``: cost constant passed to ``SVMLight``

    Shifts and position weights are both set to all-ones vectors of length
    ``SEQ_LEN``.

    @return: tuple ``(accuracy, Y_test_proba)`` where ``accuracy`` is the
        fraction of test labels predicted exactly and ``Y_test_proba`` is the
        logistic function applied to the SVM output values
    """
    X_train, Y_train, X_test, Y_test, d, c = args_tuple

    kernel = WeightedDegreePositionStringKernel(X_train, X_train, d)
    kernel.set_shifts(np.ones(SEQ_LEN, dtype=np.int32))
    kernel.set_position_weights(np.ones(SEQ_LEN, dtype=np.float64))
    kernel.init(X_train, X_train)

    model = SVMLight(c, kernel, Y_train)
    model.train()

    # Evaluate the test set once and read both labels and raw outputs from
    # the same result, instead of running the kernel evaluation twice.
    predictions = model.apply(X_test)
    Y_test_pred = predictions.get_labels()
    Y_test_dist = predictions.get_values()

    # Logistic function written as 0.5 * (1 + tanh(x / 2)).  This is the same
    # function as exp(x) / (1 + exp(x)) but does not overflow to inf / inf
    # (= nan) for large positive outputs.
    Y_test_proba = 0.5 * (1.0 + np.tanh(0.5 * Y_test_dist))

    n_correct = np.count_nonzero(Y_test_pred - Y_test == 0)
    accuracy = n_correct * 1.0 / Y_test.size
    return (accuracy, Y_test_proba)
def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna,
                                             label_train_dna, degree, C,
                                             epsilon, num_threads):
    """Train an SVMLight and classify with and without batch computation.

    A WeightedDegreeStringKernel of the requested ``degree`` is built on the
    training strings and an SVMLight is trained on it.  The test set is then
    classified twice: first with batch computation and linadd both disabled
    (result discarded), then with batch computation re-enabled (result
    returned).  Linadd stays disabled for the second pass.

    The ``degree`` argument used to be overwritten with a hard-coded 20
    before the kernel was created; it is now honoured.

    @param fm_train_dna: training strings, loaded into DNA StringCharFeatures
    @param fm_test_dna: test strings, loaded into DNA StringCharFeatures
    @param label_train_dna: training labels wrapped in BinaryLabels
    @param degree: degree of the WeightedDegreeStringKernel
    @param C: cost constant passed to SVMLight
    @param epsilon: value passed to ``svm.set_epsilon``
    @param num_threads: value passed to ``svm.parallel.set_num_threads``
    @return: tuple ``(labels, svm)`` -- labels from the batch-enabled pass
        and the trained machine, or ``None`` when SVMLight cannot be imported
    """
    from modshogun import StringCharFeatures, BinaryLabels, DNA
    from modshogun import WeightedDegreeStringKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    feats_train = StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test = StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)

    kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
    labels = BinaryLabels(label_train_dna)

    svm = SVMLight(C, kernel, labels)
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()

    kernel.init(feats_train, feats_test)

    # First pass: plain evaluation, exercised for its own sake; the labels
    # are not kept.
    svm.set_batch_computation_enabled(False)
    svm.set_linadd_enabled(False)
    svm.apply().get_labels()

    # Second pass: batch computation on; these labels are returned.
    svm.set_batch_computation_enabled(True)
    labels = svm.apply().get_labels()

    return labels, svm
def serialization_string_kernels_modular(n_data, num_shifts, size):
    """
    serialize svm with string kernels

    Builds a CombinedKernel from one WeightedDegreePositionStringKernel and
    two WeightedCommWordStringKernel instances, trains an SVMLight on random
    toy data, writes the machine to "serialized_svm.bz2" with ``save``, reads
    it back with ``load`` and checks that both machines predict the same
    labels on the test features.

    @param n_data: argument forwarded to ``generate_random_data`` for both
        the training and the test set
    @param num_shifts: value every entry of the shift vector is set to
    @param size: first argument of each sub-kernel constructor
    @return: tuple ``(out, out2)`` -- labels predicted by the original and by
        the reloaded machine
    @raise AssertionError: if the two predictions differ in length or by
        1e-6 or more in any position
    """

    ##################################################
    # set up toy data and svm
    train_xt, train_lt = generate_random_data(n_data)
    test_xt, _ = generate_random_data(n_data)  # test labels are not needed

    feats_train = construct_features(train_xt)
    feats_test = construct_features(test_xt)

    # The shift vector gets one entry per position of the first training
    # string.
    max_len = len(train_xt[0])
    kernel_wdk = WeightedDegreePositionStringKernel(size, 5)
    shifts_vector = numpy.ones(max_len, dtype=numpy.int32) * num_shifts
    kernel_wdk.set_shifts(shifts_vector)

    ########
    # set up spectrum
    use_sign = False
    kernel_spec_1 = WeightedCommWordStringKernel(size, use_sign)
    kernel_spec_2 = WeightedCommWordStringKernel(size, use_sign)

    ########
    # combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(kernel_wdk)
    kernel.append_kernel(kernel_spec_1)
    kernel.append_kernel(kernel_spec_2)

    # init kernel
    labels = BinaryLabels(train_lt)

    svm = SVMLight(1.0, kernel, labels)
    svm.train(feats_train)

    ##################################################
    # serialize to file
    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check
    svm2 = load(fn)

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close
    # (the closing parenthesis of abs() used to wrap the comparison, which
    # reduced the check to a one-sided `out[i] - out2[i] < 1e-6`)
    assert len(out) == len(out2), "prediction lengths differ"
    for i, (before, after) in enumerate(zip(out, out2)):
        assert abs(before - after) < 0.000001, \
            "prediction %d changed after serialization" % i

    return out, out2
def serialization_svmlight_modular(num, dist, width, C):
    """Round-trip a trained SVMLight through a bz2-compressed pickle.

    Two Gaussian blobs of ``num`` two-dimensional points each, shifted by
    ``-dist`` and ``+dist``, are generated for training and again for
    testing.  An SVMLight with a GaussianKernel is trained, pickled to
    "serialized_svm.bz2", loaded back, retrained, and both machines are
    applied to the test features.

    @param num: number of points per class
    @param dist: offset subtracted from / added to the random points
    @param width: width passed to GaussianKernel
    @param C: cost constant passed to SVMLight
    @return: True when all predictions agree.  The process exits with
        status 0 if SVMLight cannot be imported.
    @raise AssertionError: if the two predictions differ in length or by
        1e-6 or more in any position
    """
    import sys
    import bz2
    import pickle

    from numpy import concatenate, ones
    from numpy.random import randn

    from modshogun import RealFeatures, BinaryLabels
    from modshogun import GaussianKernel
    try:
        from modshogun import SVMLight
    except ImportError:
        print("SVMLight not available")
        exit(0)

    def save(filename, myobj):
        """
        save object to file using pickle

        @param filename: name of destination file
        @type filename: str
        @param myobj: object to save (has to be pickleable)
        @type myobj: obj
        """
        try:
            f = bz2.BZ2File(filename, 'wb')
        except IOError as details:
            sys.stderr.write('File ' + filename + ' cannot be written\n')
            # write() needs a string; passing the exception object itself
            # raised TypeError inside the error handler.
            sys.stderr.write(str(details) + '\n')
            return

        # Close the file even if pickling fails.
        with f:
            pickle.dump(myobj, f, protocol=2)

    def load(filename):
        """
        Load from filename using pickle

        @param filename: name of file to load from
        @type filename: str
        """
        try:
            f = bz2.BZ2File(filename, 'rb')
        except IOError as details:
            sys.stderr.write('File ' + filename + ' cannot be read\n')
            sys.stderr.write(str(details) + '\n')
            return

        # NOTE: pickle.load executes arbitrary code from the file; it is used
        # here only on the file this function has just written.
        with f:
            return pickle.load(f)

    ##################################################
    # set up toy data and svm
    traindata_real = concatenate((randn(2, num) - dist,
                                  randn(2, num) + dist), axis=1)
    testdata_real = concatenate((randn(2, num) - dist,
                                 randn(2, num) + dist), axis=1)

    trainlab = concatenate((-ones(num), ones(num)))

    feats_train = RealFeatures(traindata_real)
    feats_test = RealFeatures(testdata_real)
    kernel = GaussianKernel(feats_train, feats_train, width)

    labels = BinaryLabels(trainlab)

    svm = SVMLight(C, kernel, labels)
    svm.train()

    ##################################################
    # serialize to file
    fn = "serialized_svm.bz2"
    save(fn, svm)

    ##################################################
    # unserialize and sanity check
    svm2 = load(fn)

    # The reloaded machine is trained again before predictions are compared.
    svm2.train()

    out = svm.apply(feats_test).get_labels()
    out2 = svm2.apply(feats_test).get_labels()

    # assert outputs are close
    # (the closing parenthesis of abs() used to wrap the comparison, which
    # reduced the check to a one-sided `out[i] - out2[i] < 1e-6`)
    assert len(out) == len(out2), "prediction lengths differ"
    for i, (before, after) in enumerate(zip(out, out2)):
        assert abs(before - after) < 0.000001, \
            "prediction %d changed after serialization" % i

    return True