def comm_ulong_string():
    """Run the CommUlongString kernel example on the module-level DNA data.

    Reads the module-level names ``fm_train_dna`` and ``fm_test_dna`` (they
    are not defined in this function), converts each of them to
    ``StringUlongFeatures``, applies a ``SortUlongString`` preprocessor that
    was initialised on the training features, and computes the train/train
    and train/test kernel matrices.

    The matrices are computed but not returned; the function returns None.
    """
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and also parses as a Python 3 function call.
    print('CommUlongString')

    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringUlongFeatures, StringCharFeatures, DNA
    from shogun.PreProc import SortUlongString

    order = 3
    gap = 0
    reverse = False

    # Training data: characters -> ulong features, then sort them.
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_train_dna)
    feats_train = StringUlongFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    preproc = SortUlongString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    # Test data goes through the same conversion and reuses the
    # preprocessor that was initialised on the training features.
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_test_dna)
    feats_test = StringUlongFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse)
    feats_test.add_preproc(preproc)
    feats_test.apply_preproc()

    use_sign = False

    kernel = CommUlongStringKernel(feats_train, feats_train, use_sign)
    km_train = kernel.get_kernel_matrix()
    # Re-initialise the same kernel object for train vs. test.
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()
def preprocessor_sortulongstring_modular(fm_train_dna=traindna,
                                         fm_test_dna=testdna,
                                         order=3,
                                         gap=0,
                                         reverse=False,
                                         use_sign=False):
    """Sort ulong string features and compute CommUlongString kernel matrices.

    Arguments:
    fm_train_dna -- training strings, loaded into DNA StringCharFeatures
    fm_test_dna -- test strings, loaded into DNA StringCharFeatures
    order, gap, reverse -- forwarded to ``obtain_from_char`` (which is called
                           with ``order - 1`` and ``order``)
    use_sign -- forwarded to the CommUlongStringKernel constructor

    Return:
    (km_train, km_test, kernel) -- the train/train matrix, the train/test
    matrix and the kernel object, which is left initialised on train/test
    """
    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringCharFeatures, StringUlongFeatures, DNA
    from shogun.Preprocessor import SortUlongString

    def _ulong_features(dna_strings):
        # Character features first; the ulong features reuse their alphabet.
        chars = StringCharFeatures(DNA)
        chars.set_features(dna_strings)
        ulong = StringUlongFeatures(chars.get_alphabet())
        ulong.obtain_from_char(chars, order - 1, order, gap, reverse)
        return ulong

    train = _ulong_features(fm_train_dna)
    test = _ulong_features(fm_test_dna)

    # One preprocessor, initialised on the training features, is attached to
    # and applied on the training set first and the test set second.
    sorter = SortUlongString()
    sorter.init(train)
    for feats in (train, test):
        feats.add_preprocessor(sorter)
        feats.apply_preprocessor()

    kernel = CommUlongStringKernel(train, train, use_sign)
    km_train = kernel.get_kernel_matrix()

    kernel.init(train, test)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def get_kernel_matrix(li):
    """Compute the CommUlongString kernel matrix of a list of strings.

    ``li`` is loaded into RAWBYTE StringCharFeatures, converted to
    StringUlongFeatures (order 6, gap 2, no reversal), sorted with
    SortUlongString, and used as both sides of a CommUlongStringKernel.

    Returns whatever ``kernel.get_kernel_matrix()`` returns for that kernel.
    """
    kmer_order, kmer_gap, reverse_kmers = 6, 2, False

    raw_chars = StringCharFeatures(RAWBYTE)
    raw_chars.set_features(li)

    # The ulong features are built on the alphabet of the character features.
    ulong_feats = StringUlongFeatures(raw_chars.get_alphabet())
    ulong_feats.obtain_from_char(raw_chars, kmer_order - 1, kmer_order,
                                 kmer_gap, reverse_kmers)

    # CommUlongStringKernel needs sorted features.
    sorter = SortUlongString()
    sorter.init(ulong_feats)
    ulong_feats.add_preproc(sorter)
    ulong_feats.apply_preproc()

    # Kernel of the features against themselves; use_sign is False.
    self_kernel = CommUlongStringKernel(ulong_feats, ulong_feats, False)
    return self_kernel.get_kernel_matrix()
def get_kernel_matrix(li):
    """Return the kernel matrix of the strings in ``li`` against themselves.

    Steps, in order:
    1. load ``li`` into StringCharFeatures over the RAWBYTE alphabet;
    2. derive StringUlongFeatures from them (order 6, gap 2, reverse False);
    3. sort the ulong features with a SortUlongString preprocessor;
    4. build a CommUlongStringKernel (use_sign False) on the sorted features
       and return its kernel matrix.
    """
    settings = {'order': 6, 'gap': 2, 'reverse': False}

    char_features = StringCharFeatures(RAWBYTE)
    char_features.set_features(li)

    # Reuse the alphabet of the character features for the ulong features.
    alphabet = char_features.get_alphabet()
    sorted_features = StringUlongFeatures(alphabet)
    sorted_features.obtain_from_char(
        char_features,
        settings['order'] - 1,
        settings['order'],
        settings['gap'],
        settings['reverse'],
    )

    # CommUlongStringKernel needs sorted features.
    sort_step = SortUlongString()
    sort_step.init(sorted_features)
    sorted_features.add_preproc(sort_step)
    sorted_features.apply_preproc()

    sign_only = False
    train_kernel = CommUlongStringKernel(sorted_features, sorted_features,
                                         sign_only)
    matrix = train_kernel.get_kernel_matrix()
    return matrix
def kernel_comm_ulong_string_modular(fm_train_dna=traindat,
                                     fm_test_dna=testdat,
                                     order=3,
                                     gap=0,
                                     reverse=False):
    """Compute CommUlongString kernel matrices for DNA train and test strings.

    Arguments:
    fm_train_dna -- training strings, loaded into DNA StringCharFeatures
    fm_test_dna -- test strings, loaded into DNA StringCharFeatures
    order, gap, reverse -- forwarded to ``obtain_from_char`` (which is called
                           with ``order - 1`` and ``order``)

    Return:
    (km_train, km_test, kernel) -- the train/train matrix, the train/test
    matrix and the kernel object, which is left initialised on train/test
    """
    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringUlongFeatures, StringCharFeatures, DNA
    from shogun.PreProc import SortUlongString

    def _encode(sequences):
        # DNA characters -> ulong features sharing the same alphabet.
        as_chars = StringCharFeatures(DNA)
        as_chars.set_features(sequences)
        as_ulong = StringUlongFeatures(as_chars.get_alphabet())
        as_ulong.obtain_from_char(as_chars, order - 1, order, gap, reverse)
        return as_ulong

    # The training features are converted and sorted before the test
    # features are built.
    train_feats = _encode(fm_train_dna)
    sorter = SortUlongString()
    sorter.init(train_feats)
    train_feats.add_preproc(sorter)
    train_feats.apply_preproc()

    # The test features reuse the preprocessor initialised on the training set.
    test_feats = _encode(fm_test_dna)
    test_feats.add_preproc(sorter)
    test_feats.apply_preproc()

    # use_sign is fixed to False here.
    kernel = CommUlongStringKernel(train_feats, train_feats, False)
    km_train = kernel.get_kernel_matrix()

    kernel.init(train_feats, test_feats)
    km_test = kernel.get_kernel_matrix()

    return km_train, km_test, kernel
def create_kernel(kname, kparam, feats_train):
    """Call the corresponding constructor for the kernel.

    Arguments:
    kname -- kernel identifier: 'gauss', 'linear', 'poly', 'wd', 'spec',
             'cumspec', 'spec2', 'cumspec2', 'localalign' or 'localimprove'
    kparam -- dict of kernel parameters; the keys read depend on kname:
              'gauss' -> 'width'; 'linear' -> 'scale';
              'poly' -> 'degree', 'inhomogene', 'normal';
              'wd' -> 'degree', 'seqlength', 'shift';
              'cumspec' and 'cumspec2' -> 'degree';
              'localimprove' -> 'length', 'indeg', 'outdeg'
    feats_train -- training features, used for both sides of the kernel;
                   for 'spec2' and 'cumspec2' it is indexed with the keys
                   'f0' and 'f1'

    Return:
    the constructed kernel, with its cache size set to 32

    Raise:
    ValueError -- if kname is not one of the identifiers listed above
    """
    if kname == 'gauss':
        kernel = GaussianKernel(feats_train, feats_train, kparam['width'])
    elif kname == 'linear':
        kernel = LinearKernel(feats_train, feats_train)
        kernel.set_normalizer(AvgDiagKernelNormalizer(kparam['scale']))
    elif kname == 'poly':
        kernel = PolyKernel(feats_train, feats_train, kparam['degree'],
                            kparam['inhomogene'], kparam['normal'])
    elif kname == 'wd':
        kernel = WeightedDegreePositionStringKernel(feats_train, feats_train,
                                                    kparam['degree'])
        kernel.set_normalizer(
            AvgDiagKernelNormalizer(float(kparam['seqlength'])))
        # The same shift value is used at every one of the seqlength entries.
        kernel.set_shifts(
            kparam['shift'] * numpy.ones(kparam['seqlength'],
                                         dtype=numpy.int32))
    elif kname == 'spec':
        kernel = CommUlongStringKernel(feats_train, feats_train)
    elif kname == 'cumspec':
        kernel = WeightedCommWordStringKernel(feats_train, feats_train)
        kernel.set_weights(numpy.ones(kparam['degree']))
    elif kname == 'spec2':
        # Combined kernel with one sub-kernel per feature set, 'f0' then 'f1'.
        kernel = CombinedKernel()
        for key in ('f0', 'f1'):
            subkernel = CommWordStringKernel(feats_train[key],
                                             feats_train[key])
            subkernel.io.disable_progress()
            kernel.append_kernel(subkernel)
    elif kname == 'cumspec2':
        # Same layout as 'spec2', with all-ones weights on each sub-kernel.
        kernel = CombinedKernel()
        for key in ('f0', 'f1'):
            subkernel = WeightedCommWordStringKernel(feats_train[key],
                                                     feats_train[key])
            subkernel.set_weights(numpy.ones(kparam['degree']))
            subkernel.io.disable_progress()
            kernel.append_kernel(subkernel)
    elif kname == 'localalign':
        kernel = LocalAlignmentStringKernel(feats_train, feats_train)
    elif kname == 'localimprove':
        kernel = LocalityImprovedStringKernel(feats_train, feats_train,
                                              kparam['length'],
                                              kparam['indeg'],
                                              kparam['outdeg'])
    else:
        # Fail here with a clear message; otherwise ``kernel`` would be
        # unbound at the set_cache_size call below.
        raise ValueError('Unknown kernel %s' % kname)

    kernel.set_cache_size(32)
    return kernel
def get_weighted_spectrum_kernel(subfeats_list, options):
    """build a uniformly weighted combined spectrum kernel

    One sub-kernel is created for every k from options.kmerlen to
    options.kmerlen2 (inclusive): a CommWordStringKernel for k <= 8 and a
    CommUlongStringKernel for larger k. The sub-feature objects are gathered
    into a CombinedFeatures object, which is used for both sides of the
    combined kernel.

    Arguments:
    subfeats_list -- list of sub-feature objects
    options -- object containing option data (kmerlen and kmerlen2 are read)

    Return:
    CombinedKernel initialised on the combined features, with every
    sub-kernel weighted 1 / (number of sub-kernels)
    """
    kmerlen = options.kmerlen
    kmerlen2 = options.kmerlen2

    subkernels = 0
    kernel = CombinedKernel()
    feats = CombinedFeatures()

    for subfeats in subfeats_list:
        feats.append_feature_obj(subfeats)

    for k in xrange(kmerlen, kmerlen2 + 1):
        # NOTE(review): (10, False) are presumably cache size and use_sign --
        # confirm against the Shogun constructor signature.
        if k <= 8:
            subkernel = CommWordStringKernel(10, False)
        else:
            subkernel = CommUlongStringKernel(10, False)

        kernel.append_kernel(subkernel)
        subkernels += 1

    kernel.init(feats, feats)

    # The weight is uniform across sub-kernels: with 8 sub-kernels each one
    # gets 1 / 8 = 0.125.
    kernel.set_subkernel_weights(
        numpy.array([1 / float(subkernels)] * subkernels,
                    numpy.dtype('float64')))

    return kernel
def get_spectrum_kernel(feats, options):
    """build the spectrum kernel matching the configured k-mer length

    Arguments:
    feats -- feature object, used for both sides of the kernel
    options -- object containing option data (kmerlen is read)

    Return:
    CommWordStringKernel when options.kmerlen <= 8,
    CommUlongStringKernel otherwise
    """
    # k-mer lengths up to 8 use the word kernel; longer ones the ulong kernel.
    uses_word_kernel = options.kmerlen <= 8
    kernel_class = (CommWordStringKernel if uses_word_kernel
                    else CommUlongStringKernel)
    return kernel_class(feats, feats)
def get_weighted_spectrum_kernel(subfeats_list, options):
    """build a combined spectrum kernel whose sub-kernels share one weight

    For every k from options.kmerlen to options.kmerlen2 (inclusive) one
    sub-kernel is appended: CommWordStringKernel for k <= 8,
    CommUlongStringKernel otherwise. All sub-feature objects are collected
    into a CombinedFeatures object that serves as both sides of the kernel.

    Arguments:
    subfeats_list -- list of sub-feature objects
    options -- object containing option data (kmerlen and kmerlen2 are read)

    Return:
    CombinedKernel initialised on the combined features, each sub-kernel
    weighted 1 / (number of sub-kernels)
    """
    first_k = options.kmerlen
    last_k = options.kmerlen2

    combined_kernel = CombinedKernel()
    combined_feats = CombinedFeatures()
    for part in subfeats_list:
        combined_feats.append_feature_obj(part)

    n_subkernels = 0
    for k in xrange(first_k, last_k + 1):
        # Short k-mers use the word kernel; longer ones the ulong kernel.
        kernel_class = (CommWordStringKernel if k <= 8
                        else CommUlongStringKernel)
        combined_kernel.append_kernel(kernel_class(10, False))
        n_subkernels += 1

    combined_kernel.init(combined_feats, combined_feats)

    # Uniform weighting over all sub-kernels.
    uniform_weight = 1 / float(n_subkernels)
    weight_vector = numpy.array([uniform_weight] * n_subkernels,
                                numpy.dtype('float64'))
    combined_kernel.set_subkernel_weights(weight_vector)

    return combined_kernel