def _run_feats_string_complex():
    """Run distances with complex StringFeatures, like WordString.

    Word features are built once from DNA data and passed to _compute for
    CanberraWordDistance, ManhattanWordDistance and HammingWordDistance
    (the latter with use_sign False and then True).
    """
    params = {
        'accuracy': 1e-7,
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_dna(num_vec_test=dataop.NUM_VEC_TRAIN + 42),
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # Every entry is applied on top of the current params, so a setting
    # stays in effect until a later entry overrides it.
    overrides = (
        {'name': 'CanberraWordDistance'},
        {'accuracy': 1e-8, 'name': 'ManhattanWordDistance'},
        {'name': 'HammingWordDistance',
         'args': {'key': ('use_sign',), 'val': (False,)}},
        {'name': 'HammingWordDistance',
         'args': {'key': ('use_sign',), 'val': (True,)}},
    )
    for override in overrides:
        params.update(override)
        _compute(feats, params)
def _run(name):
    """Run generator for a specific distribution method.

    Builds Word features from DNA data, trains the named distribution on the
    training features, stores dist.get_log_likelihood_sample() and the
    result of _get_derivatives in the output and passes it to fileop.write.

    @param name Name of the distribution method; looked up as an attribute
                of distribution
    """
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
    }
    output = fileop.get_output(category.DISTRIBUTION, params)
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # Resolve the class by attribute lookup rather than eval(): a valid name
    # yields the same object, while a malformed name fails cleanly instead
    # of being executed as an arbitrary expression.
    dist = getattr(distribution, name)(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
def _run_feats_string_complex():
    """Run distances with complex StringFeatures, like WordString.

    Word features are built once from DNA data and passed to _compute for
    CanberraWordDistance, ManhattanWordDistance and HammingWordDistance
    (the latter with use_sign False and then True).
    """
    params = {
        'accuracy': 1e-7,
        'feature_class': 'string_complex',
        'feature_type': 'Word',
        'data': dataop.get_dna(num_vec_test=dataop.NUM_VEC_TRAIN + 42),
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # Every entry is applied on top of the current params, so a setting
    # stays in effect until a later entry overrides it.
    overrides = (
        {'name': 'CanberraWordDistance'},
        {'accuracy': 1e-8, 'name': 'ManhattanWordDistance'},
        {'name': 'HammingWordDistance',
         'args': {'key': ('use_sign',), 'val': (False,)}},
        {'name': 'HammingWordDistance',
         'args': {'key': ('use_sign',), 'val': (True,)}},
    )
    for override in overrides:
        params.update(override)
        _compute(feats, params)
def _run(name):
    """Run generator for a specific distribution method.

    Builds Word features from DNA data, trains the named distribution on the
    training features, stores dist.get_log_likelihood_sample() and the
    result of _get_derivatives in the output and passes it to fileop.write.

    @param name Name of the distribution method; looked up as an attribute
                of distribution
    """
    # put some constantness into randomness
    Math_init_random(INIT_RANDOM)

    params = {
        'name': name,
        'accuracy': 1e-7,
        'data': dataop.get_dna(),
        'alphabet': 'DNA',
        'feature_class': 'string_complex',
        'feature_type': 'Word',
    }
    output = fileop.get_output(category.DISTRIBUTION, params)
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    # Resolve the class by attribute lookup rather than eval(): a valid name
    # yields the same object, while a malformed name fails cleanly instead
    # of being executed as an arbitrary expression.
    dist = getattr(distribution, name)(feats['train'])
    dist.train()

    output[PREFIX + 'likelihood'] = dist.get_log_likelihood_sample()
    output[PREFIX + 'derivatives'] = _get_derivatives(
        dist, feats['train'].get_num_vectors())

    fileop.write(category.DISTRIBUTION, output)
def _run_feats_string_complex():
    """Run kernel with complex StringFeatures.

    Word features are used for the CommWordString, WeightedCommWordString,
    PolyMatchWordString and MatchWordString kernels; Ulong features are
    used for CommUlongString. The same params dict is updated step by step,
    so settings persist until they are overwritten.
    """
    params = {"data": dataop.get_dna(), "feature_class": "string_complex"}

    # --- Word features -------------------------------------------------
    params["feature_type"] = "Word"
    word_feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])

    params.update({
        "name": "CommWordString",
        "accuracy": 1e-9,
        "args": {"key": ("size", "use_sign"), "val": (10, False)},
    })
    _compute(word_feats, params)

    # Same accuracy and args as CommWordString.
    params["name"] = "WeightedCommWordString"
    _compute(word_feats, params)

    params.update({
        "name": "PolyMatchWordString",
        "accuracy": 1e-10,
        "args": {"key": ("size", "degree", "inhomogene"),
                 "val": (10, 3, True)},
    })
    _compute(word_feats, params)

    # Second PolyMatch run with inhomogene switched off.
    params["args"]["val"] = (10, 3, False)
    _compute(word_feats, params)

    params.update({
        "name": "MatchWordString",
        "args": {"key": ("size", "degree"), "val": (10, 3)},
    })
    _compute(word_feats, params)

    # --- Ulong features ------------------------------------------------
    params["feature_type"] = "Ulong"
    params["accuracy"] = 1e-9
    ulong_feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])

    params.update({
        "name": "CommUlongString",
        "args": {"key": ("size", "use_sign"), "val": (10, False)},
    })
    _compute(ulong_feats, params)
def _run_combined():
    """Run Combined kernel.

    For every entry of subkdata a sub-kernel and a pair of train/test
    features are created and appended to a CombinedKernel and to the
    CombinedFeatures objects. The kernel matrices for train/train and
    train/test are then stored in the output together with all parameters
    and passed to fileop.write.
    """
    kern = kernel.CombinedKernel()
    feats = {'train': CombinedFeatures(), 'test': CombinedFeatures()}
    output = {}
    params = {'name': 'Combined', 'accuracy': 1e-7}
    subkdata = [
        {
            'name': 'FixedDegreeString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size', 'degree'), 'val': (10, 3)},
        },
        {
            'name': 'PolyMatchString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {
                'key': ('size', 'degree', 'inhomogene'),
                'val': (10, 3, True),
            },
        },
        {
            'name': 'LocalAlignmentString',
            'feature_class': 'string',
            'feature_type': 'Char',
            'args': {'key': ('size',), 'val': (10,)},
        },
    ]

    # enumerate() supplies the index used in the output prefix, replacing
    # the hand-maintained counter.
    for i, sd in enumerate(subkdata):
        # Attribute lookup instead of eval(): resolves the same class
        # without evaluating a string as code.
        kfun = getattr(kernel, sd['name'] + 'Kernel')
        subk = kfun(*sd['args']['val'])

        # The data is stored in sd itself because sd is what get_output
        # receives below.
        sd['data'] = dataop.get_dna()
        subkfeats = featop.get_features(
            sd['feature_class'], sd['feature_type'], sd['data'])
        output.update(fileop.get_output(
            category.KERNEL, sd, 'subkernel' + str(i) + '_'))

        kern.append_kernel(subk)
        feats['train'].append_feature_obj(subkfeats['train'])
        feats['test'].append_feature_obj(subkfeats['test'])

    output.update(fileop.get_output(category.KERNEL, params))

    kern.init(feats['train'], feats['train'])
    output['kernel_matrix_train'] = kern.get_kernel_matrix()
    kern.init(feats['train'], feats['test'])
    output['kernel_matrix_test'] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _run_pie():
    """Run kernel with PluginEstimate.

    Word features built from DNA data are passed to _compute_pie once per
    kernel name, reusing the same params dict.
    """
    params = {
        "data": dataop.get_dna(),
        "accuracy": 1e-6,
        "feature_class": "string_complex",
        "feature_type": "Word",
    }
    feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])

    for kernel_name in ("HistogramWordString", "SalzbergWordString"):
        params["name"] = kernel_name
        _compute_pie(feats, params)
def _run_feats_string():
    """Run kernel with StringFeatures.

    Char string features are built once from DNA data and passed to
    _compute for a series of string kernels. The same params dict is
    updated between runs, so a setting persists until it is overwritten.
    """

    def kernel_args(keys, vals):
        # Kernel arguments are stored as parallel tuples of names and values.
        return {"key": keys, "val": vals}

    params = {
        "accuracy": 1e-9,
        "data": dataop.get_dna(),
        "feature_class": "string",
        "feature_type": "Char",
    }
    feats = featop.get_features(
        params["feature_class"], params["feature_type"], params["data"])

    params.update({
        "name": "FixedDegreeString",
        "args": kernel_args(("size", "degree"), (10, 3)),
    })
    _compute(feats, params)

    params.update({
        "accuracy": 0,
        "name": "LocalAlignmentString",
        "args": kernel_args(("size",), (10,)),
    })
    _compute(feats, params)

    params.update({
        "accuracy": 1e-10,
        "name": "PolyMatchString",
        "args": kernel_args(("size", "degree", "inhomogene"), (10, 3, True)),
    })
    _compute(feats, params)
    # Second PolyMatch run with inhomogene switched off.
    params["args"]["val"] = (10, 3, False)
    _compute(feats, params)

    params.update({
        "accuracy": 1e-15,
        "name": "SimpleLocalityImprovedString",
        "args": kernel_args(
            ("size", "length", "inner_degree", "outer_degree"),
            (10, 5, 7, 5)),
    })
    _compute(feats, params)

    # LocalityImprovedString is not run here: it was marked as buggy.

    params.update({
        "name": "WeightedDegreeString",
        "accuracy": 1e-9,
        "args": kernel_args(("degree",), (20,)),
    })
    _compute(feats, params)
    params["args"] = kernel_args(("degree",), (1,))
    _compute(feats, params)

    params.update({
        "name": "WeightedDegreePositionString",
        "args": kernel_args(("size", "degree"), (10, 20)),
    })
    _compute(feats, params)
    params["args"] = kernel_args(("size", "degree"), (10, 1))
    _compute(feats, params)

    params.update({
        "name": "OligoString",
        "args": kernel_args(("size", "k", "width"), (10, 3, 1.2)),
    })
    _compute(feats, params)
    params["args"] = kernel_args(("size", "k", "width"), (10, 4, 1.7))
    _compute(feats, params)

    # LinearString is run with a normalizer and without kernel arguments.
    params.update({
        "name": "LinearString",
        "accuracy": 1e-8,
        "normalizer": kernel.AvgDiagKernelNormalizer(),
    })
    del params["args"]
    _compute(feats, params)
def _run_combined():
    """Run Combined kernel.

    For every entry of subkdata a sub-kernel and a pair of train/test
    features are created and appended to a CombinedKernel and to the
    CombinedFeatures objects. The kernel matrices for train/train and
    train/test are then stored in the output together with all parameters
    and passed to fileop.write.
    """
    kern = kernel.CombinedKernel()
    feats = {"train": CombinedFeatures(), "test": CombinedFeatures()}
    output = {}
    params = {"name": "Combined", "accuracy": 1e-7}
    subkdata = [
        {
            "name": "FixedDegreeString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size", "degree"), "val": (10, 3)},
        },
        {
            "name": "PolyMatchString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {
                "key": ("size", "degree", "inhomogene"),
                "val": (10, 3, True),
            },
        },
        {
            "name": "LocalAlignmentString",
            "feature_class": "string",
            "feature_type": "Char",
            "args": {"key": ("size",), "val": (10,)},
        },
    ]

    # enumerate() supplies the index used in the output prefix, replacing
    # the hand-maintained counter.
    for i, sd in enumerate(subkdata):
        # Attribute lookup instead of eval(): resolves the same class
        # without evaluating a string as code.
        kfun = getattr(kernel, sd["name"] + "Kernel")
        subk = kfun(*sd["args"]["val"])

        # The data is stored in sd itself because sd is what get_output
        # receives below.
        sd["data"] = dataop.get_dna()
        subkfeats = featop.get_features(
            sd["feature_class"], sd["feature_type"], sd["data"])
        output.update(fileop.get_output(
            category.KERNEL, sd, "subkernel" + str(i) + "_"))

        kern.append_kernel(subk)
        feats["train"].append_feature_obj(subkfeats["train"])
        feats["test"].append_feature_obj(subkfeats["test"])

    output.update(fileop.get_output(category.KERNEL, params))

    kern.init(feats["train"], feats["train"])
    output["kernel_matrix_train"] = kern.get_kernel_matrix()
    kern.init(feats["train"], feats["test"])
    output["kernel_matrix_test"] = kern.get_kernel_matrix()

    fileop.write(category.KERNEL, output)
def _run_pie():
    """Run kernel with PluginEstimate.

    Word features built from DNA data are passed to _compute_pie once per
    kernel name, reusing the same params dict.
    """
    params = {
        'data': dataop.get_dna(),
        'accuracy': 1e-6,
        'feature_class': 'string_complex',
        'feature_type': 'Word',
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    for kernel_name in ('HistogramWordString', 'SalzbergWordString'):
        params['name'] = kernel_name
        _compute_pie(feats, params)
def _run_pie():
    """Run kernel with PluginEstimate.

    Word features built from DNA data are passed to _compute_pie once per
    kernel name, reusing the same params dict.
    """
    params = {
        'data': dataop.get_dna(),
        'accuracy': 1e-6,
        'feature_class': 'string_complex',
        'feature_type': 'Word',
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    for kernel_name in ('HistogramWordString', 'SalzbergWordString'):
        params['name'] = kernel_name
        _compute_pie(feats, params)
def _run_mindygram():
    """Run Mindygram kernel.

    NOTE: this is currently a no-op. The unconditional return at the top
    leaves the function before any parameters or features are built, so
    _compute is never called.
    """
    return

    # Everything below is unreachable while the return above is in place.
    # NOTE(review): presumably kept as the recipe for re-enabling the
    # generator -- confirm before removing.
    params = {
        'name': 'MindyGram',
        'accuracy': 1e-8,
        'data': dataop.get_dna(),
        'feature_class': 'mindy',
        'args': {'key': ('measure', 'width'), 'val': ('MEASURE', 1.5)},
    }
    feats = {}
    for split in ('train', 'test'):
        feats[split] = MindyGramFeatures('DNA', 'freq', '%20.,', 0)

    _compute(feats, params)
def _run_mindygram():
    """Run Mindygram kernel.

    NOTE: this is currently a no-op. The unconditional return at the top
    leaves the function before any parameters or features are built, so
    _compute is never called.
    """
    return

    # Everything below is unreachable while the return above is in place.
    # NOTE(review): presumably kept as the recipe for re-enabling the
    # generator -- confirm before removing.
    params = {
        "name": "MindyGram",
        "accuracy": 1e-8,
        "data": dataop.get_dna(),
        "feature_class": "mindy",
        "args": {"key": ("measure", "width"), "val": ("MEASURE", 1.5)},
    }
    feats = {}
    for split in ("train", "test"):
        feats[split] = MindyGramFeatures("DNA", "freq", "%20.,", 0)

    _compute(feats, params)
def _run_svm_linear():
    """Run all SVMs based on (Sparse) Linear Classifiers.

    The first part runs on sparse Real features built from cloud data, the
    second part runs SVMOcas on 'wd' Byte features built from DNA data.
    """
    # --- sparse real-valued features -----------------------------------
    sparse_params = {
        'type': 'linear',
        # the first group of SVMs is run with the bias enabled
        'bias_enabled': True,
        'data': dataop.get_clouds(2),
        'feature_class': 'simple',
        'feature_type': 'Real',
        'label_type': 'twoclass',
    }
    sparse_feats = featop.get_features(
        sparse_params['feature_class'], sparse_params['feature_type'],
        sparse_params['data'], sparse=True)

    _loop_svm(('LibLinear', 'SVMLin', 'SVMSGD'), sparse_params, sparse_feats)

    # SubGradientSVM needs max_train_time to terminate;
    # raising it up to 2. does not improve test results.
    sparse_params['bias_enabled'] = False
    sparse_params['max_train_time'] = .5
    _loop_svm(('SubGradientSVM',), sparse_params, sparse_feats)

    # SVMOcas is run with the same settings, including max_train_time.
    ocas = ('SVMOcas',)
    _loop_svm(ocas, sparse_params, sparse_feats)

    # --- wd features on DNA, again with SVMOcas ------------------------
    wd_params = {
        'type': 'linear',
        'bias_enabled': False,
        'label_type': 'twoclass',
        'feature_class': 'wd',
        'feature_type': 'Byte',
        'data': dataop.get_dna(),
        'alphabet': 'RAWDNA',
        'order': 1,
    }
    wd_feats = featop.get_features(
        wd_params['feature_class'], wd_params['feature_type'],
        wd_params['data'], wd_params['order'])
    _loop_svm(ocas, wd_params, wd_feats)
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

    The Comm<ftype>String kernel is computed through _compute, the output
    is extended with the entries for the Sort<ftype>String preprocessor and
    the result is passed to fileop.write.

    @param ftype Feature type, like Word
    """
    kernel_params = {
        "name": "Comm" + ftype + "String",
        "accuracy": 1e-9,
        "feature_class": "string_complex",
        "feature_type": ftype,
        "data": dataop.get_dna(),
    }
    feats = featop.get_features(
        kernel_params["feature_class"], kernel_params["feature_type"],
        kernel_params["data"])

    # string_complex gets preproc added implicitly on Word/Ulong feats
    output = _compute(feats, kernel_params)

    preproc_params = {"name": "Sort" + ftype + "String"}
    output.update(fileop.get_output(category.PREPROC, preproc_params))

    fileop.write(category.PREPROC, output)
def _run_feats_string_complex():
    """Run kernel with complex StringFeatures.

    Word features are used for the CommWordString, WeightedCommWordString,
    PolyMatchWordString and MatchWordString kernels; Ulong features are
    used for CommUlongString. The same params dict is updated step by step,
    so settings persist until they are overwritten.
    """
    params = {'data': dataop.get_dna(), 'feature_class': 'string_complex'}

    # --- Word features -------------------------------------------------
    params['feature_type'] = 'Word'
    word_feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    params.update({
        'name': 'CommWordString',
        'accuracy': 1e-9,
        'args': {'key': ('size', 'use_sign'), 'val': (10, False)},
    })
    _compute(word_feats, params)

    # Same accuracy and args as CommWordString.
    params['name'] = 'WeightedCommWordString'
    _compute(word_feats, params)

    params.update({
        'name': 'PolyMatchWordString',
        'accuracy': 1e-10,
        'args': {'key': ('size', 'degree', 'inhomogene'),
                 'val': (10, 3, True)},
    })
    _compute(word_feats, params)

    # Second PolyMatch run with inhomogene switched off.
    params['args']['val'] = (10, 3, False)
    _compute(word_feats, params)

    params.update({
        'name': 'MatchWordString',
        'args': {'key': ('size', 'degree'), 'val': (10, 3)},
    })
    _compute(word_feats, params)

    # --- Ulong features ------------------------------------------------
    params['feature_type'] = 'Ulong'
    params['accuracy'] = 1e-9
    ulong_feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    params.update({
        'name': 'CommUlongString',
        'args': {'key': ('size', 'use_sign'), 'val': (10, False)},
    })
    _compute(ulong_feats, params)
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

    The Comm<ftype>String kernel is computed through _compute, the output
    is extended with the entries for the Sort<ftype>String preprocessor and
    the result is passed to fileop.write.

    @param ftype Feature type, like Word
    """
    kernel_params = {
        'name': 'Comm' + ftype + 'String',
        'accuracy': 1e-9,
        'feature_class': 'string_complex',
        'feature_type': ftype,
        'data': dataop.get_dna(),
    }
    feats = featop.get_features(
        kernel_params['feature_class'], kernel_params['feature_type'],
        kernel_params['data'])

    # string_complex gets preproc added implicitly on Word/Ulong feats
    output = _compute(feats, kernel_params)

    preproc_params = {'name': 'Sort' + ftype + 'String'}
    output.update(fileop.get_output(category.PREPROC, preproc_params))

    fileop.write(category.PREPROC, output)
def _run_string_complex(ftype):
    """Run preprocessor applied on complex StringFeatures.

    The Comm<ftype>String kernel is computed through _compute, the output
    is extended with the entries for the Sort<ftype>String preprocessor and
    the result is passed to fileop.write.

    @param ftype Feature type, like Word
    """
    kernel_params = {
        'name': 'Comm' + ftype + 'String',
        'accuracy': 1e-9,
        'feature_class': 'string_complex',
        'feature_type': ftype,
        'data': dataop.get_dna(),
    }
    feats = featop.get_features(
        kernel_params['feature_class'], kernel_params['feature_type'],
        kernel_params['data'])

    # string_complex gets preproc added implicitly on Word/Ulong feats
    output = _compute(feats, kernel_params)

    preproc_params = {'name': 'Sort' + ftype + 'String'}
    output.update(fileop.get_output(category.PREPROC, preproc_params))

    fileop.write(category.PREPROC, output)
def _run_wdsvmocas():
    """Run Weighted Degree SVM Ocas classifier.

    Byte features are built from DNA data with the alphabet, order, gap and
    reverse settings stored in params, then WDSVMOcas is run via _loop_svm.
    """
    num_vec = dataop.NUM_VEC_TRAIN
    params = {
        'type': 'wdsvmocas',
        'degree': 1,
        'bias_enabled': False,
        # alternative data source, left disabled: dataop.get_rawdna()
        'data': dataop.get_dna(num_vec, num_vec, num_vec),
        'feature_class': 'string_complex',
        'feature_type': 'Byte',
        'alphabet': 'RAWDNA',
        'label_type': 'twoclass',
        'order': 1,
        'gap': 0,
        'reverse': False,
    }

    # The alphabet is kept by name in params; eval() turns that name into
    # the object of the same name that is in scope here.
    feature_args = (
        params['feature_class'],
        params['feature_type'],
        params['data'],
        eval(params['alphabet']),
        params['order'],
        params['gap'],
        params['reverse'],
    )
    feats = featop.get_features(*feature_args)

    _loop_svm(('WDSVMOcas',), params, feats)
def _run_svm_kernel():
    """Run all kernel-based SVMs.

    Each section builds kernel parameters, features, a kernel object and the
    kernel part of the output, then runs a group of SVMs through _loop_svm.
    The SVM params dict is shared by all sections and modified in between.
    """

    def features_for(kernel_params):
        # Features always come from the same three kernel settings.
        return featop.get_features(
            kernel_params['feature_class'], kernel_params['feature_type'],
            kernel_params['data'])

    def loop_plain_linadd_batch(svm_names, feats, kern, output):
        # Three runs: without the two flags, with linadd_enabled set, and
        # with both linadd_enabled and batch_enabled set.
        for flag in ('linadd_enabled', 'batch_enabled'):
            params.pop(flag, None)
        _loop_svm(svm_names, params, feats, kern, output)
        for flag in ('linadd_enabled', 'batch_enabled'):
            params[flag] = True
            _loop_svm(svm_names, params, feats, kern, output)

    # --- Gaussian kernel, two-class ------------------------------------
    kernel_params = {
        'name': 'Gaussian',
        'args': {'key': ('width',), 'val': (1.5,)},
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_clouds(2),
    }
    feats = features_for(kernel_params)
    kern = GaussianKernel(10, *kernel_params['args']['val'])
    output = fileop.get_output(category.KERNEL, kernel_params)

    params = {'type': 'kernel', 'label_type': 'twoclass'}
    _loop_svm(
        ('SVMLight', 'LibSVM', 'GPBTSVM', 'MPDSVM'),
        params, feats, kern, output)

    # --- Gaussian kernel, one-class (no label type) --------------------
    params['label_type'] = None
    _loop_svm(('LibSVMOneClass',), params, feats, kern, output)

    # --- Gaussian kernel, multi-class ----------------------------------
    # The kernel object is reused; only data, features and output change.
    params['label_type'] = 'series'
    kernel_params['data'] = dataop.get_clouds(3)
    feats = features_for(kernel_params)
    output = fileop.get_output(category.KERNEL, kernel_params)
    _loop_svm(('LibSVMMultiClass', 'GMNPSVM'), params, feats, kern, output)

    # All remaining sections use this pair of two-class SVMs.
    svm_pair = ('SVMLight', 'GPBTSVM')
    params['label_type'] = 'twoclass'

    # --- Linear kernel with normalizer ---------------------------------
    kernel_params = {
        'name': 'Linear',
        'feature_class': 'simple',
        'feature_type': 'Real',
        'data': dataop.get_clouds(2),
        'normalizer': AvgDiagKernelNormalizer(),
    }
    feats = features_for(kernel_params)
    kern = LinearKernel()
    kern.set_normalizer(kernel_params['normalizer'])
    output = fileop.get_output(category.KERNEL, kernel_params)
    _loop_svm(svm_pair, params, feats, kern, output)

    # --- CommWordString kernel -----------------------------------------
    kernel_params = {
        'name': 'CommWordString',
        'args': {'key': ('use_sign',), 'val': (False,)},
        'data': dataop.get_dna(),
        'feature_class': 'string_complex',
        'feature_type': 'Word',
    }
    feats = features_for(kernel_params)
    kern = CommWordStringKernel(10, *kernel_params['args']['val'])
    output = fileop.get_output(category.KERNEL, kernel_params)
    _loop_svm(svm_pair, params, feats, kern, output)

    # --- CommUlongString kernel ----------------------------------------
    kernel_params = {
        'name': 'CommUlongString',
        'args': {'key': ('use_sign',), 'val': (False,)},
        'data': dataop.get_dna(),
        'feature_class': 'string_complex',
        'feature_type': 'Ulong',
    }
    feats = features_for(kernel_params)
    kern = CommUlongStringKernel(10, *kernel_params['args']['val'])
    output = fileop.get_output(category.KERNEL, kernel_params)
    _loop_svm(svm_pair, params, feats, kern, output)

    # --- WeightedDegreeString kernel -----------------------------------
    kernel_params = {
        'name': 'WeightedDegreeString',
        'args': {'key': ('degree',), 'val': (3,)},
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char',
    }
    feats = features_for(kernel_params)
    kern = WeightedDegreeStringKernel(*kernel_params['args']['val'])
    output = fileop.get_output(category.KERNEL, kernel_params)
    loop_plain_linadd_batch(svm_pair, feats, kern, output)

    # --- WeightedDegreePositionString kernel ---------------------------
    kernel_params = {
        'name': 'WeightedDegreePositionString',
        'args': {'key': ('degree',), 'val': (20,)},
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char',
    }
    feats = features_for(kernel_params)
    kern = WeightedDegreePositionStringKernel(
        10, *kernel_params['args']['val'])
    output = fileop.get_output(category.KERNEL, kernel_params)
    # The helper first drops the two flags left over from the previous
    # section, then repeats the plain / linadd / batch sequence.
    loop_plain_linadd_batch(svm_pair, feats, kern, output)
def _run_feats_string():
    """Run kernel with StringFeatures.

    Char string features are built once from DNA data and passed to
    _compute for a series of string kernels. The same params dict is
    updated between runs, so a setting persists until it is overwritten.
    """

    def kernel_args(keys, vals):
        # Kernel arguments are stored as parallel tuples of names and values.
        return {'key': keys, 'val': vals}

    params = {
        'accuracy': 1e-9,
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char',
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    params.update({
        'name': 'FixedDegreeString',
        'args': kernel_args(('size', 'degree'), (10, 3)),
    })
    _compute(feats, params)

    params.update({
        'accuracy': 0,
        'name': 'LocalAlignmentString',
        'args': kernel_args(('size',), (10,)),
    })
    _compute(feats, params)

    params.update({
        'accuracy': 1e-10,
        'name': 'PolyMatchString',
        'args': kernel_args(('size', 'degree', 'inhomogene'), (10, 3, True)),
    })
    _compute(feats, params)
    # Second PolyMatch run with inhomogene switched off.
    params['args']['val'] = (10, 3, False)
    _compute(feats, params)

    params.update({
        'accuracy': 1e-15,
        'name': 'SimpleLocalityImprovedString',
        'args': kernel_args(
            ('size', 'length', 'inner_degree', 'outer_degree'),
            (10, 5, 7, 5)),
    })
    _compute(feats, params)

    # LocalityImprovedString is not run here: it was marked as buggy.

    params.update({
        'name': 'WeightedDegreeString',
        'accuracy': 1e-9,
        'args': kernel_args(('degree',), (20,)),
    })
    _compute(feats, params)
    params['args'] = kernel_args(('degree',), (1,))
    _compute(feats, params)

    params.update({
        'name': 'WeightedDegreePositionString',
        'args': kernel_args(('size', 'degree'), (10, 20)),
    })
    _compute(feats, params)
    params['args'] = kernel_args(('size', 'degree'), (10, 1))
    _compute(feats, params)

    params.update({
        'name': 'OligoString',
        'args': kernel_args(('size', 'k', 'width'), (10, 3, 1.2)),
    })
    _compute(feats, params)
    params['args'] = kernel_args(('size', 'k', 'width'), (10, 4, 1.7))
    _compute(feats, params)

    # LinearString is run with a normalizer and without kernel arguments.
    params.update({
        'name': 'LinearString',
        'accuracy': 1e-8,
        'normalizer': kernel.AvgDiagKernelNormalizer(),
    })
    del params['args']
    _compute(feats, params)
def _run_feats_string():
    """Run kernel with StringFeatures.

    Char string features are built once from DNA data and passed to
    _compute for a series of string kernels. The same params dict is
    updated between runs, so a setting persists until it is overwritten.
    """

    def kernel_args(keys, vals):
        # Kernel arguments are stored as parallel tuples of names and values.
        return {'key': keys, 'val': vals}

    params = {
        'accuracy': 1e-9,
        'data': dataop.get_dna(),
        'feature_class': 'string',
        'feature_type': 'Char',
    }
    feats = featop.get_features(
        params['feature_class'], params['feature_type'], params['data'])

    params.update({
        'name': 'FixedDegreeString',
        'args': kernel_args(('size', 'degree'), (10, 3)),
    })
    _compute(feats, params)

    params.update({
        'accuracy': 0,
        'name': 'LocalAlignmentString',
        'args': kernel_args(('size',), (10,)),
    })
    _compute(feats, params)

    params.update({
        'accuracy': 1e-10,
        'name': 'PolyMatchString',
        'args': kernel_args(('size', 'degree', 'inhomogene'), (10, 3, True)),
    })
    _compute(feats, params)
    # Second PolyMatch run with inhomogene switched off.
    params['args']['val'] = (10, 3, False)
    _compute(feats, params)

    params.update({
        'accuracy': 1e-15,
        'name': 'SimpleLocalityImprovedString',
        'args': kernel_args(
            ('size', 'length', 'inner_degree', 'outer_degree'),
            (10, 5, 7, 5)),
    })
    _compute(feats, params)

    # LocalityImprovedString is not run here: it was marked as buggy.

    params.update({
        'name': 'WeightedDegreeString',
        'accuracy': 1e-9,
        'args': kernel_args(('degree',), (20,)),
    })
    _compute(feats, params)
    params['args'] = kernel_args(('degree',), (1,))
    _compute(feats, params)

    params.update({
        'name': 'WeightedDegreePositionString',
        'args': kernel_args(('size', 'degree'), (10, 20)),
    })
    _compute(feats, params)
    params['args'] = kernel_args(('size', 'degree'), (10, 1))
    _compute(feats, params)

    params.update({
        'name': 'OligoString',
        'args': kernel_args(('size', 'k', 'width'), (10, 3, 1.2)),
    })
    _compute(feats, params)
    params['args'] = kernel_args(('size', 'k', 'width'), (10, 4, 1.7))
    _compute(feats, params)

    # LinearString is run with a normalizer and without kernel arguments.
    params.update({
        'name': 'LinearString',
        'accuracy': 1e-8,
        'normalizer': kernel.AvgDiagKernelNormalizer(),
    })
    del params['args']
    _compute(feats, params)