def evaluate(options, svm, kernel, features, motifs):
    """Classify the query sequences with an already trained SVM.

    The query motif data is loaded through a ``MotifFinder`` fed with
    ``options.query`` (via ``setFastaFile``) and ``options.qgff`` (via
    ``setMotifs``).  One feature object per entry of ``motifs`` is appended
    to a ``CombinedFeatures`` container, followed by one feature object
    built from the transposed position matrix.  ``kernel`` is then
    re-initialised with the training ``features`` on the left-hand side and
    the query features on the right-hand side before ``svm.classify()`` is
    called.

    A tab-separated table (``#example  output  split``) is printed to
    standard output as a side effect.

    Args:
        options: Object providing ``window_width``, ``query`` and ``qgff``.
        svm: Trained classifier exposing ``classify().get_labels()``.
        kernel: Kernel object exposing ``init(lhs, rhs)``.
        features: Feature object used as left-hand side of ``kernel.init``.
        motifs: Iterable of motif identifiers; each one must be a key of the
            motif dictionary returned by the query ``MotifFinder``.

    Returns:
        A string with one line per query example of the form
        ``<gene>\\t<positive|negative>\\t<output>\\n``.

    Raises:
        AssertionError: If the query results contain no entry for at least
            one of ``motifs``.
    """
    # NOTE(review): training_run passes options.replace as a third argument
    # to MotifFinderSettings; it is not passed here -- confirm this is
    # intended.
    query = MotifFinder(finder_settings=MotifFinderSettings(
        kirmes_ini.MOTIF_LENGTH, options.window_width))
    query.setFastaFile(options.query)
    query.setMotifs(options.qgff)
    qmotifs, qpositions = query.getResults()

    feats_query = CombinedFeatures()
    wds_svm = EasySVM.EasySVM(kirmes_ini.WDS_KERNEL_PARAMETERS)

    # Explicit check instead of ``assert`` so that the validation is not
    # silently dropped when Python runs with -O.  AssertionError is kept as
    # the exception type for callers that already catch it.
    missing = set(motifs).difference(qmotifs.keys())
    if missing:
        print("The motif positions in the query sequence are incomplete, there are no positions for:")
        print(missing)
        raise AssertionError(
            "no positions in the query sequence for motifs: %s" % (missing,))

    # Features are appended in the iteration order of ``motifs``.
    for motif in motifs:
        feats_query.append_feature_obj(wds_svm.createFeatures(qmotifs[motif]))

    query_positions = array(qpositions, dtype=float64).T
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    feats_query.append_feature_obj(rbf_svm.createFeatures(query_positions))

    kernel.init(features, feats_query)
    out = svm.classify().get_labels()
    qgenes = query.getGenes()

    report_lines = []
    print("#example\toutput\tsplit")
    for index, score in enumerate(out):
        # Outputs >= 0 are reported as positive, everything else as negative.
        verdict = "\tpositive\t" if score >= 0 else "\tnegative\t"
        report_lines.append(qgenes[index] + verdict + str(score) + "\n")
        print(str(index) + "\t" + str(score) + "\t0")
    return "".join(report_lines)
def evaluate(options, svm, kernel, features, motifs):
    """Classify the query sequences with an already trained SVM.

    The query motif data is loaded through a ``MotifFinder`` fed with
    ``options.query`` (via ``setFastaFile``) and ``options.qgff`` (via
    ``setMotifs``).  One feature object per entry of ``motifs`` is appended
    to a ``CombinedFeatures`` container, followed by one feature object
    built from the transposed position matrix.  ``kernel`` is then
    re-initialised with the training ``features`` on the left-hand side and
    the query features on the right-hand side before ``svm.classify()`` is
    called.

    A tab-separated table (``#example  output  split``) is printed to
    standard output as a side effect.

    Args:
        options: Object providing ``window_width``, ``query`` and ``qgff``.
        svm: Trained classifier exposing ``classify().get_labels()``.
        kernel: Kernel object exposing ``init(lhs, rhs)``.
        features: Feature object used as left-hand side of ``kernel.init``.
        motifs: Iterable of motif identifiers; each one must be a key of the
            motif dictionary returned by the query ``MotifFinder``.

    Returns:
        A string with one line per query example of the form
        ``<gene>\\t<positive|negative>\\t<output>\\n``.

    Raises:
        AssertionError: If the query results contain no entry for at least
            one of ``motifs``.
    """
    # NOTE(review): training_run passes options.replace as a third argument
    # to MotifFinderSettings; it is not passed here -- confirm this is
    # intended.
    query = MotifFinder(finder_settings=MotifFinderSettings(
        kirmes_ini.MOTIF_LENGTH, options.window_width))
    query.setFastaFile(options.query)
    query.setMotifs(options.qgff)
    qmotifs, qpositions = query.getResults()

    feats_query = CombinedFeatures()
    wds_svm = EasySVM.EasySVM(kirmes_ini.WDS_KERNEL_PARAMETERS)

    # Explicit check instead of ``assert`` so that the validation is not
    # silently dropped when Python runs with -O.  AssertionError is kept as
    # the exception type for callers that already catch it.
    missing = set(motifs).difference(qmotifs.keys())
    if missing:
        print("The motif positions in the query sequence are incomplete, there are no positions for:")
        print(missing)
        raise AssertionError(
            "no positions in the query sequence for motifs: %s" % (missing,))

    # Features are appended in the iteration order of ``motifs``.
    for motif in motifs:
        feats_query.append_feature_obj(wds_svm.createFeatures(qmotifs[motif]))

    query_positions = array(qpositions, dtype=float64).T
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    feats_query.append_feature_obj(rbf_svm.createFeatures(query_positions))

    kernel.init(features, feats_query)
    out = svm.classify().get_labels()
    qgenes = query.getGenes()

    report_lines = []
    print("#example\toutput\tsplit")
    for index, score in enumerate(out):
        # Outputs >= 0 are reported as positive, everything else as negative.
        verdict = "\tpositive\t" if score >= 0 else "\tnegative\t"
        report_lines.append(qgenes[index] + verdict + str(score) + "\n")
        print(str(index) + "\t" + str(score) + "\t0")
    return "".join(report_lines)
def training_run(options):
    """Train the combined SVM and write the requested HTML report.

    Positive and negative examples are loaded with two ``MotifFinder``
    instances sharing one ``MotifFinderSettings`` object.  For every motif
    (in sorted order) a ``WeightedDegreePositionStringKernel`` is built over
    the concatenated positive and negative examples; a ``GaussianKernel``
    over the transposed position matrix is appended last.  All sub-kernels
    are collected in a ``CombinedKernel`` which is used to train a
    ``LibSVM`` with labels +1 for positives and -1 for negatives.

    After training, the output directory is created if necessary and the
    report sections selected by ``options.contrib``, ``options.logos`` and
    ``options.query`` are generated and passed to ``htmlize``.

    Args:
        options: Object providing ``window_width``, ``replace``,
            ``positives``, ``pgff``, ``negatives``, ``ngff``,
            ``output_path``, ``output_html``, ``contrib``, ``logos`` and
            ``query`` (plus ``qgff`` when ``query`` is set, as read by
            ``evaluate``).

    Returns:
        None.  The results are handed to ``htmlize`` rather than returned.
    """
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                   options.window_width,
                                   options.replace)

    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()

    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)

    # The example count is taken from the first motif entry of each set.
    # presumably every motif holds one entry per example -- TODO confirm
    num_positives = len(list(pmotifs.values())[0])
    num_negatives = len(list(nmotifs.values())[0])

    # Creating kernel objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    # Sorted so that the order of the sub-kernels is deterministic.
    motifs = sorted(pmotifs)

    # Adding k-mer kernels: one string kernel per motif, positives first.
    for motif in motifs:
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_kparams["degree"])
        wds_kernel.set_shifts(
            wds_kparams["shift"] * ones(wds_kparams["seqlength"], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    # Position kernel, appended after all motif kernels.
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(position_features, position_features,
                                kirmes_ini.RBF_KERNEL_PARAMETERS["width"])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)

    # Training
    svm.train()

    # makedirs (not mkdir) so that a nested output path whose parent
    # directories do not exist yet is created as well.
    if not os.path.exists(options.output_path):
        os.makedirs(options.output_path)

    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)
def training_run(options):
    """Train the combined SVM and write the requested HTML report.

    Positive and negative examples are loaded with two ``MotifFinder``
    instances sharing one ``MotifFinderSettings`` object.  For every motif
    (in sorted order) a ``WeightedDegreePositionStringKernel`` is built over
    the concatenated positive and negative examples; a ``GaussianKernel``
    over the transposed position matrix is appended last.  All sub-kernels
    are collected in a ``CombinedKernel`` which is used to train a
    ``LibSVM`` with labels +1 for positives and -1 for negatives.

    After training, the output directory is created if necessary and the
    report sections selected by ``options.contrib``, ``options.logos`` and
    ``options.query`` are generated and passed to ``htmlize``.

    Args:
        options: Object providing ``window_width``, ``replace``,
            ``positives``, ``pgff``, ``negatives``, ``ngff``,
            ``output_path``, ``output_html``, ``contrib``, ``logos`` and
            ``query`` (plus ``qgff`` when ``query`` is set, as read by
            ``evaluate``).

    Returns:
        None.  The results are handed to ``htmlize`` rather than returned.
    """
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                   options.window_width,
                                   options.replace)

    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()

    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)

    # The example count is taken from the first motif entry of each set.
    # presumably every motif holds one entry per example -- TODO confirm
    num_positives = len(list(pmotifs.values())[0])
    num_negatives = len(list(nmotifs.values())[0])

    # Creating kernel objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    # Sorted so that the order of the sub-kernels is deterministic.
    motifs = sorted(pmotifs)

    # Adding k-mer kernels: one string kernel per motif, positives first.
    for motif in motifs:
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_kparams['degree'])
        wds_kernel.set_shifts(
            wds_kparams['shift'] * ones(wds_kparams['seqlength'], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    # Position kernel, appended after all motif kernels.
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(position_features, position_features,
                                kirmes_ini.RBF_KERNEL_PARAMETERS['width'])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)

    # Training
    svm.train()

    # makedirs (not mkdir) so that a nested output path whose parent
    # directories do not exist yet is created as well.
    if not os.path.exists(options.output_path):
        os.makedirs(options.output_path)

    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)