def poims(svm, kernel, kernel_array, motifs, path):
    """Build the HTML table of POIM plots and sequence logos.

    For every subkernel except the last one, the POIMs are computed with
    ``EasySVM.computePOIMs``, plotted to ``<path>/<motif>_poim.png`` and
    rendered as a sequence logo to ``<path>/<motif>.png``.  The returned HTML
    fragment references both images by bare file name (no directory part).

    Args:
        svm: Classifier handed through to ``EasySVM.computePOIMs``.
        kernel: Combined kernel; only ``get_num_subkernels()`` is called.
        kernel_array: Subkernels, indexed in parallel with ``motifs``.
        motifs: Motif names; used for file names and table labels.
        path: Directory in which the image files are created.

    Returns:
        The HTML fragment as a string.
    """
    chunks = [
        "\n\n<h2>POIMs and Sequence Logos</h2>\n",
        "<table>\n<tr><th>Motif</th>\n<th>POIM</th>\n<th>Sequence Logo</th>\n</tr>\n",
    ]
    # The final subkernel is deliberately left out
    # (presumably the positional RBF kernel, which has no POIM -- confirm).
    plotted_kernels = kernel.get_num_subkernels() - 1
    for idx in range(plotted_kernels):
        poim, max_poim, diff_poim, poim_totalmass = EasySVM.computePOIMs(
            svm,
            kernel_array[idx],
            kirmes_ini.MOTIF_LENGTH,
            kirmes_ini.MOTIF_WINDOW_WIDTH,
        )
        image_stem = os.path.join(path, motifs[idx])
        EasySVM.plotPOIMs(
            image_stem + "_poim.png",
            poim,
            max_poim,
            diff_poim,
            poim_totalmass,
            kirmes_ini.MOTIF_LENGTH + 1,
            kirmes_ini.MOTIF_WINDOW_WIDTH,
        )
        EasySVM.weblogoPOIM(image_stem + ".png", poim, kirmes_ini.MOTIF_WINDOW_WIDTH)
        # Images are referenced relative to the report, so drop the directory.
        image_name = os.path.basename(image_stem)
        chunks.append("<tr><td>" + motifs[idx] + "</td>\n<td>")
        chunks.append("<img src='" + image_name + "_poim.png' /></td>\n<td>")
        chunks.append("<img src='" + image_name + ".png' width='350' /></td></tr>\n")
    chunks.append("</table>\n")
    return "".join(chunks)
def contrib(svm, kernel, motif_labels, kernel_array, motifs):
    """Estimate how much each subkernel contributes to the training result.

    The SVM output on the training data is first evaluated with every
    subkernel active (the baseline).  Each subkernel is then switched off in
    turn by setting its combined kernel weight to 0.0, the output is
    re-evaluated, and the difference between the baseline PRC value and the
    reduced PRC value is recorded.

    Args:
        svm: Trained classifier; ``svm.classify().get_labels()`` is called.
        kernel: Combined kernel; only ``get_num_subkernels()`` is called.
        motif_labels: True labels handed to ``EasySVM.evaluate``.
        kernel_array: Subkernels, indexable by subkernel position.
        motifs: Motif names, one per subkernel except the last, which is
            reported as "positional information (RBF)".  The list is not
            modified.

    Returns:
        HTML fragment containing the baseline evaluation text and a table of
        subkernels ordered by decreasing PRC difference.
    """
    contrib_str, _baseline_roc, baseline_prc, _baseline_acc = EasySVM.evaluate(
        svm.classify().get_labels(), None, motif_labels
    )
    html = "\n\n<h2>Training Error Evaluation</h2>\n<pre>" + contrib_str + "</pre>\n"

    prc_drops = []
    for idx in range(kernel.get_num_subkernels()):
        subkernel = kernel_array[idx]
        subkernel.set_combined_kernel_weight(0.0)
        try:
            outputs_without = svm.classify().get_labels()
        finally:
            # Always re-enable the subkernel so a failed classification does
            # not leave the shared kernel permanently switched off.
            subkernel.set_combined_kernel_weight(1.0)
        _res_str, _mean_roc, mean_prc, _mean_acc = EasySVM.evaluate(
            outputs_without, None, motif_labels
        )
        prc_drops.append(baseline_prc - mean_prc)

    # Work on a copy: the caller's motif list must not be touched, even if
    # something below raises.
    kernel_names = list(motifs) + ["positional information (RBF)"]

    rows = ["<table>\n<tr><th>Motif Kernel</th>\n<th>PRC</th>\n</tr>\n"]
    # argsort is ascending; walk it backwards to list the largest drop first.
    for k in argsort(prc_drops)[::-1]:
        rows.append("<tr><td>Kernel for " + kernel_names[k] + "</td>\n")
        rows.append("<td>" + str(prc_drops[k]) + "</td></tr>\n")
    rows.append("</table>\n")
    return html + "".join(rows)
def evaluate(options, svm, kernel, features, motifs):
    """Classify the query sequences with an already trained SVM.

    Motif occurrences and positions are extracted from ``options.query`` /
    ``options.qgff``, converted into combined features (one feature object
    per entry of ``motifs`` plus one for the positions), and the kernel is
    re-initialised with the training ``features`` on the left-hand side and
    the query features on the right-hand side before classifying.

    A tab-separated ``#example / output / split`` listing is printed to
    stdout as a side effect.

    Args:
        options: Parsed options; reads ``window_width``, ``query`` and
            ``qgff``.
        svm: Trained classifier; ``svm.classify().get_labels()`` is called.
        kernel: Kernel to re-initialise for the query data.
        features: Training features used as the left-hand side.
        motifs: Motif names the classifier was trained on.

    Returns:
        One line per query gene of the form
        ``<gene>\\t<positive|negative>\\t<output>``, joined into one string.

    Raises:
        AssertionError: If the query results contain no positions for one or
            more entries of ``motifs``.
    """
    # NOTE(review): training_run also passes options.replace to
    # MotifFinderSettings; confirm that leaving it out here is intended.
    query = MotifFinder(
        finder_settings=MotifFinderSettings(kirmes_ini.MOTIF_LENGTH, options.window_width)
    )
    query.setFastaFile(options.query)
    query.setMotifs(options.qgff)
    qmotifs, qpositions = query.getResults()
    feats_query = CombinedFeatures()
    wds_svm = EasySVM.EasySVM(kirmes_ini.WDS_KERNEL_PARAMETERS)

    # Explicit check instead of an ``assert`` statement: asserts are removed
    # when Python runs with -O, which would turn this into a bare KeyError in
    # the loop below.  AssertionError is kept so existing callers still match.
    missing = set(motifs).difference(qmotifs.keys())
    if missing:
        print("The motif positions in the query sequence are incomplete, there are no positions for:")
        print(missing)
        raise AssertionError("query is missing motif positions for: %r" % (sorted(missing),))

    for motif in motifs:
        feats_query.append_feature_obj(wds_svm.createFeatures(qmotifs[motif]))
    query_positions = array(qpositions, dtype=float64).T
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    feats_query.append_feature_obj(rbf_svm.createFeatures(query_positions))

    kernel.init(features, feats_query)
    out = svm.classify().get_labels()
    qgenes = query.getGenes()

    result_lines = []
    print("#example\toutput\tsplit")
    for idx, score in enumerate(out):
        # Outputs of exactly zero count as positive.
        classif = "\tpositive\t" if score >= 0 else "\tnegative\t"
        result_lines.append(qgenes[idx] + classif + str(score) + "\n")
        print(str(idx) + "\t" + str(score) + "\t0")
    return "".join(result_lines)
def contrib(svm, kernel, motif_labels, kernel_array, motifs):
    """Estimate how much each subkernel contributes to the training result.

    The SVM output on the training data is first evaluated with every
    subkernel active (the baseline).  Each subkernel is then switched off in
    turn by setting its combined kernel weight to 0.0, the output is
    re-evaluated, and the difference between the baseline PRC value and the
    reduced PRC value is recorded.

    Args:
        svm: Trained classifier; ``svm.classify().get_labels()`` is called.
        kernel: Combined kernel; only ``get_num_subkernels()`` is called.
        motif_labels: True labels handed to ``EasySVM.evaluate``.
        kernel_array: Subkernels, indexable by subkernel position.
        motifs: Motif names, one per subkernel except the last, which is
            reported as "positional information (RBF)".  The list is not
            modified.

    Returns:
        HTML fragment containing the baseline evaluation text and a table of
        subkernels ordered by decreasing PRC difference.
    """
    contrib_str, _baseline_roc, baseline_prc, _baseline_acc = EasySVM.evaluate(
        svm.classify().get_labels(), None, motif_labels
    )
    html = "\n\n<h2>Training Error Evaluation</h2>\n<pre>" + contrib_str + "</pre>\n"

    prc_drops = []
    for idx in range(kernel.get_num_subkernels()):
        subkernel = kernel_array[idx]
        subkernel.set_combined_kernel_weight(0.0)
        try:
            outputs_without = svm.classify().get_labels()
        finally:
            # Always re-enable the subkernel so a failed classification does
            # not leave the shared kernel permanently switched off.
            subkernel.set_combined_kernel_weight(1.0)
        _res_str, _mean_roc, mean_prc, _mean_acc = EasySVM.evaluate(
            outputs_without, None, motif_labels
        )
        prc_drops.append(baseline_prc - mean_prc)

    # Work on a copy: the caller's motif list must not be touched, even if
    # something below raises.
    kernel_names = list(motifs) + ["positional information (RBF)"]

    rows = ["<table>\n<tr><th>Motif Kernel</th>\n<th>PRC</th>\n</tr>\n"]
    # argsort is ascending; walk it backwards to list the largest drop first.
    for k in argsort(prc_drops)[::-1]:
        rows.append("<tr><td>Kernel for " + kernel_names[k] + "</td>\n")
        rows.append("<td>" + str(prc_drops[k]) + "</td></tr>\n")
    rows.append("</table>\n")
    return html + "".join(rows)
def poims(svm, kernel, kernel_array, motifs, path):
    """Build the HTML table of POIM plots and sequence logos.

    For every subkernel except the last one, the POIMs are computed with
    ``EasySVM.computePOIMs``, plotted to ``<path>/<motif>_poim.png`` and
    rendered as a sequence logo to ``<path>/<motif>.png``.  The returned HTML
    fragment references both images by bare file name (no directory part).

    Args:
        svm: Classifier handed through to ``EasySVM.computePOIMs``.
        kernel: Combined kernel; only ``get_num_subkernels()`` is called.
        kernel_array: Subkernels, indexed in parallel with ``motifs``.
        motifs: Motif names; used for file names and table labels.
        path: Directory in which the image files are created.

    Returns:
        The HTML fragment as a string.
    """
    chunks = [
        "\n\n<h2>POIMs and Sequence Logos</h2>\n",
        "<table>\n<tr><th>Motif</th>\n<th>POIM</th>\n<th>Sequence Logo</th>\n</tr>\n",
    ]
    # The final subkernel is deliberately left out
    # (presumably the positional RBF kernel, which has no POIM -- confirm).
    plotted_kernels = kernel.get_num_subkernels() - 1
    for idx in range(plotted_kernels):
        poim, max_poim, diff_poim, poim_totalmass = EasySVM.computePOIMs(
            svm,
            kernel_array[idx],
            kirmes_ini.MOTIF_LENGTH,
            kirmes_ini.MOTIF_WINDOW_WIDTH,
        )
        image_stem = os.path.join(path, motifs[idx])
        EasySVM.plotPOIMs(
            image_stem + "_poim.png",
            poim,
            max_poim,
            diff_poim,
            poim_totalmass,
            kirmes_ini.MOTIF_LENGTH + 1,
            kirmes_ini.MOTIF_WINDOW_WIDTH,
        )
        EasySVM.weblogoPOIM(image_stem + ".png", poim, kirmes_ini.MOTIF_WINDOW_WIDTH)
        # Images are referenced relative to the report, so drop the directory.
        image_name = os.path.basename(image_stem)
        chunks.append("<tr><td>" + motifs[idx] + "</td>\n<td>")
        chunks.append("<img src='" + image_name + "_poim.png' /></td>\n<td>")
        chunks.append("<img src='" + image_name + ".png' width='350' /></td></tr>\n")
    chunks.append("</table>\n")
    return "".join(chunks)
def training_run(options):
    """Train the combined-kernel SVM and produce the requested HTML report.

    Motif occurrences and positions are collected for the positive and the
    negative input set.  One ``WeightedDegreePositionStringKernel`` is built
    per motif (motifs in sorted order) and one ``GaussianKernel`` is built on
    the positions; all of them are appended to a ``CombinedKernel`` on which
    a ``LibSVM`` is trained.  Afterwards the optional report sections are
    rendered and passed to ``htmlize``.

    Args:
        options: Parsed options.  Reads ``window_width``, ``replace``,
            ``positives``, ``pgff``, ``negatives``, ``ngff``,
            ``output_path``, ``contrib``, ``logos``, ``query`` and
            ``output_html``.

    Returns:
        None.  The report is emitted through ``htmlize``; the output
        directory is created if it does not exist yet.
    """
    settings = MotifFinderSettings(
        kirmes_ini.MOTIF_LENGTH, options.window_width, options.replace
    )

    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()

    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)
    # The example count is taken from an arbitrary motif entry
    # (presumably every motif holds one item per sequence -- confirm).
    num_positives = len(list(pmotifs.values())[0])
    num_negatives = len(list(nmotifs.values())[0])

    # Creating kernel objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    motifs = sorted(pmotifs.keys())

    # Adding k-mer kernels: one WD kernel per motif, positives first.
    for motif in motifs:
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_kparams['degree']
        )
        wds_kernel.set_shifts(
            wds_kparams['shift'] * ones(wds_kparams['seqlength'], dtype=int32)
        )
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    # Positional kernel; it is appended last, after all motif kernels.
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    # Labels follow the example order: +1 for positives, then -1 for negatives.
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(
        position_features, position_features, kirmes_ini.RBF_KERNEL_PARAMETERS['width']
    )
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)

    # Training
    svm.train()

    # makedirs also creates missing parent directories, so a nested output
    # path does not abort the run after training has already completed.
    if not os.path.exists(options.output_path):
        os.makedirs(options.output_path)

    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array, motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs, options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)