示例#1
0
def poims(svm, kernel, kernel_array, motifs, path):
    """Create POIM plots and sequence logos and return an HTML table for them.

    Every sub-kernel except the last one is processed: its POIMs are computed
    with EasySVM.computePOIMs, plotted to ``<path>/<motif>_poim.png`` and
    rendered as a logo to ``<path>/<motif>.png``.  The returned HTML fragment
    references both images by their bare file names (no directory part).
    """
    fragments = [
        "\n\n<h2>POIMs and Sequence Logos</h2>\n",
        "<table>\n<tr><th>Motif</th>\n<th>POIM</th>\n<th>Sequence Logo</th>\n</tr>\n",
    ]
    # The last sub-kernel is deliberately left out of the loop.
    plotted_kernels = kernel.get_num_subkernels() - 1
    for idx in xrange(plotted_kernels):
        motif_length = kirmes_ini.MOTIF_LENGTH
        window_width = kirmes_ini.MOTIF_WINDOW_WIDTH
        poim_result = EasySVM.computePOIMs(
            svm, kernel_array[idx], motif_length, window_width)
        (poim, max_poim, diff_poim, poim_totalmass) = poim_result

        motif_name = motifs[idx]
        image_base = os.path.join(path, motif_name)
        EasySVM.plotPOIMs(image_base + "_poim.png", poim, max_poim, diff_poim,
                          poim_totalmass, motif_length + 1, window_width)
        EasySVM.weblogoPOIM(image_base + ".png", poim, window_width)

        # Only the file name is used so the links are relative to the report.
        image_name = os.path.split(image_base)[1]
        fragments.append("<tr><td>" + motif_name + "</td>\n<td>")
        fragments.append("<img src='" + image_name + "_poim.png' /></td>\n<td>")
        fragments.append("<img src='" + image_name + ".png' width='350' /></td></tr>\n")
    fragments.append("</table>\n")
    return "".join(fragments)
示例#2
0
def contrib(svm, kernel, motif_labels, kernel_array, motifs):
    """Build an HTML report of how much each sub-kernel contributes.

    The SVM output on the training data is first evaluated with all
    sub-kernels active (the baseline).  Then each sub-kernel in turn has its
    combined-kernel weight set to 0.0, the SVM output is recomputed and
    re-evaluated, and the weight is set back to 1.0.  The drop of the third
    value returned by EasySVM.evaluate (called ``prc`` here) relative to the
    baseline is reported per kernel, largest drop first.

    Args:
        svm: trained classifier; ``svm.classify().get_labels()`` is evaluated.
        kernel: combined kernel, queried for its number of sub-kernels.
        motif_labels: true labels passed on to EasySVM.evaluate.
        kernel_array: the sub-kernels, indexable by sub-kernel number.
        motifs: names of the motif kernels; this list is not modified.

    Returns:
        An HTML fragment with the baseline evaluation text and a table.
    """
    (contrib_str, _baseline_roc, baseline_prc, _mean_acc) = EasySVM.evaluate(
        svm.classify().get_labels(), None, motif_labels
    )
    html = "\n\n<h2>Training Error Evaluation</h2>\n<pre>" + contrib_str + "</pre>\n"

    prc_drops = []
    for i in range(kernel.get_num_subkernels()):
        sub_kernel = kernel_array[i]
        sub_kernel.set_combined_kernel_weight(0.0)
        try:
            training_less_1 = svm.classify().get_labels()
        finally:
            # Re-enable the kernel even if classification fails, so an error
            # here cannot leave the combined kernel with a disabled member.
            sub_kernel.set_combined_kernel_weight(1.0)
        (_res_str, _mean_roc, mean_prc, _acc) = EasySVM.evaluate(
            training_less_1, None, motif_labels)
        prc_drops.append(baseline_prc - mean_prc)

    # Work on a private copy: the caller's list must stay untouched even when
    # an exception is raised while the table is being written.
    kernel_names = list(motifs) + ["positional information (RBF)"]

    html += "<table>\n<tr><th>Motif Kernel</th>\n<th>PRC</th>\n</tr>\n"
    # argsort is ascending; walk it backwards to list the largest drop first.
    for idx in argsort(prc_drops)[::-1]:
        html += "<tr><td>Kernel for " + kernel_names[idx] + "</td>\n"
        html += "<td>" + str(prc_drops[idx]) + "</td></tr>\n"
    html += "</table>\n"
    return html
示例#3
0
def evaluate(options, svm, kernel, features, motifs):
    """Classify the query sequences with a trained SVM and report the result.

    Motif occurrences are located in ``options.query`` using the motif
    definitions in ``options.qgff``.  Features are created for every name in
    ``motifs`` plus the motif positions, the kernel is initialised with the
    training features on the left and the query features on the right, and
    the SVM output is collected.

    A tab-separated table (``#example  output  split``) is printed to stdout
    as a side effect.

    Args:
        options: provides ``window_width``, ``query`` and ``qgff``.
        svm: trained classifier.
        kernel: combined kernel used by ``svm``.
        features: training features the kernel was trained with.
        motifs: motif names, in the order the sub-kernels were added.

    Returns:
        One line per query gene: ``<gene>\\t<positive|negative>\\t<output>``.

    Raises:
        AssertionError: if the query lacks positions for any of ``motifs``.
    """
    finder_settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                          options.window_width)
    query = MotifFinder(finder_settings=finder_settings)
    query.setFastaFile(options.query)
    query.setMotifs(options.qgff)
    qmotifs, qpositions = query.getResults()
    feats_query = CombinedFeatures()
    wds_svm = EasySVM.EasySVM(kirmes_ini.WDS_KERNEL_PARAMETERS)

    # Explicit check instead of ``assert``: assertions are removed under
    # ``python -O``, which would turn this diagnostic into a bare KeyError.
    missing = set(motifs).difference(qmotifs.keys())
    if missing:
        print("The motif positions in the query sequence are incomplete, there are no positions for:")
        print(missing)
        raise AssertionError(
            "query sequences lack positions for motifs: %s"
            % ", ".join(str(name) for name in missing))

    for motif in motifs:
        feats_query.append_feature_obj(wds_svm.createFeatures(qmotifs[motif]))
    # Positions are transposed before being turned into features.
    query_positions = array(qpositions, dtype=float64).T
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    feats_query.append_feature_obj(rbf_svm.createFeatures(query_positions))

    kernel.init(features, feats_query)
    out = svm.classify().get_labels()
    qgenes = query.getGenes()

    result_lines = []
    print("#example\toutput\tsplit")
    for i in range(len(out)):
        score = out[i]
        # A non-negative SVM output is reported as the positive class.
        classif = "\tpositive\t" if score >= 0 else "\tnegative\t"
        result_lines.append(qgenes[i] + classif + str(score) + "\n")
        print(str(i) + "\t" + str(score) + "\t0")
    return "".join(result_lines)
示例#4
0
def contrib(svm, kernel, motif_labels, kernel_array, motifs):
    """Report the contribution of every sub-kernel as an HTML fragment.

    A baseline is obtained by evaluating the SVM output with all sub-kernels
    enabled.  Each sub-kernel is then switched off in turn (combined-kernel
    weight 0.0), the output is recomputed and evaluated, and the kernel is
    switched back on (weight 1.0).  The table lists, per kernel, the baseline
    ``prc`` value minus the ``prc`` value without that kernel, in descending
    order.

    Args:
        svm: trained classifier; ``svm.classify().get_labels()`` is evaluated.
        kernel: combined kernel, queried for its number of sub-kernels.
        motif_labels: true labels passed on to EasySVM.evaluate.
        kernel_array: the sub-kernels, indexable by sub-kernel number.
        motifs: names of the motif kernels; left unmodified.

    Returns:
        HTML containing the baseline evaluation text followed by the table.
    """
    baseline = EasySVM.evaluate(svm.classify().get_labels(), None,
                                motif_labels)
    (contrib_str, _baseline_roc, baseline_prc, _baseline_acc) = baseline
    pieces = ["\n\n<h2>Training Error Evaluation</h2>\n<pre>" + contrib_str + "</pre>\n"]

    prcs = []
    for i in range(kernel.get_num_subkernels()):
        kernel_array[i].set_combined_kernel_weight(0.0)
        try:
            training_less_1 = svm.classify().get_labels()
        finally:
            # Always restore the weight; otherwise a failure would leave
            # this sub-kernel disabled for every later use of the kernel.
            kernel_array[i].set_combined_kernel_weight(1.0)
        (_res_str, _mean_roc, mean_prc,
         _mean_acc) = EasySVM.evaluate(training_less_1, None, motif_labels)
        prcs.append(baseline_prc - mean_prc)

    # Extend a copy with the label for the final kernel rather than appending
    # to (and later popping from) the list owned by the caller.
    row_labels = list(motifs)
    row_labels.append("positional information (RBF)")

    pieces.append("<table>\n<tr><th>Motif Kernel</th>\n<th>PRC</th>\n</tr>\n")
    order = argsort(prcs)
    # argsort sorts ascending; reverse it so the largest value comes first.
    for position in order[::-1]:
        pieces.append("<tr><td>Kernel for " + row_labels[position] + "</td>\n")
        pieces.append("<td>" + str(prcs[position]) + "</td></tr>\n")
    pieces.append("</table>\n")
    return "".join(pieces)
示例#5
0
def poims(svm, kernel, kernel_array, motifs, path):
    """Write POIM and logo images per motif kernel; return the HTML table.

    All sub-kernels but the last are handled.  For sub-kernel ``i`` the
    images ``motifs[i] + "_poim.png"`` and ``motifs[i] + ".png"`` are written
    below ``path`` and one table row linking to both is produced.
    """
    header = "\n\n<h2>POIMs and Sequence Logos</h2>\n"
    header += "<table>\n<tr><th>Motif</th>\n<th>POIM</th>\n<th>Sequence Logo</th>\n</tr>\n"
    rows = []
    last_index = kernel.get_num_subkernels() - 1  # final sub-kernel is skipped
    for k in xrange(last_index):
        poim, max_poim, diff_poim, poim_totalmass = EasySVM.computePOIMs(
            svm,
            kernel_array[k],
            kirmes_ini.MOTIF_LENGTH,
            kirmes_ini.MOTIF_WINDOW_WIDTH,
        )
        target = os.path.join(path, motifs[k])
        poim_png = target + "_poim.png"
        logo_png = target + ".png"
        EasySVM.plotPOIMs(
            poim_png,
            poim,
            max_poim,
            diff_poim,
            poim_totalmass,
            kirmes_ini.MOTIF_LENGTH + 1,
            kirmes_ini.MOTIF_WINDOW_WIDTH,
        )
        EasySVM.weblogoPOIM(logo_png, poim, kirmes_ini.MOTIF_WINDOW_WIDTH)
        # Links use the plain file name, without the output directory.
        link = os.path.split(target)[1]
        row = "<tr><td>" + motifs[k] + "</td>\n<td>"
        row += "<img src='" + link + "_poim.png' /></td>\n<td>"
        row += "<img src='" + link + ".png' width='350' /></td></tr>\n"
        rows.append(row)
    return header + "".join(rows) + "</table>\n"
示例#6
0
def training_run(options):
    """Train the combined-kernel SVM and write the requested HTML report.

    Motif occurrences are located in the positive and negative FASTA files.
    One weighted-degree-with-shifts string kernel is built per motif (motifs
    taken in sorted name order) and a Gaussian kernel is added for the motif
    positions; all of them are joined in a CombinedKernel on which a LibSVM
    is trained.  Depending on ``options.contrib``, ``options.logos`` and
    ``options.query`` the matching report sections are produced, and the
    collected sections are handed to ``htmlize``.

    Nothing is returned; results are delivered through ``htmlize`` and the
    files written below ``options.output_path``.

    Args:
        options: settings object providing ``window_width``, ``replace``,
            ``positives``/``pgff``, ``negatives``/``ngff``, ``output_path``,
            ``output_html`` and the ``contrib``/``logos``/``query`` switches.
    """
    settings = MotifFinderSettings(kirmes_ini.MOTIF_LENGTH,
                                   options.window_width, options.replace)
    positives = MotifFinder(finder_settings=settings)
    positives.setFastaFile(options.positives)
    positives.setMotifs(options.pgff)
    pmotifs, ppositions = positives.getResults()
    negatives = MotifFinder(finder_settings=settings)
    negatives.setFastaFile(options.negatives)
    negatives.setMotifs(options.ngff)
    nmotifs, npositions = negatives.getResults()

    wds_kparams = kirmes_ini.WDS_KERNEL_PARAMETERS
    wds_svm = EasySVM.EasySVM(wds_kparams)
    # list(...) keeps this working where dict.values() is a view, not a list.
    # NOTE(review): assumes every motif holds one entry per sequence, so any
    # motif's list length equals the number of examples - confirm.
    num_positives = len(list(pmotifs.values())[0])
    num_negatives = len(list(nmotifs.values())[0])

    # Creating kernel objects
    kernel = CombinedKernel()
    features = CombinedFeatures()
    kernel_array = []
    motifs = sorted(pmotifs.keys())

    # Adding one string kernel per motif; positives precede negatives so the
    # example order matches the label vector built further down.
    for motif in motifs:
        all_examples = pmotifs[motif] + nmotifs[motif]
        motif_features = wds_svm.createFeatures(all_examples)
        wds_kernel = WeightedDegreePositionStringKernel(
            motif_features, motif_features, wds_kparams['degree'])
        # Same shift value for each of the 'seqlength' positions.
        wds_kernel.set_shifts(wds_kparams['shift'] *
                              ones(wds_kparams['seqlength'], dtype=int32))
        features.append_feature_obj(motif_features)
        kernel_array.append(wds_kernel)
        kernel.append_kernel(wds_kernel)

    # Gaussian kernel on the (transposed) motif positions, appended last.
    rbf_svm = EasySVM.EasySVM(kirmes_ini.RBF_KERNEL_PARAMETERS)
    positions = array(ppositions + npositions, dtype=float64).T
    position_features = rbf_svm.createFeatures(positions)
    features.append_feature_obj(position_features)
    # +1 for every positive example, -1 for every negative example.
    motif_labels = append(ones(num_positives), -ones(num_negatives))
    complete_labels = Labels(motif_labels)
    rbf_kernel = GaussianKernel(position_features, position_features,
                                kirmes_ini.RBF_KERNEL_PARAMETERS['width'])
    kernel_array.append(rbf_kernel)
    kernel.append_kernel(rbf_kernel)

    # Kernel init
    kernel.init(features, features)
    kernel.set_cache_size(kirmes_ini.K_CACHE_SIZE)
    svm = LibSVM(kirmes_ini.K_COMBINED_C, kernel, complete_labels)
    svm.parallel.set_num_threads(kirmes_ini.K_NUM_THREADS)

    # Training
    svm.train()

    # makedirs also creates missing parent directories, where os.mkdir
    # would fail with OSError for a nested output path.
    if not os.path.exists(options.output_path):
        os.makedirs(options.output_path)

    html = {}
    if options.contrib:
        html["contrib"] = contrib(svm, kernel, motif_labels, kernel_array,
                                  motifs)
    if options.logos:
        html["poims"] = poims(svm, kernel, kernel_array, motifs,
                              options.output_path)
    if options.query:
        html["query"] = evaluate(options, svm, kernel, features, motifs)
    htmlize(html, options.output_html)