示例#1
0
文件: views.py 项目: gorysko/roundup
def lookup(request):
    '''
    View for looking up the sequence id of a fasta sequence within a genome.

    get: render the lookup form.
    post: validate the form, cache the lookup outcome under a fresh key and redirect to the
    result page for that key.

    NOTE: the view raises Http404 as its very first statement, so the form handling below is
    currently unreachable.
    '''
    raise django.http.Http404

    if request.method != 'POST':
        # Plain page load: show an empty (unbound) form.
        form = LookupForm()
    else:
        # Submission: bind the form to the posted data and validate it.
        form = LookupForm(request.POST)
        if form.is_valid():
            cleaned = form.cleaned_data
            logging.debug(cleaned)
            genome = cleaned['genome']
            fasta = cleaned['fasta']
            indexPath = roundup.dataset.getGenomeIndexPath(webconfig.CURRENT_DATASET, genome)
            seqId = BioUtilities.findSeqIdWithFasta(fasta, indexPath)
            # Stash the outcome in the cache so the POST can be answered with a redirect to a GET.
            # http://en.wikipedia.org/wiki/Post/Redirect/Get
            key = makeUniqueId()
            roundup_util.cacheSet(key, {'genome': genome, 'fasta': fasta, 'seqId': seqId})
            resultUrl = django.core.urlresolvers.reverse(lookup_result, kwargs={'key': key})
            return django.shortcuts.redirect(resultUrl)
        # An invalid submission falls through and re-renders the bound form.

    example = "{'fasta': '>example_nameline\\nMNFLWKGRRFLIAGILPTFEGAADEIVDKENKTYKAFLASKPPEETGLERLKQMFTIDEF', 'genome': '7227'}"
    context = {
        'form': form,
        'nav_id': 'lookup',
        'form_doc_id': 'lookup',
        'form_action': django.core.urlresolvers.reverse(lookup),
        'form_example': example,
    }
    return django.shortcuts.render(request, 'lookup.html', context)
示例#2
0
def resultToGeneView(resultId, urlFunc, otherParams=None):
    '''
    Build the HTML fragment describing one gene cluster (row) of a result.

    For the selected cluster, get its terms, then get all the clusters associated with those
    terms and compute the mean pairwise profile hamming distance of each term's clusters, as
    well as the mean hamming distance between the selected cluster and the clusters of the term.
    The fragment also lists the FASTA sequences of the cluster and its ortholog pairs.

    resultId: id handed to getResult() to load the result.
    urlFunc: passed through to makeResultUrl() and makeSequenceClustersTable().
    otherParams: dict of extra parameters; its 'gene' entry is the index of the gene/cluster
        row in the result to view.  Defaults to an empty dict.
    returns: the HTML fragment as a string.
    raises: Exception if the 'gene' parameter is not an integer.
    '''
    # A None default avoids sharing a single mutable dict between calls.
    if otherParams is None:
        otherParams = {}

    result = getResult(resultId)
    ds = result['dataset']
    geneIndex = otherParams.get('gene')
    if not util.isInteger(geneIndex):
        raise Exception('resultToGeneView(): geneIndex is not an integer. geneIndex=%s'%geneIndex)
    geneIndex = int(geneIndex)
    rows = result['rows']
    selectedRow = rows[geneIndex]
    seqIdToDataMap = result.get('seq_id_to_data_map', {})
    termMap = result.get('term_map', {})
    headers = result['headers']
    # the final header belongs to the distance column; the others are genomes.
    genomes = headers[:-1]
    genomeIdToGenomeMap = result.get('genome_id_to_genome_map', {})
    orthologs = result['orthologs'][geneIndex] # orthologs is a list of lists of orthologs for each row.

    # get the profile of every cluster and the map from term to the clusters annotated with it.
    # row has an element for each genome that contains 0 or more seq ids, and row has the distance of the cluster as its final element.
    clusterIndexToProfileMap = {}
    termToClusterIndicesMap = {}
    for index, row in enumerate(rows):
        clusterIndexToProfileMap[index] = getProfileForCluster(row)
        for term in getTermsForCluster(row, seqIdToDataMap):
            termToClusterIndicesMap.setdefault(term, []).append(index)
    # get terms for geneIndex cluster.
    geneTerms = getTermsForCluster(selectedRow, seqIdToDataMap)
    geneProfile = clusterIndexToProfileMap[geneIndex]

    # generate list of mean distance information for each term.
    termDataList = []
    for termId in geneTerms:
        clusterIndices = termToClusterIndicesMap[termId]
        numClusters = len(clusterIndices)
        meanPairwiseHammingDistance = computeMeanPairwiseHammingDistance(result, clusterIndices, clusterIndexToProfileMap)
        # a None mean marks a term to skip (presumably one associated with too many clusters
        # to be interesting -- see the TERM_PROMISCUITY_LIMIT text below).
        if meanPairwiseHammingDistance is None:
            continue
        totalHammingDistanceFromGene = sum(
            hammingDistanceForProfiles(clusterIndexToProfileMap[index], geneProfile)
            for index in clusterIndices if index != geneIndex)
        # NOTE(review): the divisor counts the selected cluster itself although only the other
        # clusters contribute a distance; confirm whether numClusters - 1 was intended.
        # Left unchanged so that reported values stay the same.
        if numClusters > 1:
            meanHammingDistanceFromGene = totalHammingDistanceFromGene / float(numClusters)
        else:
            meanHammingDistanceFromGene = 0
        termDataList.append((meanHammingDistanceFromGene, meanPairwiseHammingDistance, numClusters, termId))
    termDataList.sort()

    termResultUrl = makeResultUrl(resultId, urlFunc, resultType=TERM_RESULT, templateType=WIDE_TEMPLATE)
    # collect the pieces in a list and join once instead of repeatedly growing a string.
    parts = []
    parts.append(makeQueryDescHtml(result))
    parts.append('<h3>Result Description</h3>')
    parts.append('<pre>A table displays the orthologous sequences, '+headers[-1]+', and Phyletic Profile for the selected gene cluster.\n')
    parts.append('Another table lists all the Gene Ontology terms associated with the selected gene.\n')
    parts.append('That sortable table includes the following columns:\n')
    parts.append('\tThe mean hamming distance between the profile of the selected gene cluster and each profile of the other gene clusters in the result annotated with the GO term\n')
    parts.append('\tThe mean hamming distance between each pair of profiles of all gene clusters in the result annotated with the GO term\n')
    parts.append('\tNumber of gene clusters in the result annotated with each GO term.\n')
    parts.append('For GO terms with only one associated gene cluster, the mean pairwise hamming distance is defined as 0.\n')
    parts.append('GO terms associated with more than '+str(TERM_PROMISCUITY_LIMIT)+' gene clusters are excluded as being too general to be interesting.\n')
    parts.append('Click on table column headers to sort by that column.  Please be patient with large data sets.\n')
    parts.append('</pre>')
    parts.append('<h3>Result</h3>')
    parts.append('<h4>Gene Cluster Information</h4>')
    parts.append(makeSequenceClustersTable(result, resultId, urlFunc, clusterIndices=[geneIndex]))

    # table of terms for the selected cluster.
    parts.append('<h4>GO Term Information</h4>')
    parts.append('<table class="sortable" id="term_summary_table">')
    parts.append('<tr><th>GO&nbsp;Identifier</th><th>GO Term</th><th>Mean Gene Hamming Distance From Selected Gene</th>')
    parts.append('<th>Mean Pairwise Gene Hamming Distance</th><th>Number of Genes</th></tr>\n')
    for (meanFromGene, meanPairwise, termClusterCount, termId) in termDataList:
        termUrl = termResultUrl+'&term='+urllib.quote_plus(termId)
        parts.append('<tr><td><a href="%s">%s</a></td><td>%s</td>'%(termUrl, termId, termMap[termId]))
        parts.append('<td>%.2f</td><td>%.2f</td><td>%d</td></tr>\n'%(meanFromGene, meanPairwise, termClusterCount))
    parts.append('</table>')

    # FASTA sequences of the selected cluster, grouped by genome.
    parts.append('<h4>FASTA Sequence Information</h4>')
    parts.append('<ul>')
    for i, genome in enumerate(genomes):
        parts.append('<li>'+genomeDisplayName(ds, genome)+'<br/><pre>')
        for seqId in selectedRow[i]:
            externalId = seqIdToDataMap.get(seqId, {}).get(roundup_common.EXTERNAL_SEQUENCE_ID_KEY)
            try:
                fastaPath = roundup.dataset.getGenomeIndexPath(ds, genome)
                logging.debug('fastaPath: {}'.format(fastaPath))
                parts.append(BioUtilities.getFastaForId(externalId, fastaPath))
            except Exception:
                # best effort: one missing sequence should not break the whole page.
                # Exception (not a bare except) lets SystemExit/KeyboardInterrupt propagate.
                logging.exception('Error. genome=%s, seqId=%s', genome, seqId)
                parts.append('Failed to get FASTA for sequence %s\n'%(externalId))
        parts.append('</pre>')
    parts.append('</ul>')

    # ortholog pairs of the selected cluster.
    parts.append('<h4>Gene Cluster Orthologs</h4>')
    parts.append('<table class="sortable" id="ortholog_table">')
    parts.append('<tr><th>Sequence 1</th><th>Genome 1</th><th>Sequence 2</th><th>Genome 2</th><th>Evolutionary Distance</th></tr>\n')
    for (seqId1, seqId2, distance) in orthologs:
        data1 = seqIdToDataMap.get(seqId1, {})
        data2 = seqIdToDataMap.get(seqId2, {})
        acc1 = data1.get(roundup_common.EXTERNAL_SEQUENCE_ID_KEY)
        genome1 = genomeIdToGenomeMap.get(data1.get(roundup_common.GENOME_ID_KEY))
        acc2 = data2.get(roundup_common.EXTERNAL_SEQUENCE_ID_KEY)
        genome2 = genomeIdToGenomeMap.get(data2.get(roundup_common.GENOME_ID_KEY))
        parts.append('<tr><td>%s</td><td>%s</td>'%(acc1, genomeDisplayName(ds, genome1)))
        parts.append('<td>%s</td><td>%s</td>'%(acc2, genomeDisplayName(ds, genome2)))
        parts.append('<td>%.3f</td></tr>'%distance)
    parts.append('</table>')
    return ''.join(parts)