Exemplo n.º 1
0
def _abstracts_json(payload):
    """Serialize ``payload`` as JSON into a fresh HttpResponse and return it."""
    response = HttpResponse()
    json.dump(payload, response)
    return response


def _abstracts_error(errmsg):
    """Return the JSON error response used by ``abstracts`` for ``errmsg``."""
    return _abstracts_json({'validresult': False, 'errmsg': errmsg})


def abstracts(request):
    """Return a list of abstracts (as HTML wrapped in JSON) for a keyword
    query and list of genes.

    Everything is read from the query string (``request.GET``):

    * ``species``      -- integer species id; defaults to 9606 (human) when
                          missing or not an integer.
    * ``geneop``       -- ``'all'`` combines genes with AND; any other value
                          (or none) combines them with OR.
    * ``usehomologs``  -- boolean flag, defaults to False.
    * ``q``            -- keyword query.
    * ``usegenefile`` / ``genefileID`` -- take the genes from an uploaded
                          gene file instead of the ``genes`` parameter.
    * ``genes``, ``rowgene``, ``genefilter`` -- gene query and extra genes
                          added to it.
    * ``onlyreviews``  -- boolean flag, defaults to False.
    * ``orderby``      -- ``'relevance'``, ``'oldest'`` or ``'newest'``.
    * ``offset`` / ``limit`` -- paging; default to 0 and None.
    * ``keywordnum``   -- optional keyphrase id used to restrict abstracts.
    * ``metabolite``   -- optional metabolite id.

    Returns an HttpResponse whose body is a JSON object.  On success it is
    ``{'validresult': True, 'result': <rendered HTML>}``; on any failure it
    is ``{'validresult': False, 'errmsg': <message>}``.
    """
    params = request.GET

    # get species, default=human
    try:
        species = int(params['species'])
    except (KeyError, ValueError):
        species = 9606

    # get gene operator (any (or) / all (and)); only an explicit 'all'
    # switches off the implicit OR
    implicitOr = params.get('geneop', '').lower() != 'all'

    # figure out if we should include homologs
    try:
        usehomologs = parseboolean(params['usehomologs'])
    except (KeyError, ValueError):
        usehomologs = False

    # get keyword arguments from query string
    keywords = params.get('q')

    # get genes from query string
    try:
        if parseboolean(params.get('usegenefile')):
            # look up genes from file if we're using one
            genefileID = params.get('genefileID', -1)
            genes = genefile_lookup(genefileID, implicitOr, usehomologs)
        elif params.get('genes'):
            genes = parse_gene_abstractquery(
                params.get('genes'), species, implicitOr, usehomologs)
        else:
            genes = NullQuery

        if params.get('rowgene'):
            genes = addgene(genes, params.get('rowgene'), species, usehomologs)

        # apply gene filter
        if params.get('genefilter'):
            genes = addgene(genes, params.get('genefilter'), species, usehomologs)

    except LookupError as e:
        # bad gene query
        return _abstracts_error(
            'Bad gene query.  Check your gene symbols: {0}.'.format(e.args[0]))
    except BadGenefileError:
        # The original call omitted the response argument to json.dump and
        # therefore raised TypeError instead of reporting the problem.
        return _abstracts_error(
            "Can't find this gene file!  It probably expired.  Please upload it again.")

    # should we only include reviews?
    try:
        onlyreviews = parseboolean(params['onlyreviews'])
    except (KeyError, ValueError):
        onlyreviews = False

    # error if no query
    if not keywords and not genes:
        return _abstracts_error('You must supply either genes or a query')

    # get sorting parameter
    orderby = params.get('orderby')
    if orderby:
        orderby = orderby.lower()
    if orderby not in (None, 'relevance', 'oldest', 'newest'):
        return _abstracts_error(
            'Invalid "orderby."  Valid options are None, "relevance", "oldest", or "newest".')

    # get limit and offset; int(None) raises TypeError when the parameter is
    # absent, int('junk') raises ValueError when it is malformed
    try:
        offset = int(params.get('offset'))
    except (TypeError, ValueError):
        offset = 0
    try:
        limit = int(params.get('limit'))
    except (TypeError, ValueError):
        limit = None

    # get keyword ID from query string
    keywordID = params.get('keywordnum')
    if keywordID:
        matching = Abstract.objects.filter(ka_abstract__keyphrase=keywordID).only('pubmed_id')
        keyword_abstracts = [a.pubmed_id for a in matching]
    else:
        keyword_abstracts = None

    # get optional metabolite ID
    metabolite = params.get('metabolite')

    # get abstract ID's from index
    page = abstracts_page(keywords, genes, usehomologs, limit, offset, orderby,
                          onlyreviews, keyword_abstracts, metabolite)

    # error if no abstracts
    if not page:
        return _abstracts_error('No more abstracts!')

    # create response
    resulthtml = render_to_string('abstracts.html', {'abstracts': page})
    return _abstracts_json({'validresult': True, 'result': resulthtml})
Exemplo n.º 2
0
def genesearch(request):
    """Does the actual search for the gene search.  Given a keyword query,
    a list of genes, species, homology option, offset, limit, sorting
    criterion, and response type (all via the query string), fetches a list of
    genes relevant to the query via the index and database, and returns the
    appropriate response.

    The request is parsed by ``searchparams``; every outcome (success or
    error) is returned through ``searchresponse``.  Errors are reported when
    a gene term cannot be matched, the gene file cannot be found, no query
    was supplied, no abstracts match the query, or no genes match.
    """

    params = searchparams(request)

    # use homology option to decide which gene-abstract table and which
    # abstract-count column to use.
    if params.usehomologs:
        geneabstract_tablename = 'homologene_gene_abstract'
        abstract_col = 'homolog_abstracts'
    else:
        geneabstract_tablename = 'gene_abstract'
        abstract_col = 'abstracts'

    genequery = None
    if params.genes or params.usegenefile:
        try:
            # get a query to run against the abstract index
            if params.usegenefile:
                genequery = genefile_lookup(params.genefileID, implicitOr=params.implicitOr, usehomologs=params.usehomologs)
            else:
                genequery = parse_gene_abstractquery(q=params.genes, tax=params.species, implicitOr=params.implicitOr, usehomologs=params.usehomologs)
        except LookupError as e:
            # a term in the gene query couldn't be matched to any genes.
            return searchresponse(validresult=False, download=params.download, errmsg='No genes match <b>{0}</b> for species {1}'.format(e.args[0], params.species))
        except BadGenefileError:
            return searchresponse(validresult=False, download=params.download, errmsg="Can't find this gene file!  It probably expired.  Please upload it again.")

    # don't do anything if we don't have a query
    if not genequery and not params.keywords:
        return searchresponse(validresult=False, download=params.download, errmsg="Please enter gene symbols or a keyword query.")

    # get abstracts matching keywords and genes
    abstracts = get_abstracts(params.keywords, genequery, params.usehomologs)

    # error if no abstracts matched the query.  Test for emptiness rather
    # than equality with [] so that any empty sequence is caught here and
    # never reaches the SQL below as an invalid "IN ()" clause.
    if not abstracts:
        return searchresponse(validresult=False, download=params.download, errmsg="Your query did not match any abstracts.", query=params.keywords, genes=params.genes, usehomologs=params.usehomologs, usegenefile=params.usegenefile)

    query_abstract_count = len(abstracts)

    # get corpus size
    total_abstract_count = corpus_size()

    if params.orderby in query_orderbys:
        query_orderby = query_orderbys[params.orderby] # orderby term to insert into SQL
    else:
        # unknown sort key: fall back to f1_score and record the fallback on
        # params so the response reports the ordering actually used
        query_orderby = params.orderby = 'f1_score'

    # one "%s" placeholder per abstract id; the ids themselves are passed as
    # query parameters, never interpolated into the SQL text
    placeholders = ','.join(['%s'] * query_abstract_count)

    # build SQL query for fetching genes.  Only values chosen in this
    # function (table/column names, the order-by term, the abstract count)
    # are formatted into the statement.
    sqlquery = """
    SELECT g.*, 
        `{abstract_col}` `abstracts_display`,
        COUNT(*) hits, 
        COUNT(*)/ (`{abstract_col}` + 10) `precision`,
        (2 * (COUNT(*) / `{abstract_col}`) * (COUNT(*) / {query_abstract_count})) / 
            ((COUNT(*) / `{abstract_col}`) + (COUNT(*) / {query_abstract_count})) f1_score
    FROM `{geneabstract_tablename}` a
    INNER JOIN `gene` g
    ON g.entrez_id = a.gene
    WHERE a.`abstract` in ({paramstring})
    AND g.`tax_id` = %s
    GROUP BY g.entrez_id
    ORDER BY `{orderby}` DESC
    LIMIT %s, %s;
    """.format(
        paramstring=placeholders,
        orderby=query_orderby,
        query_abstract_count=query_abstract_count,
        geneabstract_tablename=geneabstract_tablename,
        abstract_col=abstract_col)

    # execute sql query, get genes
    sql_params = list(abstracts) + [params.species, params.offset, params.query_limit]
    results = Gene.objects.raw(sqlquery, sql_params)

    # calculate p values with R's phyper (hypergeometric distribution),
    # taking the upper tail (lower_tail=False) at hits-1
    phyper = robjects.r['phyper']
    pvals_float = [
        phyper(g.hits - 1, query_abstract_count,
               total_abstract_count - query_abstract_count,
               g.abstracts_display, lower_tail=False)[0]
        for g in results]
    # very small p values are displayed as a floor instead of in full
    pvals = [('{0:.2e}'.format(p) if p > 0.0000000001 else '< 1e-10') for p in pvals_float]

    if not pvals:
        return searchresponse(validresult=False, download=params.download, errmsg="Your query didn't match any genes.", query=params.keywords, genes=params.genes, usehomologs=params.usehomologs, species=params.species, usegenefile=params.usegenefile)

    return searchresponse(validresult=True, download=params.download, results=results, genes=params.genes, geneop=params.geneop, pvals=pvals, offset=params.offset, orderby=params.orderby, query=params.keywords, limit=params.limit, usehomologs=params.usehomologs, species=params.species, query_abstract_count=query_abstract_count, abstracts=abstracts, usegenefile=params.usegenefile)