Example #1
import re
from time import time

from flask import jsonify, request

# str_to_bool, abort_json, fulltext_search_title, and elapsed are helper
# functions defined elsewhere in this application.

def get_search_query():
    query = request.args.get("query", None)
    is_oa = request.args.get("is_oa", None)

    if is_oa is not None:
        try:
            is_oa = str_to_bool(is_oa)
        except ValueError:
            if is_oa == 'null':
                is_oa = None
            else:
                abort_json(400, "is_oa must be 'true' or 'false'")

    if not query:
        abort_json(400, "query parameter is required")

    start_time = time()
    response = fulltext_search_title(query, is_oa)
    sorted_response = sorted(response, key=lambda k: k['score'], reverse=True)

    for api_response in sorted_response:
        doi = api_response['response']['doi']
        # match a trailing version suffix like ".v2" or "/v2" on the DOI
        version_suffix = re.findall(r'[./](v\d+)$', doi, re.IGNORECASE)

        if version_suffix:
            title = api_response['response']['title']
            title = '{} ({})'.format(title, version_suffix[0].upper())
            api_response['response']['title'] = title

    elapsed_time = elapsed(start_time, 3)
    return jsonify({"results": sorted_response, "elapsed_seconds": elapsed_time})
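Example #1 depends on a str_to_bool helper whose implementation isn't shown. A minimal sketch of what it plausibly looks like, given that the except clause above expects it to raise ValueError on unrecognized input:

def str_to_bool(value):
    # Hypothetical sketch; the project's real helper isn't shown in these
    # examples. Map common boolean strings to bool and raise ValueError
    # otherwise, which is the behavior Example #1's except clause expects.
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "1"):
        return True
    if lowered in ("false", "f", "no", "0"):
        return False
    raise ValueError("invalid boolean string: {!r}".format(value))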
Example #2
def get_search_query(query):
    start_time = time()
    my_pubs = fulltext_search_title(query)
    response = [my_pub.to_dict_search() for my_pub in my_pubs]
    sorted_response = sorted(response, key=lambda k: k['score'], reverse=True)
    elapsed_time = elapsed(start_time, 3)
    return jsonify({"results": sorted_response, "elapsed_seconds": elapsed_time})
Example #3
def get_search_query(query):
    start_time = time()
    my_pubs = fulltext_search_title(query)
    response = [my_pub.to_dict_search() for my_pub in my_pubs]
    sorted_response = sorted(response, key=lambda k: k['score'], reverse=True)
    elapsed_time = elapsed(start_time, 3)
    return jsonify({
        "results": sorted_response,
        "elapsed_seconds": elapsed_time
    })
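Examples #2 and #3 (and the others) time themselves with an elapsed(start_time, precision) helper that also isn't shown. A plausible sketch, assuming it returns the seconds since start_time, rounded:

from time import time

def elapsed(since, rounding=2):
    # Hypothetical sketch of the timing helper the examples call as both
    # elapsed(start) and elapsed(start, 3): seconds since `since`,
    # rounded to `rounding` decimal places.
    return round(time() - since, rounding)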
Example #4
def get_search_query(query):
    start_time = time()
    my_pubs = fulltext_search_title(query)

    print "building response"
    response = [my_pub.to_dict_serp() for my_pub in my_pubs]
    sorted_response = sorted(response, key=lambda k: k['score'], reverse=True)
    print "done building response"
    print "getting synonyms"
    synonym = get_synonym(query)
    print "done getting synonyms"
    print "getting terms"
    term_lookup = get_term_lookup(query)
    if synonym and not term_lookup:
        term_lookup = get_term_lookup(synonym)
    print "done getting terms"
    elapsed_time = elapsed(start_time, 3)
    return jsonify({
        "results": sorted_response,
        "synonym": synonym,
        "term_lookup": term_lookup,
        "elapsed_seconds": elapsed_time
    })
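Each variant returns a Flask response, so get_search_query is presumably registered as a view function somewhere. The routing isn't shown in these examples; a hypothetical wiring with a quick test-client call might look like this:

from flask import Flask

app = Flask(__name__)

# Hypothetical route; the examples don't show their actual decorators.
@app.route("/search/<path:query>")
def search(query):
    return get_search_query(query)

if __name__ == "__main__":
    # Exercise the endpoint without starting a server.
    with app.test_client() as client:
        print(client.get("/search/machine_learning").get_json())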
Example #5
def get_search_query(query):
    # request, jsonify, time, elapsed, str_to_bool, abort_json, db, orm,
    # PubDoi, and PubList all come from the surrounding Flask/SQLAlchemy
    # application and its helper modules.

    start_time = time()

    query = query.replace("_", " ")
    if request.headers.getlist("X-Forwarded-For"):
        ip = request.headers.getlist("X-Forwarded-For")[0]
    else:
        ip = request.remote_addr

    if not request.args.get("automated", None):
        log_query(query, ip)

    no_live_calls = request.args.get("no-live-calls", "")
    # note: any non-empty value is truthy, so the "true" default below means
    # the cache is skipped unless the caller sends an explicitly empty nocache=
    nocache = request.args.get("nocache", "true")
    return_full_api_response = True
    if request.args.get("minimum", ""):
        return_full_api_response = False

    query_entities = get_entities_from_query(query)
    print "query_entities", query_entities
    getting_entity_lookup_elapsed = elapsed(start_time, 3)

    # page starts at 1, not 0
    page = 1
    try:
        page = int(request.args.get("page"))
    except (TypeError, ValueError):
        # missing or non-integer page parameter; keep the default
        pass

    if page > 10:
        abort_json(400, "Page too large; the API supports at most 10 pages.")

    if request.args.get("pagesize"):
        pagesize = int(request.args.get("pagesize"))
    else:
        pagesize = 10
    if pagesize > 100:
        abort_json(400, "pagesize too large; max 100")

    try:
        oa_only = str_to_bool(request.args.get("oa", "false"))
    except ValueError:
        oa_only = False

    if nocache:
        print("skipping cache")
    else:
        if query_entities and len(query_entities) == 1 and page == 1:
            cached_response = get_cached_api_response(query_entities[0],
                                                      oa_only)
            if cached_response and cached_response[0]:
                (api_response, collected_date) = cached_response
                total_time = elapsed(start_time, 3)
                api_response["_cached_on"] = collected_date.isoformat()
                api_response["_timing"] = {"total": total_time}
                print "got response!!!"
                return jsonify(api_response)

    # fulltext_search_title returns the candidate pubs along with timings for
    # its PMID-identification and pub-loading phases
    (pubs_to_sort, time_to_pmids_elapsed,
     time_for_pubs_elapsed) = fulltext_search_title(
         query, query_entities, oa_only, full=return_full_api_response)

    initializing_publist_start_time = time()
    # sorted_pubs = sorted(pubs_to_sort, key=lambda k: k.adjusted_score, reverse=True)
    # selected_pubs = sorted_pubs[(pagesize * (page-1)):(pagesize * page)]
    # selected_pmids = [p.pmid for p in selected_pubs]

    sorted_pubs = sorted(pubs_to_sort,
                         key=lambda k: k["adjusted_score"],
                         reverse=True)
    selected_pubs = sorted_pubs[(pagesize * (page - 1)):(pagesize * page)]

    selected_dois = [p["doi"] for p in selected_pubs]
    print(selected_dois)

    selected_pubs_full = []
    if selected_dois:
        selected_pubs_full += db.session.query(PubDoi).filter(
            PubDoi.doi.in_(selected_dois)).options(
                orm.undefer_group('full')).all()

    selected_pubs_full = [p for p in selected_pubs_full
                          if not p.suppress]  # get rid of retracted ones
    for my_pub in selected_pubs_full:
        my_pub.adjusted_score = [
            p["adjusted_score"] for p in sorted_pubs
            if p["doi"] == my_pub.display_doi
        ][0]

    my_pub_list = PubList(pubs=selected_pubs_full)
    initializing_publist_elapsed = elapsed(initializing_publist_start_time, 3)

    set_dandelions_start_time = time()
    if not no_live_calls:
        my_pub_list.set_dandelions()
    set_dandelions_elapsed = elapsed(set_dandelions_start_time)
    set_pictures_start_time = time()
    my_pub_list.set_pictures()
    set_pictures_elapsed = elapsed(set_pictures_start_time)

    to_dict_start_time = time()
    results = my_pub_list.to_dict_serp_list(full=return_full_api_response)

    response = {
        "results": results,
        "page": page,
        "oa_only": oa_only,
        "total_num_pubs": min(100, len(pubs_to_sort)),
        "query_entities": query_entities
    }
    if return_full_api_response:
        response["annotations"] = my_pub_list.to_dict_annotation_metadata()

    to_dict_elapsed = elapsed(to_dict_start_time, 3)
    total_time = elapsed(start_time, 3)

    response["_timing"] = {
        "9 total": total_time,
        "1 getting_entity_lookup_elapsed": getting_entity_lookup_elapsed,
        "2 identify_pmids_for_top_100": time_to_pmids_elapsed,
        "3 loading_top_100_data_for_sorting": time_for_pubs_elapsed,
        "4 loading_final_10_full_pubs": initializing_publist_elapsed,
        "5 set_dandelions_elapsed": set_dandelions_elapsed,
        "6 set_pictures_elapsed": set_pictures_elapsed,
        "7 to_dict_elapsed": to_dict_elapsed,
    }

    print u"finished query for {}: took {} seconds".format(
        query, elapsed(start_time))
    return jsonify(response)
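The paging window in Example #5, sorted_pubs[(pagesize * (page - 1)):(pagesize * page)], is the classic 1-based page slice. Factored out as a standalone helper (hypothetical, purely for illustration) with a couple of sanity checks:

def page_slice(items, page, pagesize):
    # 1-based pages, as in Example #5: page 1 yields items[0:pagesize].
    start = pagesize * (page - 1)
    return items[start:start + pagesize]

assert page_slice(list(range(25)), 1, 10) == list(range(0, 10))
assert page_slice(list(range(25)), 3, 10) == list(range(20, 25))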