def make_query(query_string):
    """
    Build a ``Query`` for ``query_string`` with ``skip`` set to 1 and
    ``top`` set to 10.

    :param query_string: the text handed to the ``Query`` constructor
    :return: the configured ``Query`` object
    """
    query = Query(query_string)
    # presumably "first page, ten results" - verify against the Query class
    query.skip = 1
    query.top = 10
    return query
Exemplo n.º 2
0
def view_run_queries(request, topic_num):
    """
    Time the construction of a topic's queries and render the outcome.

    Reads ``<data_dir>/<topic_num>.queries`` line by line. Everything after
    the first space on a line is taken as the query text (the part before it
    is presumably the query number). A ``Query`` is built for at most 200
    lines; processing stops early at the first line without query text.

    :param request: the request object passed on to ``render``
    :param topic_num: (str) topic identifier, used as the file's base name
    :return: the result of rendering ``base/query_test.html`` with the topic
             number, the elapsed seconds and the number of queries processed
    """
    # from experiment_configuration import bm25
    max_queries = 200

    query_file_name = os.path.join(data_dir, topic_num + '.queries')
    logging.debug(query_file_name)

    start_time = timeit.default_timer()
    query_list = []

    with open(query_file_name, "r") as query_file:
        for line in query_file:
            if len(query_list) >= max_queries:
                break
            # TODO query_num = line.partition(' ')[0]
            # Drop the line terminator so it does not become part of the
            # query terms.
            query_str = unicode(line.partition(' ')[2].rstrip('\r\n'))
            if not query_str:
                break
            logging.debug(query_str)
            q = Query(query_str)
            q.skip = 1
            # TODO response = bm25.search(q)
            query_list.append(query_str)

    seconds = timeit.default_timer() - start_time

    # Count only the queries that were really processed; a counter bumped
    # before each read would also count the read that ended the loop.
    context_dict = {'topic_num': topic_num, 'seconds': seconds, 'num': len(query_list)}
    return render(request, 'base/query_test.html', context_dict)
Exemplo n.º 3
0
def search(request):
    """
    Run an image search for the ``q`` GET parameter and answer with JSON.

    Responds with status 405 for any method other than GET and with status
    400 when ``check_input`` yields no usable query terms.
    """
    if request.method != 'GET':
        # only GET is supported
        return HttpResponse(status=405)

    raw_terms = request.GET.get('q', '')
    query_terms = check_input(raw_terms)
    if not query_terms:
        # nothing searchable was supplied
        return HttpResponse(status=400)

    # the experiment is looked up through the experiment ID kept in the
    # session (False when the session has none)
    experiment = get_or_create_experiment(request.session.get('exp_id', False))

    engine = EngineFactory('bing', api_key="")
    query = Query(query_terms, top=experiment['top'], result_type="image")
    payload = engine.search(query).to_json()

    return HttpResponse(payload, content_type='application/json')
Exemplo n.º 4
0
def run_query(query):
    """
    Search with a "Bing" engine for ``query``, asking for the top 50
    results, and return the engine's response.

    :param query: the query text handed to ``Query``
    :return: whatever the engine's ``search`` returns
    """
    bing_query = Query(query, top=50)
    engine = EngineFactory("Bing", api_key=bing_api_key)
    return engine.search(bing_query)
Exemplo n.º 5
0
def run_query(query, condition):
    """
    Search ``bing_engine`` for ``query`` (top 100) and pass the response
    through the modifier registered for ``condition``.

    :param query: the query text handed to ``Query``
    :param condition: index/key into ``conditions`` selecting the modifier
    :return: the modifier's output for the search response
    """
    # NOTE: no caching happens here yet - the intention was to check a cache
    # first and only send the query to bing (storing the results) on a miss.
    response = bing_engine.search(Query(query, top=100))
    modifier = conditions[condition]
    return modifier(response)
Exemplo n.º 6
0
def run_query(query, condition):
    """
    Issue the query string to the module-level engine ``e`` (top 100
    results) and post-process the response with the modifier function
    selected by ``condition``.

    :param query: (str) the query input by the user
    :param condition: (int) the interface condition applied to the user's profile;
                      used to pick the modifier out of ``conditions``
    :return: the output of the modifier function applied to the search response
    """
    response = e.search(Query(query, top=100))
    return conditions[condition](response)
Exemplo n.º 7
0
def run_query(request, result_dict=None, query_terms='', page=1, page_len=10, condition=0, log_performance=False):
    """
    Run a cached search for ``query_terms`` and fill ``result_dict`` with the
    results and the pagination details of the requested page.

    :param request: the current request, used to fetch the experiment context
    :param result_dict: dictionary to populate; a new one is created when omitted
    :param query_terms: (str) the query text
    :param page: (int) requested page; values below 1 are treated as 1
    :param page_len: (int) number of results per page
    :param condition: index/key into ``experiment_setups`` selecting the engine
    :param log_performance: currently unused - performance logging is disabled
    :return: ``result_dict`` holding 'query', 'num_pages', the 'trec_*' entries
             and, where applicable, the 'curr_page', 'prev_page*' and
             'next_page*' entries
    """
    # A mutable default would be shared by every call that omits the argument,
    # leaking entries such as 'prev_page_link' from one request into the next.
    if result_dict is None:
        result_dict = {}

    # Stops an AWFUL lot of problems when people get up to mischief
    if page < 1:
        page = 1

    # Only needed by the disabled performance logging at the end.
    ec = get_experiment_context(request)

    query = Query(query_terms)
    query.skip = page
    query.top = page_len

    result_dict['query'] = query_terms
    search_engine = experiment_setups[condition].get_engine()

    # Read the cache once: asking twice can miss on the second call when the
    # entry expires in between, which would leave the response empty.
    cache_key = str(query)
    response = cache.cache.get(cache_key)
    if not response:
        response = search_engine.search(query)
        cache.cache.set(cache_key, response, 500)

    num_pages = response.total_pages

    result_dict['trec_results'] = None
    result_dict['trec_no_results_found'] = True
    result_dict['trec_search'] = False
    result_dict['num_pages'] = num_pages

    print("PAGE")
    print(num_pages)

    if num_pages > 0:
        result_dict['trec_search'] = True
        result_dict['trec_results'] = response.results

        result_dict['curr_page'] = response.actual_page

        # NOTE(review): only spaces are encoded here; other reserved
        # characters in the query (e.g. '&') end up unescaped in the link.
        page_link = "?query=" + query_terms.replace(' ', '+') + '&page='

        if page > 1:
            result_dict['prev_page'] = page - 1
            result_dict['prev_page_show'] = True

            prev_link = page_link + str(page - 1)
            if (page - 1) == 1:
                # links back to the first page carry the noperf flag
                prev_link += '&noperf=true'
            result_dict['prev_page_link'] = prev_link
        if page < num_pages:
            result_dict['next_page'] = page + 1
            result_dict['next_page_show'] = True
            result_dict['next_page_link'] = page_link + str(page + 1)

    # Disable performance logging - it's a hogging the performance!
    # If log_performance is True, we log the performance metrics.
    #if log_performance:
    #    log_event(event="QUERY_PERF",
    #              request=request,
    #              query=query_terms,
    #              metrics=get_query_performance_metrics(result_dict['trec_results'], ec['topicnum']))

    return result_dict
Exemplo n.º 8
0
def run_query(request,
              result_dict=None,
              query_terms='',
              page=1,
              page_len=10,
              condition=0,
              log_performance=False):
    """
    Run a cached search for ``query_terms`` and fill ``result_dict`` with the
    results and the pagination details of the requested page.

    :param request: the current request, used to fetch the experiment context
    :param result_dict: dictionary to populate; a new one is created when omitted
    :param query_terms: (str) the query text
    :param page: (int) requested page; values below 1 are treated as 1
    :param page_len: (int) number of results per page
    :param condition: index/key into ``experiment_setups`` selecting the engine
    :param log_performance: currently unused - performance logging is disabled
    :return: ``result_dict`` holding 'query', 'num_pages', the 'trec_*' entries
             and, where applicable, the 'curr_page', 'prev_page*' and
             'next_page*' entries
    """
    # A mutable default would be shared by every call that omits the argument,
    # leaking entries such as 'prev_page_link' from one request into the next.
    if result_dict is None:
        result_dict = {}

    # Stops an AWFUL lot of problems when people get up to mischief
    if page < 1:
        page = 1

    # Only needed by the disabled performance logging at the end.
    ec = get_experiment_context(request)

    query = Query(query_terms)
    query.skip = page
    query.top = page_len

    result_dict['query'] = query_terms
    search_engine = experiment_setups[condition].get_engine()

    # Read the cache once: asking twice can miss on the second call when the
    # entry expires in between, which would leave the response empty.
    cache_key = str(query)
    response = cache.cache.get(cache_key)
    if not response:
        response = search_engine.search(query)
        cache.cache.set(cache_key, response, 500)

    num_pages = response.total_pages

    result_dict['trec_results'] = None
    result_dict['trec_no_results_found'] = True
    result_dict['trec_search'] = False
    result_dict['num_pages'] = num_pages

    print("PAGE")
    print(num_pages)

    if num_pages > 0:
        result_dict['trec_search'] = True
        result_dict['trec_results'] = response.results

        result_dict['curr_page'] = response.actual_page

        # NOTE(review): only spaces are encoded here; other reserved
        # characters in the query (e.g. '&') end up unescaped in the link.
        page_link = "?query=" + query_terms.replace(' ', '+') + '&page='

        if page > 1:
            result_dict['prev_page'] = page - 1
            result_dict['prev_page_show'] = True

            prev_link = page_link + str(page - 1)
            if (page - 1) == 1:
                # links back to the first page carry the noperf flag
                prev_link += '&noperf=true'
            result_dict['prev_page_link'] = prev_link
        if page < num_pages:
            result_dict['next_page'] = page + 1
            result_dict['next_page_show'] = True
            result_dict['next_page_link'] = page_link + str(page + 1)

    # Disable performance logging - it's a hogging the performance!
    # If log_performance is True, we log the performance metrics.
    #if log_performance:
    #    log_event(event="QUERY_PERF",
    #              request=request,
    #              query=query_terms,
    #              metrics=get_query_performance_metrics(result_dict['trec_results'], ec['topicnum']))

    return result_dict
Exemplo n.º 9
0
def run_query(request, result_dict, query_terms='', page=1, page_len=10, condition=0, interface=1):
    """
    Helper method which populates the results dictionary for the requested
    page of a search.

    The engine belonging to ``condition`` is configured with the snippet size
    and fragmenter surround selected by ``interface`` before the query is
    issued. QUERY_START and QUERY_END events are logged around the work.

    :param request: the current request, passed on to ``log_event``
    :param result_dict: dictionary to populate (modified in place)
    :param query_terms: (str) the query text
    :param page: (int) requested page; values below 1 are treated as 1
    :param page_len: (int) number of results per page
    :param condition: index/key into ``experiment_setups`` selecting the engine
    :param interface: (int) 1-based interface number selecting the snippet settings
    :return: None - the results are delivered through ``result_dict``
    """
    log_event(event="QUERY_START", request=request, query=query_terms)

    # Stops an AWFUL lot of problems when people get up to mischief
    if page < 1:
        page = 1

    query = Query(query_terms)
    query.skip = page
    query.top = page_len
    result_dict['query'] = query_terms

    # Debug output goes through logging rather than stdout, like the page
    # count further down.
    logging.debug('%s', query)
    search_engine = experiment_setups[condition].get_engine()

    # Both lists are indexed by interface number - 1.
    # NOTE(review): interface=0 silently selects the last entry and values
    # above 4 raise IndexError - confirm callers only pass 1..4.
    snippet_sizes = [2, 0, 1, 4]
    snippet_surround = [40, 40, 40, 40]

    pos = interface - 1
    search_engine.snippet_size = snippet_sizes[pos]
    search_engine.set_fragmenter(frag_type=2, surround=snippet_surround[pos])

    response = search_engine.search(query)

    log_event(event="QUERY_END", request=request, query=query_terms)
    num_pages = response.total_pages

    result_dict['trec_results'] = None
    result_dict['trec_no_results_found'] = True
    result_dict['trec_search'] = False
    result_dict['num_pages'] = num_pages

    logging.debug('PAGE %d', num_pages)

    if num_pages > 0:
        result_dict['trec_search'] = True
        # Only the last page_len entries of the response are handed on.
        results = response.results
        result_dict['trec_results'] = results[len(results) - page_len:len(results)]
        result_dict['curr_page'] = response.actual_page
        logging.debug('%s', response.actual_page)

        page_link = "?query=" + query_terms.replace(' ', '+') + '&page='

        if page > 1:
            result_dict['prev_page'] = page - 1
            result_dict['prev_page_show'] = True

            prev_link = page_link + str(page - 1)
            if (page - 1) == 1:
                # links back to the first page carry the noperf flag
                prev_link += '&noperf=true'
            result_dict['prev_page_link'] = prev_link
        if page < num_pages:
            result_dict['next_page'] = page + 1
            result_dict['next_page_show'] = True
            result_dict['next_page_link'] = page_link + str(page + 1)
Exemplo n.º 10
0
 def __init__(self, key, vals, qrel_handler, engine=None, query_time=0):
     """
     Initialise the counters, timers and event trackers for one logged query.

     When a truthy ``engine`` is given, the query is re-issued (top 1000
     results) and the performance metrics computed for its results are
     stored in ``self.perf``; the response is kept in ``self.query_response``.

     :param key: string of exactly five space-separated fields; only the
         last one (the topic number) is used here
     :param vals: fields of the log line: [0] date, [1] time, [7] topic and
         [9:] the query terms (presumably a list - the slice is edited in place)
     :param qrel_handler: handed on to get_query_performance_metrics
     :param engine: optional search engine used to re-issue the query
     :param query_time: stored unchanged as ``self.query_time``
     """
     self.key = key
     self.qrel_handler = qrel_handler

     # There's quote marks at the start and end of the query, remove them:
     # the first term loses its first character, the last term its last one.
     terms = vals[9:]
     terms[0] = terms[0][1:]
     terms[-1] = terms[-1][:-1]
     self.query = terms

     self.topic = vals[7]

     # Event, document and hover counters
     self.event_count = self.doc_count = self.doc_depth = 0
     self.hover_count = self.hover_depth = 0  # Added by David
     self.hover_trec_rel_count = self.hover_trec_nonrel_count = 0
     self.doc_rel_count = 0
     self.doc_marked_list = []
     self.doc_unmarked_list = []
     self.doc_rel_depth = 0
     self.doc_trec_rel_count = 0  # Records documents MARKED that are trec rel
     self.doc_trec_nonrel_count = 0  # Records documents MARKED that are not trec rel
     self.pages = 0
     self.curr_page = 1

     # Timing; the log line's date and time serve as the session start stamp
     timestamp = '{date} {time}'.format(date=vals[0], time=vals[1])
     self.session_start_time = timestamp
     self.session_end_time = None
     self.query_time = query_time
     self.session_time = self.snippet_time = self.document_time = 0.0
     self.view_serp_time = 0
     self.last_serp_view_time = None  # Added by David

     # Event tracking
     self.curr_event = self.last_event = None
     self.last_interaction_event = self.last_interaction_time = None
     self.last_last_event = None
     self.doc_click_time = False

     # Probability variables, added by David (2016-11-30)
     self.doc_clicked_trec_rel_count = self.doc_clicked_trec_nonrel_count = 0

     self.query_response = None  # Stores the results for parsing later on.

     # Testing by David for new SERP
     self.last_serp_event = None
     self.new_total_serp = 0.0

     # Additional attributes to store details on system lag and imposed delays
     self.serp_lag_calculated_for = []  # Added by David, list of times for the queries we've worked out lag for!
     self.serp_lag = 0.0  # Added by David

     self.last_query_delay_time = None
     self.imposed_query_delay = self.system_query_delay = self.total_query_duration = 0.0
     self.last_document_delay_time = None
     self.imposed_document_delay = self.document_lag = 0.0

     # issue query to the engine and get performance values
     self.p = []
     # Placeholder used when no engine is given: a one-element list whose
     # single string repeats '0.0  ' fourteen times.
     self.perf = ['0.0  ' * 14]
     if engine:
         perf_query = Query(' '.join(self.query))
         perf_query.skip = 1
         perf_query.top = 1000
         response = engine.search(perf_query)
         # key has to split into exactly five fields; only the topic number
         # is needed for the metrics
         _user, _cond, _interface, _order, topicnum = key.split(' ')
         self.perf = get_query_performance_metrics(self.qrel_handler, response.results, topicnum)
         self.query_response = response

     self.last_event = 'QUERY_ISSUED'
     self.last_time = timestamp
Exemplo n.º 11
0
__author__ = 'Craig'
from ifind.search import Query, EngineFactory

# Demo: ask a Wikipedia engine for the top five results for "Google".
q = Query("Google", top=5)
e = EngineFactory("Wikipedia")

# Show the query and engine objects before searching.
print(q)
print(e)

response = e.search(q)

# Dump every result of the response.
for r in response.results:
    print(r)
Exemplo n.º 12
0
def main():
    """
    Run the sample query 'Sea Pirates' against the index in ``fullindex/``
    and, for each of the (at most five) results requested, print:

    * the result's position and the length of its summary,
    * the set of nouns reported by ``extract_nouns``,
    * the list of words tagged as nouns by ``nltk.pos_tag``,
    * the set of named entities found by ``extract_entity_names``.
    """
    bm25 = Whooshtrec(
        whoosh_index_dir='fullindex/',
        stopwords_file='',
        model=1,
        newschema=True)

    query = Query('Sea Pirates')
    query.skip = 1
    query.top = 5

    bm25.snippet_size = 3

    response = bm25.search(query)

    separator = "--------------"
    noun_tags = ('NN', 'NNP', 'NNS', 'NNPS')

    for i, result in enumerate(response.results, 1):
        print('{0} {1}'.format(i, len(result.summary)))

        # The summary is parsed as HTML; the analysis works on its plain text.
        soup = BeautifulSoup(result.summary, 'html.parser')
        text = soup.getText()

        print(separator)
        print(set(extract_nouns(text)))
        print(separator)

        sentences = nltk.sent_tokenize(text)
        tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]

        # Tag all words of the summary as one flat sequence and keep those
        # carrying a noun tag.
        all_words = [word for sentence in tokenized_sentences for word in sentence]
        tagged = nltk.pos_tag(all_words)
        nouns = [word for word, pos in tagged if pos in noun_tags]
        print(str(nouns))

        print(separator)
        # Named entities are extracted sentence by sentence.
        tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
        chunked_sentences = nltk.chunk.ne_chunk_sents(tagged_sentences, binary=True)
        entity_names = []
        for tree in chunked_sentences:
            entity_names.extend(extract_entity_names(tree))

        print(set(entity_names))
Exemplo n.º 13
0
from whoosh.index import open_dir
from whoosh.qparser import QueryParser

# Index directory and stopword file are taken from the command line.
whoosh_path, stopwords_path = sys.argv[1], sys.argv[2]

# Ask for the third page with ten results per page.
page, page_len = 3, 10

search_engine = Whooshtrec(
    whoosh_index_dir=whoosh_path,
    stopwords_file=stopwords_path,
    model=1,
    newschema=True,
)

query = Query('wildlife extinction')
query.skip = page
query.top = page_len

response = search_engine.search(query)

# The response object itself is iterated here (not response.results).
for result in response:
    print('{0} {1}'.format(result.whooshid, result.rank))

# Paging details reported by the response.
print(response.result_total)
print(response.results_on_page)
print(response.actual_page)

########

print