def make_query(query_string):
    q = Query(query_string)
    q.skip = 1
    q.top = 10
    return q
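# Minimal usage sketch (an assumption, not from the original code):
# make_query is presumably paired with an ifind engine, mirroring the
# EngineFactory usage in the Wikipedia example further down.
from ifind.search import EngineFactory

def demo_make_query():
    engine = EngineFactory("Wikipedia")
    response = engine.search(make_query("sea pirates"))
    for result in response.results:
        print result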
def view_run_queries(request, topic_num):
    # from experiment_configuration import bm25
    num = 0
    query_file_name = os.path.join(data_dir, topic_num + '.queries')
    logging.debug(query_file_name)
    start_time = timeit.default_timer()
    query_list = []

    with open(query_file_name, "r") as query_file:
        while num < 200:
            num += 1
            line = query_file.readline()
            # Split on the first space: identifier, then query terms.
            parts = line.partition(' ')
            # TODO
            query_num = parts[0]
            query_str = unicode(parts[2])
            if query_str:
                logging.debug(query_str)
                q = Query(query_str)
                q.skip = 1  # TODO
                response = bm25.search(q)
                query_list.append(query_str)
            else:
                # Blank line or end of file: stop reading.
                break

    seconds = timeit.default_timer() - start_time
    context_dict = {'topic_num': topic_num, 'seconds': seconds, 'num': num}
    return render(request, 'base/query_test.html', context_dict)
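# The on-disk format of <topic_num>.queries is not documented in the
# original; from the partition(' ') above it appears to be one query per
# line, "<query_num> <query terms...>". A tiny hedged parse demo:
def parse_query_line(line):
    query_num, _, query_str = line.partition(' ')
    return query_num, query_str.strip()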
def search(request):
    """
    Accepts a GET request containing query terms, searches, and returns
    the results as JSON.
    """
    # Invalid HTTP method.
    if request.method != 'GET':
        return HttpResponse(status=405)

    # Check the input for validity.
    query_terms = check_input(request.GET.get('q', ''))

    # Bad request.
    if not query_terms:
        return HttpResponse(status=400)

    # Get the experiment ID and load the experiment.
    exp_id = request.session.get('exp_id', False)
    experiment = get_or_create_experiment(exp_id)

    # Execute the query.
    engine = EngineFactory('bing', api_key="")
    query = Query(query_terms, top=experiment['top'], result_type="image")
    response = engine.search(query)

    return HttpResponse(response.to_json(), content_type='application/json')
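# Hedged usage sketch: exercising the view above with Django's test
# client. The /search/ route is an assumption; the URLconf is not shown
# in the original.
from django.test import Client

def demo_search_view():
    client = Client()
    resp = client.get('/search/', {'q': 'sea pirates'})
    print resp.status_code       # 200 on success, 400 on empty input
    print resp['Content-Type']   # 'application/json' on success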
def run_query(query):
    q = Query(query, top=50)
    e = EngineFactory("Bing", api_key=bing_api_key)
    response = e.search(q)
    return response
def run_query(query, condition):
    q = Query(query, top=100)
    # Check the cache: if the query is there, return the cached results;
    # else send the query to Bing and store the results in the cache.
    # (Note: the caching described here is not actually implemented below.)
    response = bing_engine.search(q)
    mod = conditions[condition]
    mod_response = mod(response)
    return mod_response
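# Hedged sketch of the caching the comment above describes, using a
# plain module-level dict (an assumption; the original may intend a
# Django cache instead, as in the run_query variants further down).
# bing_engine and conditions come from the surrounding module.
_query_cache = {}

def run_query_cached(query, condition):
    q = Query(query, top=100)
    key = str(q)
    if key in _query_cache:
        response = _query_cache[key]
    else:
        response = bing_engine.search(q)
        _query_cache[key] = response
    mod = conditions[condition]
    return mod(response)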
def run_query(query, condition): """ runs a search query on Bing using the query string passed, applies the relevant modifier function and returns the results :param query: (str)the query input by the user :param condition: (int)the interface condition applied to the user's profile :return: (Response)the results of the search after applying the correct modifier function """ q = Query(query, top=100) response = e.search(q) mod = conditions[condition] mod_results = mod(response) return mod_results
def run_query(request, result_dict=None, query_terms='', page=1, page_len=10,
              condition=0, log_performance=False):
    # Avoid the mutable-default-argument pitfall.
    if result_dict is None:
        result_dict = {}

    # Stops an AWFUL lot of problems when people get up to mischief.
    if page < 1:
        page = 1

    ec = get_experiment_context(request)

    query = Query(query_terms)
    query.skip = page
    query.top = page_len
    result_dict['query'] = query_terms

    search_engine = experiment_setups[condition].get_engine()

    result_cache = True
    if result_cache:
        # Fetch once rather than probing the cache twice.
        response = cache.cache.get(str(query))
        if response is None:
            response = search_engine.search(query)
            cache.cache.set(str(query), response, 500)
    else:
        response = search_engine.search(query)

    num_pages = response.total_pages

    result_dict['trec_results'] = None
    result_dict['trec_no_results_found'] = True
    result_dict['trec_search'] = False
    result_dict['num_pages'] = num_pages

    print "PAGE"
    print num_pages

    if num_pages > 0:
        result_dict['trec_search'] = True
        result_dict['trec_results'] = response.results
        result_dict['curr_page'] = response.actual_page

        if page > 1:
            result_dict['prev_page'] = page - 1
            result_dict['prev_page_show'] = True

            if (page - 1) == 1:
                result_dict['prev_page_link'] = "?query=" + query_terms.replace(' ', '+') + '&page=1&noperf=true'
            else:
                result_dict['prev_page_link'] = "?query=" + query_terms.replace(' ', '+') + '&page=' + str(page - 1)

        if page < num_pages:
            result_dict['next_page'] = page + 1
            result_dict['next_page_show'] = True
            result_dict['next_page_link'] = "?query=" + query_terms.replace(' ', '+') + '&page=' + str(page + 1)

    # Performance logging disabled - it was hogging performance!
    # If log_performance is True, we would log the performance metrics.
    # if log_performance:
    #     log_event(event="QUERY_PERF",
    #               request=request,
    #               query=query_terms,
    #               metrics=get_query_performance_metrics(result_dict['trec_results'], ec['topicnum']))

    return result_dict
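# Hedged refactor sketch (not from the original): the get-or-set cache
# pattern above, extracted as a helper that both run_query variants in
# this codebase could share. Assumes the same cache.cache object.
def get_cached_response(search_engine, query, timeout=500):
    response = cache.cache.get(str(query))
    if response is None:
        response = search_engine.search(query)
        cache.cache.set(str(query), response, timeout)
    return response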
def run_query(request, result_dict, query_terms='', page=1, page_len=10, condition=0, interface=1):
    """
    Helper method which populates the results dictionary in place and
    configures the engine for the given interface.
    :param request: the Django request object
    :param result_dict: dictionary to populate with results and paging details
    :param query_terms: (str) the query input by the user
    :param page: (int) the 1-based page number to fetch
    :param page_len: (int) the number of results per page
    :param condition: (int) index into experiment_setups selecting the engine
    :param interface: (int) 1-based interface number selecting snippet settings
    :return: None (result_dict is mutated in place)
    """
    log_event(event="QUERY_START", request=request, query=query_terms)

    # Stops an AWFUL lot of problems when people get up to mischief.
    if page < 1:
        page = 1

    query = Query(query_terms)
    query.skip = page
    query.top = page_len
    result_dict['query'] = query_terms
    logging.debug(query)

    search_engine = experiment_setups[condition].get_engine()

    # Per-interface snippet settings, indexed by interface - 1.
    snippet_sizes = [2, 0, 1, 4]
    snippet_surround = [40, 40, 40, 40]
    pos = interface - 1
    search_engine.snippet_size = snippet_sizes[pos]
    search_engine.set_fragmenter(frag_type=2, surround=snippet_surround[pos])

    response = search_engine.search(query)
    log_event(event="QUERY_END", request=request, query=query_terms)
    num_pages = response.total_pages

    result_dict['trec_results'] = None
    result_dict['trec_no_results_found'] = True
    result_dict['trec_search'] = False
    result_dict['num_pages'] = num_pages
    logging.debug('PAGE %d', num_pages)

    if num_pages > 0:
        result_dict['trec_search'] = True
        # Keep only the last page_len results (the requested page).
        result_dict['trec_results'] = response.results[-page_len:]
        result_dict['curr_page'] = response.actual_page
        logging.debug(response.actual_page)

        if page > 1:
            result_dict['prev_page'] = page - 1
            result_dict['prev_page_show'] = True

            if (page - 1) == 1:
                result_dict['prev_page_link'] = "?query=" + query_terms.replace(' ', '+') + '&page=1&noperf=true'
            else:
                result_dict['prev_page_link'] = "?query=" + query_terms.replace(' ', '+') + '&page=' + str(page - 1)

        if page < num_pages:
            result_dict['next_page'] = page + 1
            result_dict['next_page_show'] = True
            result_dict['next_page_link'] = "?query=" + query_terms.replace(' ', '+') + '&page=' + str(page + 1)
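# Hedged refactor sketch (not from the original): the per-interface
# snippet settings above, expressed as a mapping for readability.
SNIPPET_SETTINGS = {
    1: {'size': 2, 'surround': 40},
    2: {'size': 0, 'surround': 40},
    3: {'size': 1, 'surround': 40},
    4: {'size': 4, 'surround': 40},
}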
def __init__(self, key, vals, qrel_handler, engine=None, query_time=0):
    self.key = key
    self.qrel_handler = qrel_handler
    self.query = vals[9:]

    # There are quote marks at the start and end of the query; remove them.
    self.query[0] = self.query[0][1:]
    self.query[-1] = self.query[-1][:-1]

    self.topic = vals[7]
    self.event_count = 0
    self.doc_count = 0
    self.doc_depth = 0
    self.hover_count = 0  # Added by David
    self.hover_depth = 0  # Added by David
    self.hover_trec_rel_count = 0
    self.hover_trec_nonrel_count = 0
    self.doc_rel_count = 0
    self.doc_marked_list = []
    self.doc_unmarked_list = []
    self.doc_rel_depth = 0
    self.doc_trec_rel_count = 0  # Records documents MARKED that are trec rel.
    self.doc_trec_nonrel_count = 0  # Records documents MARKED that are not trec rel.
    self.pages = 0
    self.curr_page = 1
    self.session_start_time = '{date} {time}'.format(date=vals[0], time=vals[1])
    self.session_end_time = None
    self.query_time = query_time
    self.session_time = 0.0
    self.snippet_time = 0.0
    self.document_time = 0.0
    self.view_serp_time = 0
    self.last_serp_view_time = None  # Added by David
    self.curr_event = None
    self.last_event = None
    self.last_interaction_event = None
    self.last_interaction_time = None
    self.last_last_event = None
    self.doc_click_time = False

    # Probability variables, added by David (2016-11-30).
    self.doc_clicked_trec_rel_count = 0
    self.doc_clicked_trec_nonrel_count = 0
    self.query_response = None  # Stores the results for parsing later on.

    # Testing by David for the new SERP.
    self.last_serp_event = None
    self.new_total_serp = 0.0

    # Additional attributes to store details on system lag and imposed delays.
    self.serp_lag_calculated_for = []  # Added by David: times for the queries we've worked out lag for.
    self.serp_lag = 0.0  # Added by David
    self.last_query_delay_time = None
    self.imposed_query_delay = 0.0
    self.system_query_delay = 0.0
    self.total_query_duration = 0.0
    self.last_document_delay_time = None
    self.imposed_document_delay = 0.0
    self.document_lag = 0.0

    # Issue the query to Whoosh and get performance values.
    self.p = []
    self.perf = ['0.0 ' * 14]  # Default: one space-separated string of 14 zeros.

    if engine:
        q = Query(' '.join(self.query))
        q.skip = 1
        q.top = 1000
        response = engine.search(q)
        (un, cond, interface, order, topicnum) = key.split(' ')
        self.perf = get_query_performance_metrics(self.qrel_handler, response.results, topicnum)
        self.query_response = response

    self.last_event = 'QUERY_ISSUED'
    self.last_time = '{date} {time}'.format(date=vals[0], time=vals[1])
__author__ = 'Craig'

from ifind.search import Query, EngineFactory

q = Query("Google", top=5)
e = EngineFactory("Wikipedia")

print q
print e

response = e.search(q)

for r in response.results:
    print r
import nltk
from bs4 import BeautifulSoup

from ifind.search import Query

# Whooshtrec, extract_nouns and extract_entity_names are assumed to be
# imported or defined elsewhere in this module.


def main():
    bm25 = Whooshtrec(
        whoosh_index_dir='fullindex/',
        stopwords_file='',
        model=1,
        newschema=True)

    query = Query('Sea Pirates')
    query.skip = 1
    query.top = 5
    bm25.snippet_size = 3

    response = bm25.search(query)

    i = 1
    for result in response.results:
        print i, len(result.summary)

        # Strip the HTML markup from the snippet.
        soup = BeautifulSoup(result.summary, 'html.parser')
        text = soup.getText()

        print "--------------"
        n = extract_nouns(text)
        print set(n)
        print "--------------"

        sentences = nltk.sent_tokenize(text)
        tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]

        # Flatten the tokenized sentences into a single token list.
        cat_sentences = []
        for ts in tokenized_sentences:
            for w in ts:
                cat_sentences.append(w)

        tagged = nltk.pos_tag(cat_sentences)
        nouns = [word for word, pos in tagged
                 if pos in ('NN', 'NNP', 'NNS', 'NNPS')]
        downcased = [x.lower() for x in nouns]
        joined = " ".join(downcased).encode('utf-8')
        into_string = str(nouns)
        print into_string
        print "--------------"

        tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
        chunked_sentences = nltk.chunk.ne_chunk_sents(tagged_sentences, binary=True)

        entity_names = []
        for tree in chunked_sentences:
            entity_names.extend(extract_entity_names(tree))

        print set(entity_names)
        i += 1
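# Hedged sketches of the two helpers main() calls but does not define
# here; extract_entity_names follows the standard NLTK tree-walking
# recipe, and extract_nouns mirrors the inline tagging logic above.
# Both are assumptions about the original code, not copies of it.

def extract_nouns(text):
    tokens = nltk.word_tokenize(text)
    tagged = nltk.pos_tag(tokens)
    return [word for word, pos in tagged if pos.startswith('NN')]


def extract_entity_names(tree):
    entity_names = []
    # With binary=True, ne_chunk_sents labels named-entity subtrees 'NE'.
    if hasattr(tree, 'label') and tree.label() == 'NE':
        entity_names.append(' '.join(child[0] for child in tree))
    else:
        for child in tree:
            if hasattr(child, 'label'):
                entity_names.extend(extract_entity_names(child))
    return entity_names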
import sys

from whoosh.index import open_dir
from whoosh.qparser import QueryParser

# Whooshtrec and Query are assumed to be imported elsewhere
# (see the ifind examples above).

whoosh_path = sys.argv[1]
stopwords_path = sys.argv[2]

page = 3
page_len = 10

search_engine = Whooshtrec(
    whoosh_index_dir=whoosh_path,
    stopwords_file=stopwords_path,
    model=1,
    newschema=True)

query = Query('wildlife extinction')
query.skip = page
query.top = page_len

response = search_engine.search(query)

for result in response:
    print '{0} {1}'.format(result.whooshid, result.rank)

print response.result_total
print response.results_on_page
print response.actual_page
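# Hedged sketch (not from the original script): the otherwise-unused
# whoosh imports above suggest the same paged query could be issued
# against the index directly. The 'content' field name is an assumption.
ix = open_dir(whoosh_path)
with ix.searcher() as searcher:
    parser = QueryParser('content', ix.schema)
    results = searcher.search_page(parser.parse(u'wildlife extinction'),
                                   page, pagelen=page_len)
    for hit in results:
        print hit.docnum, hit.rank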