def create_query_object():
    """
    Nested helper: builds a Query from the enclosing method's
    query_text, page and page_len, issues it through the parent
    object's search interface, and returns it with the response
    attached.
    """
    q = Query(query_text)
    q.skip = page
    q.top = page_len
    q.response = self.__search_interface.issue_query(q)
    return q
def create_query_object():
    """
    Nested helper: builds a Query from the enclosing method's
    query_text, page and page_len, issues it through the parent
    object's search interface, and returns it with the response
    attached.
    """
    q = Query(query_text)
    q.skip = page
    q.top = page_len
    q.response = self._search_interface.issue_query(q)
    return q
def _make_query_dict(self, query_list):
    """
    Populate self.query_dict from a list of plain-text query strings.

    Each entry maps the query terms to a Query object whose top is
    self.c and whose rank/ret_score start at 0 and 0.0 respectively.
    self.query_count is set to the number of distinct entries.

    :param query_list: iterable of query term strings
    """
    self.query_dict = {}
    for terms in query_list:
        query_object = Query(terms=terms, top=self.c)
        query_object.rank = 0
        query_object.ret_score = 0.0
        self.query_dict[terms] = query_object
    self.query_count = len(self.query_dict)
def issue_query(self, text, page=1, pagelen=100):
    """
    Creates a Query object, issues it to the search engine, and
    attaches the response to the query object before returning it.

    :param text: query string
    :param page: page number, stored on Query.skip
    :param pagelen: results per page, stored on Query.top
    :return: ifind.search.Query with .response populated
    """
    query = Query(text)
    query.skip = page
    query.top = pagelen
    query.response = self.si.issue_query(query)
    return query
def cache_queries(): execution_time = timeit.default_timer() query_list = read_query_terms() for query in query_list: if not (query.isspace() or query == ""): query_start_time = timeit.default_timer() print "-" * 80 print "> {0}".format(query) query = Query(terms=query) query.top = 10 query.skip = 1 ENGINE.search(query) print " >> Elapsed time: {0:.2f} second(s)".format(timeit.default_timer() - query_start_time) print "=" * 80 print "> Total execution time: {0:.2f} seconds".format(timeit.default_timer() - execution_time) print "> Page caching thread will die shortly, or just kill the Python process."
def run_queries(engine, query_file, result_file): infile = open(query_file, "r") outfile = open(result_file, "w") while infile: line = infile.readline() parts = line.partition(' ') query_num = parts[0] query_str = unicode(parts[2]) max_limit = 1000 print query_num, query_str def buildQueryParts(term_list, op): qp = '' for t in term_list: if t: if qp: qp = qp + " " + op + " " + t else: qp = t return qp or_query = buildQueryParts(query_str.split(' '), 'OR') query = Query(terms=or_query, top=1000) #query = Query(terms=query_str,top=1000) query.skip = 1 response = engine.search(query) if response: print query_num + " " + str(len(response.results)) rank = 0 for r in response.results: rank = rank + 1 trec_line = query_num + " Q0 " + r.docid + " " + str( rank) + " " + str(1000 - rank) + " Exp\n" outfile.write(trec_line) if not line: break
def get_perf(queries_file, qrels_file, output_file): ''' Goes and works out the performance for each query. ''' qf = open(queries_file, 'r') of = open(output_file, 'w') qrels = TrecQrelHandler(qrels_file) for line in qf: line = line.strip() line = line.split() qid = line[0] topic = line[0].split('-')[0] query = ' '.join(line[1:]) print "Query {0}: '{1}'".format(qid, query) time_start = time.time() q = Query(query, top=100) q_results = engine.search(q) p_at_1 = calculate_precision(qrels, q_results, topic, 1) p_at_2 = calculate_precision(qrels, q_results, topic, 2) p_at_3 = calculate_precision(qrels, q_results, topic, 3) p_at_4 = calculate_precision(qrels, q_results, topic, 4) p_at_5 = calculate_precision(qrels, q_results, topic, 5) p_at_10 = calculate_precision(qrels, q_results, topic, 10) p_at_15 = calculate_precision(qrels, q_results, topic, 15) p_at_20 = calculate_precision(qrels, q_results, topic, 20) p_at_30 = calculate_precision(qrels, q_results, topic, 30) time_end = time.time() time_elapsed = time_end - time_start of.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}{11}". format(qid, time_elapsed, p_at_1, p_at_2, p_at_3, p_at_4, p_at_5, p_at_10, p_at_15, p_at_20, p_at_30, os.linesep)) print "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}{11}".format( qid, time_elapsed, p_at_1, p_at_2, p_at_3, p_at_4, p_at_5, p_at_10, p_at_15, p_at_20, p_at_30, os.linesep) print "execution time: {0:.3f} seconds".format(time_elapsed) print "p@1: {0:.3f}\tp@5: {1:.3f}\tp@10: {2:.3f}\tp@20: {3:.3f}".format( p_at_1, p_at_5, p_at_10, p_at_20) print qf.close() of.close()
def run_queries(engine, query_file, result_file): infile = open(query_file,"r") outfile = open(result_file,"w") while infile: line = infile.readline() parts = line.partition(' ') query_num = parts[0] query_str = unicode( parts[2] ) max_limit = 1000 print query_num, query_str def buildQueryParts(term_list, op): qp = '' for t in term_list: if t: if qp: qp = qp + " "+ op +" " + t else: qp = t return qp or_query= buildQueryParts(query_str.split(' '), 'OR') query = Query(terms=or_query,top=1000) #query = Query(terms=query_str,top=1000) query.skip = 1 response = engine.search(query) if response: print query_num + " " + str(len(response.results)) rank = 0 for r in response.results: rank = rank + 1 trec_line = query_num + " Q0 " + r.docid + " " + str( rank) + " " + str(1000-rank) + " Exp\n" outfile.write(trec_line) if not line: break
def cache_queries(): execution_time = timeit.default_timer() query_list = read_query_terms() for query in query_list: if not (query.isspace() or query == ""): query_start_time = timeit.default_timer() print "-" * 80 print "> {0}".format(query) query = Query(terms=query) query.top = 10 query.skip = 1 ENGINE.search(query) print " >> Elapsed time: {0:.2f} second(s)".format( timeit.default_timer() - query_start_time) print "=" * 80 print "> Total execution time: {0:.2f} seconds".format( timeit.default_timer() - execution_time) print "> Page caching thread will die shortly, or just kill the Python process."
def fetch_results(queries_list): """Builds a list of tuples (category,url,rank) and returns it """ myengine = EngineFactory('bing', api_key=API_KEY) result_list = [] for term in queries_list: query = Query(term[1], top=30) response = myengine.search(query) #TODO implement select_ranks properly maybe (num_to_select,step) rank_list = select_ranks(6, 10) #TODO make this arguments for rank in rank_list: #term[0] is trend categoty, term[1] is search term try: result_list.append((term[0], response.results[rank].url, rank)) #print "appended" + term[0] + response.results[rank].url except IndexError: print "index error.." print result_list[:] return result_list
def _has_query_been_issued(self, issued_query_list, query_candidate):
    """
    Returns True if the candidate query terms have been issued
    previously in this search session, False otherwise.

    The candidate string is run through Query first so that its terms
    are normalised (punctuation stripped, etc.) and compared
    like-for-like against the previously issued queries.

    :param issued_query_list: list of ifind.search.Query objects
    :param query_candidate: string of query terms
    """
    candidate_terms = Query(query_candidate).terms
    return any(candidate_terms == issued.terms for issued in issued_query_list)
def _run_query2(self, query):
    """
    Constructs an ifind.search.Query from the given query string,
    issues it to self.search_engine, and returns the response as JSON.

    Note: in the original, this docstring sat *after* the import
    statements, making it a discarded string expression rather than a
    real docstring; it has been moved to the first statement.

    :param query: query string
    :return: JSON form of the ifind.search.response
    """
    import sys
    # HACK: hard-coded path so the ifind package can be imported.
    path = '/home/arazzouk/ifind'
    if path not in sys.path:
        sys.path.append(path)
    from ifind.search.query import Query
    from ifind.search.response import Response

    # construct ifind.search.query Query
    iquery = Query(query, result_type="web")
    # issue query to self.search_engine
    iresponse = self.search_engine.search(iquery)
    # NOTE(review): returns the attribute itself -- if to_json is a
    # method rather than a property, this should be to_json(); confirm.
    return iresponse.to_json
query = query.strip() query = query.lower() query = query.replace('"', '') query = query.split() new_query = "" for term in query: if term not in ignore_terms: new_query = new_query + term + ' ' new_query = new_query.strip() query_list.append(new_query) input_file.close() return query_list query_list = get_query_list() for query in query_list: query_obj = Query(terms=query, top=top) query_obj.skip = skip start_time = timeit.default_timer() results = engine.search(query_obj) print print "Query '{0}' executed in {1:.2f} seconds".format(query, (timeit.default_timer() - start_time)) print "Got {0} result(s)".format(len(results)) print
query = query.replace('"', '') query = query.split() new_query = "" for term in query: if term not in ignore_terms: new_query = new_query + term + ' ' new_query = new_query.strip() query_list.append(new_query) input_file.close() return query_list query_list = get_query_list() for query in query_list: query_obj = Query(terms=query, top=top) query_obj.skip = skip start_time = timeit.default_timer() results = engine.search(query_obj) print print "Query '{0}' executed in {1:.2f} seconds".format( query, (timeit.default_timer() - start_time)) print "Got {0} result(s)".format(len(results)) print