Пример #1
0
        def create_query_object():
            """
            Build a Query from the enclosing method's arguments and issue it.

            Reads ``query_text``, ``page`` and ``page_len`` from the enclosing scope: the Query is
            constructed from ``query_text``, and ``page`` / ``page_len`` are stored on it as
            ``skip`` / ``top``. The value returned by the search interface's ``issue_query`` is
            attached to the Query as ``response`` before the Query is returned.
            """
            query = Query(query_text)
            query.skip, query.top = page, page_len

            # Issue the query and keep the response on the query object itself.
            query.response = self.__search_interface.issue_query(query)

            return query
Пример #2
0
        def create_query_object():
            """
            Build a Query from the enclosing method's arguments and issue it.

            ``query_text``, ``page`` and ``page_len`` come from the enclosing scope. The Query is
            created from ``query_text``; ``page`` becomes its ``skip`` and ``page_len`` its ``top``.
            The result of ``self._search_interface.issue_query`` is stored on the Query as
            ``response`` and the Query is returned.
            """
            issued = Query(query_text)
            issued.skip, issued.top = page, page_len

            # The response travels with the query object that produced it.
            issued.response = self._search_interface.issue_query(issued)

            return issued
Пример #3
0
    def _make_query_dict(self, query_list):
        """
        Populate ``self.query_dict`` with one Query per plain-text query.

        Each Query is created with ``top=self.c`` and has its ``rank`` set to 0 and its
        ``ret_score`` set to 0.0. Entries are keyed by the query text, so a repeated text
        replaces the earlier entry. ``self.query_count`` is set to the number of keys.

        :param query_list: iterable of query strings
        :return: None; the results are stored on ``self.query_dict`` and ``self.query_count``
        """
        self.query_dict = {}

        for terms in query_list:
            query = Query(terms=terms, top=self.c)
            query.rank, query.ret_score = 0, 0.0
            self.query_dict[terms] = query

        self.query_count = len(self.query_dict)
Пример #4
0
    def _make_query_dict(self, query_list):
        """
        Rebuild ``self.query_dict`` from a list of plain-text queries.

        For every text a Query is constructed with ``top=self.c``; its ``rank`` starts at 0 and
        its ``ret_score`` at 0.0. The Query is stored under its text (later duplicates overwrite
        earlier ones) and ``self.query_count`` records how many keys the dictionary ends up with.

        :param query_list: iterable of query strings
        :return: None; see ``self.query_dict`` and ``self.query_count``
        """
        self.query_dict = {}

        for text in query_list:
            fresh = Query(terms=text, top=self.c)
            # Every query starts unranked and unscored.
            fresh.rank = 0
            fresh.ret_score = 0.0
            self.query_dict[text] = fresh

        self.query_count = len(self.query_dict)
Пример #5
0
    def issue_query(self, text, page=1, pagelen=100):
        """ Build a Query for ``text``, send it through ``self.si`` and attach the response to it
        :param text: query string
        :param page: integer, stored on the query as ``skip``
        :param pagelen: integer, stored on the query as ``top``
        :return: ifind.search.Query with its ``response`` attribute set
        """
        query = Query(text)
        query.skip, query.top = page, pagelen

        # Keep the response on the query so callers get both in one object.
        query.response = self.si.issue_query(query)

        return query
Пример #6
0
def cache_queries():
    execution_time = timeit.default_timer()
    query_list = read_query_terms()

    for query in query_list:
        if not (query.isspace() or query == ""):
            query_start_time = timeit.default_timer()
            print "-" * 80
            print "> {0}".format(query)

            query = Query(terms=query)
            query.top = 10
            query.skip = 1

            ENGINE.search(query)

            print "  >> Elapsed time: {0:.2f} second(s)".format(timeit.default_timer() - query_start_time)

    print "=" * 80
    print "> Total execution time: {0:.2f} seconds".format(timeit.default_timer() - execution_time)
    print "> Page caching thread will die shortly, or just kill the Python process."
Пример #7
0
def run_queries(engine, query_file, result_file):
    infile = open(query_file, "r")
    outfile = open(result_file, "w")
    while infile:
        line = infile.readline()
        parts = line.partition(' ')
        query_num = parts[0]
        query_str = unicode(parts[2])
        max_limit = 1000
        print query_num, query_str

        def buildQueryParts(term_list, op):
            qp = ''
            for t in term_list:
                if t:
                    if qp:
                        qp = qp + " " + op + " " + t
                    else:
                        qp = t
            return qp

        or_query = buildQueryParts(query_str.split(' '), 'OR')

        query = Query(terms=or_query, top=1000)
        #query = Query(terms=query_str,top=1000)
        query.skip = 1
        response = engine.search(query)

        if response:
            print query_num + " " + str(len(response.results))

            rank = 0
            for r in response.results:
                rank = rank + 1
                trec_line = query_num + " Q0 " + r.docid + " " + str(
                    rank) + " " + str(1000 - rank) + " Exp\n"
                outfile.write(trec_line)

        if not line:
            break
Пример #8
0
def get_perf(queries_file, qrels_file, output_file):
    '''
    Goes and works out the performance for each query.
    '''
    qf = open(queries_file, 'r')
    of = open(output_file, 'w')

    qrels = TrecQrelHandler(qrels_file)

    for line in qf:
        line = line.strip()
        line = line.split()

        qid = line[0]
        topic = line[0].split('-')[0]
        query = ' '.join(line[1:])

        print "Query {0}: '{1}'".format(qid, query)
        time_start = time.time()

        q = Query(query, top=100)
        q_results = engine.search(q)

        p_at_1 = calculate_precision(qrels, q_results, topic, 1)
        p_at_2 = calculate_precision(qrels, q_results, topic, 2)
        p_at_3 = calculate_precision(qrels, q_results, topic, 3)
        p_at_4 = calculate_precision(qrels, q_results, topic, 4)
        p_at_5 = calculate_precision(qrels, q_results, topic, 5)
        p_at_10 = calculate_precision(qrels, q_results, topic, 10)
        p_at_15 = calculate_precision(qrels, q_results, topic, 15)
        p_at_20 = calculate_precision(qrels, q_results, topic, 20)
        p_at_30 = calculate_precision(qrels, q_results, topic, 30)

        time_end = time.time()
        time_elapsed = time_end - time_start

        of.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}{11}".
                 format(qid, time_elapsed, p_at_1, p_at_2, p_at_3, p_at_4,
                        p_at_5, p_at_10, p_at_15, p_at_20, p_at_30,
                        os.linesep))

        print "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\t{10}{11}".format(
            qid, time_elapsed, p_at_1, p_at_2, p_at_3, p_at_4, p_at_5, p_at_10,
            p_at_15, p_at_20, p_at_30, os.linesep)

        print "execution time: {0:.3f} seconds".format(time_elapsed)
        print "p@1: {0:.3f}\tp@5: {1:.3f}\tp@10: {2:.3f}\tp@20: {3:.3f}".format(
            p_at_1, p_at_5, p_at_10, p_at_20)
        print

    qf.close()
    of.close()
Пример #9
0
def run_queries(engine, query_file, result_file):
    infile = open(query_file,"r")
    outfile = open(result_file,"w")
    while infile:
        line = infile.readline()
        parts = line.partition(' ')
        query_num = parts[0]
        query_str = unicode( parts[2] )
        max_limit = 1000
        print query_num, query_str

        def buildQueryParts(term_list, op):
            qp = ''
            for t in term_list:
                if t:
                    if qp:
                        qp = qp + " "+ op  +" " + t
                    else:
                        qp = t
            return qp

        or_query=  buildQueryParts(query_str.split(' '), 'OR')

        query = Query(terms=or_query,top=1000)
        #query = Query(terms=query_str,top=1000)
        query.skip = 1
        response = engine.search(query)

        if response:
            print query_num + " " + str(len(response.results))

            rank = 0
            for r in response.results:
                rank = rank + 1
                trec_line = query_num + " Q0 " + r.docid + " " + str( rank) + " " + str(1000-rank) + " Exp\n"
                outfile.write(trec_line)

        if not line:
            break
Пример #10
0
def cache_queries():
    execution_time = timeit.default_timer()
    query_list = read_query_terms()

    for query in query_list:
        if not (query.isspace() or query == ""):
            query_start_time = timeit.default_timer()
            print "-" * 80
            print "> {0}".format(query)

            query = Query(terms=query)
            query.top = 10
            query.skip = 1

            ENGINE.search(query)

            print "  >> Elapsed time: {0:.2f} second(s)".format(
                timeit.default_timer() - query_start_time)

    print "=" * 80
    print "> Total execution time: {0:.2f} seconds".format(
        timeit.default_timer() - execution_time)
    print "> Page caching thread will die shortly, or just kill the Python process."
Пример #11
0
def fetch_results(queries_list):
    """
    Build and return a list of ``(category, url, rank)`` tuples.

    For each entry of ``queries_list`` (``term[0]`` is the trend category, ``term[1]`` the
    search term) the search term is issued to a 'bing' engine with ``top=30``. For every rank
    yielded by ``select_ranks(6, 10)`` the URL of the result at that index is recorded; an
    index with no result prints "index error.." and is skipped. The full list is printed
    before it is returned.
    """
    engine = EngineFactory('bing', api_key=API_KEY)
    collected = []

    for term in queries_list:
        response = engine.search(Query(term[1], top=30))
        #TODO implement select_ranks properly maybe (num_to_select,step)
        for rank in select_ranks(6, 10):  #TODO make this arguments
            try:
                entry = (term[0], response.results[rank].url, rank)
            except IndexError:
                print "index error.."
            else:
                collected.append(entry)

    print collected
    return collected
Пример #12
0
    def _has_query_been_issued(self, issued_query_list, query_candidate):
        """
        Report whether the candidate query terms match any previously issued query.

        The candidate string is wrapped in a Query and its ``terms`` attribute is compared with
        the ``terms`` of every query in ``issued_query_list``. Returns True iff at least one
        of them is equal, False otherwise.
        :param: issued_query_list is a list of ifind.search.query objects
        :param query_candidate: string of query terms
        """
        # Going through Query means both sides are compared in the same form
        # (the original author notes Query strips punctuation etc.).
        candidate_terms = Query(query_candidate).terms

        return any(candidate_terms == issued.terms for issued in issued_query_list)
Пример #13
0
    def _run_query2(self, query):
        import sys
        path = '/home/arazzouk/ifind'
        if path not in sys.path:
            sys.path.append(path)
        from ifind.search.query import Query
        from ifind.search.response import Response
        """ constructs ifind.search.query, and issues it to the search_engine

        :param query:
        :return: ifind.search.response
        """
        # construct ifind.search.query Query
        iquery = Query(query, result_type="web")

        # issue query to self.search_engine
        iresponse = self.search_engine.search(iquery)

        return iresponse.to_json
Пример #14
0
		query = query.strip()
		query = query.lower()
		query = query.replace('"', '')
		
		query = query.split()
		new_query = ""
		
		for term in query:
			if term not in ignore_terms:
				new_query = new_query + term + ' '
		
		new_query = new_query.strip()
		query_list.append(new_query)
	
	input_file.close()
	return query_list

query_list = get_query_list()

for query in query_list:
	query_obj = Query(terms=query, top=top)
	query_obj.skip = skip
	
	start_time = timeit.default_timer()
	
	results = engine.search(query_obj)
	
	print
	print "Query '{0}' executed in {1:.2f} seconds".format(query, (timeit.default_timer() - start_time))
	print "Got {0} result(s)".format(len(results))
	print
Пример #15
0
        query = query.replace('"', '')

        query = query.split()
        new_query = ""

        for term in query:
            if term not in ignore_terms:
                new_query = new_query + term + ' '

        new_query = new_query.strip()
        query_list.append(new_query)

    input_file.close()
    return query_list


query_list = get_query_list()

for query in query_list:
    query_obj = Query(terms=query, top=top)
    query_obj.skip = skip

    start_time = timeit.default_timer()

    results = engine.search(query_obj)

    print
    print "Query '{0}' executed in {1:.2f} seconds".format(
        query, (timeit.default_timer() - start_time))
    print "Got {0} result(s)".format(len(results))
    print