def searchIndices(indices, query, period, lastId=None, reverse=False, fields=None, limit=100):
    """
    Search the specified indices for events matching the specified query.

    :param indices: A list of indices to search.
    :type indices: A list of objects implementing :class:`terane.bier.index.IIndex`
    :param query: The programmatic query to use for searching the indices.
    :type query: An object implementing :class:`terane.bier.searching.IQuery`
    :param period: The period within which the search is constrained.
    :type period: :class:`terane.bier.searching.Period`
    :param lastId: The event ID to start iterating from.
    :type lastId: :class:`terane.bier.evid.EVID`
    :param reverse: If True, then reverse the order of events.
    :type reverse: bool
    :param fields: If not None, then only return the specified fields of each event.
    :type fields: list or None
    :param limit: Only return the specified number of events.
    :type limit: int
    :returns: A CooperativeTask which contains a Deferred and manages the search task.
    :rtype: :class:`twisted.internet.task.CooperativeTask`
    """
    start = time.time()
    # determine the evids to use as start and end keys
    if reverse == False:
        startId, endId = period.getRange()
    else:
        endId, startId = period.getRange()
    if lastId != None:
        if not lastId in period:
            raise SearcherError("lastId %s is not within period" % lastId)
        startId = lastId
    # search each index separately, then merge the results
    try:
        searchers = []
        postingLists = []
        for index in indices:
            if not IIndex.providedBy(index):
                raise TypeError("index does not implement IIndex")
            # we create a copy of the original query, which can possibly be
            # optimized with index-specific knowledge.
            _query = copy.deepcopy(query)
            try:
                _query = _query.optimizeMatcher(index)
            except NotImplementedError, e:
                raise SearcherError(str(e))
            logger.debug("optimized query for index '%s': %s" % (index.name, str(_query)))
            # if the query optimized out entirely, then skip to the next index
            if _query == None:
                continue
            # get the posting list to iterate through
            searcher = index.newSearcher()
            if not ISearcher.providedBy(searcher):
                raise TypeError("searcher does not implement ISearcher")
            postingList = _query.iterMatches(searcher, startId, endId)
            if not IPostingList.providedBy(postingList):
                raise TypeError("posting list does not implement IPostingList")
            searchers.append(searcher)
            postingLists.append(postingList)
        # return a cooperative task
        return cooperate(ResultSet(searchers, postingLists, start, reverse, fields, limit))
    except:
        # if setup fails, close any searchers we already opened before re-raising
        for searcher in searchers:
            searcher.close()
        raise
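# Illustrative usage (a sketch, not part of the original module): one way a caller
# might drive searchIndices() from within a running reactor, assuming `indices`,
# `query`, and `period` objects were built elsewhere; processResults() is a
# hypothetical callback, not something defined in this codebase.
#
#     task = searchIndices(indices, query, period, reverse=True, limit=50)
#     d = task.whenDone()
#     d.addCallback(lambda _: processResults())
#     d.addErrback(lambda failure: logger.error(str(failure)))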
def next(self):
    start = time.time()
    searchers = []
    postingLists = []
    try:
        # get a searcher and posting list for each index
        for index in self._indices:
            # we create a copy of the original query, which can possibly be
            # optimized with index-specific knowledge.
            query = copy.deepcopy(self._query)
            # get the posting list to iterate through
            searcher = yield index.newSearcher()
            if not ISearcher.providedBy(searcher):
                raise TypeError("searcher does not implement ISearcher")
            query = yield query.optimizeMatcher(searcher)
            logger.debug("optimized query for index '%s': %s" % (index.name, str(query)))
            # if the query optimized out entirely, then skip to the next index
            if query == None:
                yield searcher.close()
                continue
            postingList = yield query.iterMatches(searcher, self._startId, self._endId)
            if not IPostingList.providedBy(postingList):
                raise TypeError("posting list does not implement IPostingList")
            searchers.append(searcher)
            postingLists.append(postingList)
        if len(postingLists) == 0:
            raise StopIteration()
        # loop until we reach the search limit, we exhaust all of our
        # posting lists, or we encounter an exception
        currPostings = [(None,None,None) for i in range(len(postingLists))]
        lastId = None
        compar = (lambda x,y: cmp(y,x)) if self._reverse else cmp
        while True:
            # if we have reached our limit
            if len(self.events) == self._limit:
                self.runtime = time.time() - start
                raise StopIteration()
            # start each pass assuming the first posting list holds the smallest evid
            smallestList = 0
            # check each child iter for the lowest evid
            for currList in range(len(postingLists)):
                # if the postingList is None, then it is closed
                if postingLists[currList] == None:
                    # FIXME: close the posting list and searcher
                    continue
                # if the current posting for this posting list was consumed,
                # get the next posting from the posting list
                if currPostings[currList] == (None,None,None):
                    currPostings[currList] = yield postingLists[currList].nextPosting()
                # if the next posting for this posting list is (None,None,None),
                # then we are done with this posting list
                if currPostings[currList] == (None,None,None):
                    postingList = postingLists[currList]
                    yield postingList.close()
                    postingLists[currList] = None
                    continue
                # if the evid equals the last evid returned, then ignore it
                if lastId != None and currPostings[currList][0] == lastId:
                    currPostings[currList] = (None,None,None)
                    continue
                # we don't compare the first evid with itself
                if currList == 0:
                    continue
                # if the evid is the current smallest evid, then remember it
                if (currPostings[smallestList] == (None,None,None) or
                        compar(currPostings[currList][0], currPostings[smallestList][0]) < 0):
                    smallestList = currList
            # get the next posting
            evid, _, store = currPostings[smallestList]
            # stop iterating if there are no more results
            if evid == None:
                self.runtime = time.time() - start
                raise StopIteration()
            # remember the last evid
            lastId = evid
            # forget the evid so we don't return it again
            currPostings[smallestList] = (None,None,None)
            # retrieve the event
            if not IEventStore.providedBy(store):
                raise TypeError("store does not implement IEventStore")
            event = yield store.getEvent(evid)
            defaultfield, defaultvalue, fields = event
            if defaultfield not in self.fields:
                self.fields.append(defaultfield)
            # keep a record of all field names found in the results
            for fieldname in fields.keys():
                if fieldname not in self.fields:
                    self.fields.append(fieldname)
            # filter out unwanted fields
            if self._fields != None:
                fields = dict([(k,v) for k,v in fields.items() if k in self._fields])
            self.events.append(((evid.ts, evid.offset), defaultfield, defaultvalue, fields))
            logger.trace("added event %s to results" % evid)
    finally:
        for postingList in postingLists:
            if postingList != None:
                yield postingList.close()
        for searcher in searchers:
            yield searcher.close()
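# Illustrative sketch (an assumption for explanation only, not part of the original
# module): the while loop in next() above is a linear-scan k-way merge over the
# posting lists, repeatedly emitting the posting with the smallest evid (largest
# when reversed) and suppressing duplicates of the last emitted evid. The helper
# below demonstrates the same idea synchronously over plain in-memory lists of
# (evid, event) tuples; it is never called by this module.
def _exampleMergePostings(postingLists, reverse=False, limit=100):
    # keep a private cursor into each list so the inputs are not modified
    cursors = [0 for _ in postingLists]
    compar = (lambda x,y: cmp(y,x)) if reverse else cmp
    merged = []
    lastId = None
    while len(merged) < limit:
        smallest = None
        for i in range(len(postingLists)):
            # skip past any duplicates of the last emitted evid
            while (cursors[i] < len(postingLists[i]) and
                   postingLists[i][cursors[i]][0] == lastId):
                cursors[i] += 1
            # skip exhausted lists
            if cursors[i] >= len(postingLists[i]):
                continue
            # remember the list whose current posting has the smallest evid
            if (smallest == None or
                    compar(postingLists[i][cursors[i]][0],
                           postingLists[smallest][cursors[smallest]][0]) < 0):
                smallest = i
        # every list is exhausted
        if smallest == None:
            break
        evid, event = postingLists[smallest][cursors[smallest]]
        cursors[smallest] += 1
        lastId = evid
        merged.append((evid, event))
    return merged

# For example, assuming each input list is already sorted by evid:
#     _exampleMergePostings([[(1,'a'), (3,'c')], [(2,'b'), (3,'c')]])
#     => [(1,'a'), (2,'b'), (3,'c')]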