Пример #1
0
    def defaultSearch(self, req, expectedValues=None, verbose=False):
        """Apply ``req`` to every index in ``self._indexes``.

        Each index is asked to apply the request via ``_apply_index``;
        indexes providing ``ILimitedResultIndex`` are also handed the
        result set accumulated so far.  Every non-``None`` index answer is
        combined into the running result set with ``weightedIntersection``
        and the loop stops as soon as that result set is empty.

        :param req: query handed unchanged to each index.
        :param expectedValues: not used by this method.
        :param verbose: when true, log the hit count and lookup time of
            every index whose id is contained in ``req``.
        :return: a ``set`` built from the final result set (from its keys
            when it has a ``keys`` method); an empty ``set`` if nothing
            matched or no index handled the request.
        """
        rs = None
        for index in self._indexes:
            st = time()
            if ILimitedResultIndex.providedBy(index):
                r = index._apply_index(req, rs)
            else:
                r = index._apply_index(req)
            # Only the index lookup is timed, not the intersection below.
            duration = (time() - st) * 1000

            handled = r is not None
            if handled:
                # The index answers with (result set, used attributes).
                r, _ = r
                _, rs = weightedIntersection(rs, r)

            # Log before short-circuiting so the index that emptied the
            # result set shows up in the verbose output as well.
            if verbose and (index.id in req):
                logger.info('index %s: %s hits in %3.2fms', index.id,
                            len(r) if r else 0, duration)

            if handled and not rs:
                break

        if not rs:
            return set()

        if hasattr(rs, 'keys'):
            rs = rs.keys()

        return set(rs)
    def defaultSearch(self, req, expectedValues=None, verbose=False):
        """Query all indexes in ``self._indexes`` and intersect their hits.

        For each index ``_apply_index`` is called with ``req``; an index
        that provides ``ILimitedResultIndex`` also receives the current
        result set.  Non-``None`` answers are merged into the running
        result set through ``weightedIntersection``.  Iteration ends early
        once the merged result set is empty.

        :param req: query passed as-is to every index.
        :param expectedValues: not used by this method.
        :param verbose: when true, log hits and elapsed milliseconds for
            each index whose id is found in ``req``.
        :return: ``set`` of the resulting keys, or an empty ``set`` when
            there is no (non-empty) result set.
        """
        rs = None
        for index in self._indexes:
            st = time()
            if ILimitedResultIndex.providedBy(index):
                r = index._apply_index(req, rs)
            else:
                r = index._apply_index(req)
            # Elapsed time of the index lookup alone, in milliseconds.
            duration = (time() - st) * 1000

            exhausted = False
            if r is not None:
                # Index answer is a (result set, used attributes) pair.
                r, _used = r
                _weight, rs = weightedIntersection(rs, r)
                exhausted = not rs

            # Emit the log line before leaving the loop, otherwise the
            # index responsible for an empty result would never be logged.
            if verbose and (index.id in req):
                logger.info('index %s: %s hits in %3.2fms',
                            index.id, len(r) if r else 0, duration)

            if exhausted:
                break

        if not rs:
            return set()

        # Mapping-like result sets contribute their keys; plain sets have
        # no ``keys`` attribute and are used directly.
        try:
            rs = rs.keys()
        except AttributeError:
            pass

        return set(rs)
Пример #3
0
 def _sorted_search_indexes(self, query):
     # Simple implementation ordering only by limited result support
     query_keys = query.keys()
     order = []
     for name, index in self.indexes.items():
         for attr in self._get_index_query_names(index):
             if attr in query_keys:
                 order.append((ILimitedResultIndex.providedBy(index), name))
     order.sort()
     return [i[1] for i in order]
Пример #4
0
 def _sorted_search_indexes(self, query):
     # Simple implementation doing no ordering.
     query_keys = query.keys()
     order = []
     for name, index in self.indexes.items():
         if name not in query_keys:
             continue
         order.append((ILimitedResultIndex.providedBy(index), name))
     order.sort()
     return [i[1] for i in order]
Пример #5
0
 def _sorted_search_indexes(self, query):
     # Simple implementation ordering only by limited result support
     query_keys = query.keys()
     order = []
     for name, index in self.indexes.items():
         for attr in self._get_index_query_names(index):
             if attr in query_keys:
                 order.append((ILimitedResultIndex.providedBy(index), name))
     order.sort()
     return [i[1] for i in order]
Пример #6
0
 def _sorted_search_indexes(self, query):
     # Simple implementation doing no ordering.
     query_keys = query.keys()
     order = []
     for name, index in self.indexes.items():
         if name not in query_keys:
             continue
         order.append((ILimitedResultIndex.providedBy(index), name))
     order.sort()
     return [i[1] for i in order]
Пример #7
0
    def _search_index(self, cr, index_id, query, rs):
        """Apply one index to ``query`` and fold its hits into ``rs``.

        :param cr: object receiving ``start_split`` / ``stop_split`` calls
            that bracket the index lookup and the intersection.
        :param index_id: id of the index, resolved via ``self.getIndex``.
        :param query: the query handed to the index.
        :param rs: result set accumulated so far (may be ``None``).
        :return: the new result set, or ``None`` when the index yielded
            no (or an empty) result.
        """
        cr.start_split(index_id)

        index = self.getIndex(index_id)
        limit_result = ILimitedResultIndex.providedBy(index)
        index_rs = None

        if not IQueryIndex.providedBy(index):
            # Only limited-result indexes are given the current result set.
            apply_args = (query, rs) if limit_result else (query,)
            index_result = index._apply_index(*apply_args)

            # The answer is a (resultset, used_attributes) pair.
            if index_result:
                index_rs, _ = index_result
        else:
            index_query = IndexQuery(query, index.id, index.query_options,
                                     index.operators, index.useOperator)
            if index_query.keys is not None:
                index_rs = index.query_index(index_query, rs)

        if index_rs:
            # Time the pure intersection under its own split id.
            intersect_id = index_id + '#intersection'
            cr.start_split(intersect_id)
            # Some indexes return mappings rather than simple sets;
            # weightedIntersection keeps the values of such mappings.
            mapping_involved = (hasattr(rs, 'items') or
                                hasattr(index_rs, 'items'))
            if mapping_involved:
                _, rs = weightedIntersection(rs, index_rs)
            else:
                rs = intersection(rs, index_rs)
            cr.stop_split(intersect_id)
        else:
            # Nothing came back from the index: short circuit.
            rs = None

        # The index split is closed last, so the intersection time counts
        # towards the index time.
        cr.stop_split(index_id, result=index_rs, limit=limit_result)

        return rs
Пример #8
0
    def _search_index(self, cr, index_id, query, rs):
        """Query a single index and intersect its answer with ``rs``.

        ``cr`` is told when the index lookup starts and stops, and, for a
        non-empty index answer, when the intersection starts and stops.

        Returns the intersected result set, or ``None`` if the index
        produced nothing.
        """
        cr.start_split(index_id)

        hits = None
        idx = self.getIndex(index_id)
        limit_result = ILimitedResultIndex.providedBy(idx)

        if IQueryIndex.providedBy(idx):
            parsed = IndexQuery(query, idx.id, idx.query_options,
                                idx.operators, idx.useOperator)
            if parsed.keys is not None:
                hits = idx.query_index(parsed, rs)
        elif limit_result:
            answer = idx._apply_index(query, rs)
            # Unpack the (resultset, used_attributes) return value.
            if answer:
                hits, _ = answer
        else:
            answer = idx._apply_index(query)
            # Unpack the (resultset, used_attributes) return value.
            if answer:
                hits, _ = answer

        if not hits:
            # Empty index result: short circuit.
            rs = None
        else:
            # Report the intersection time as a split of its own.
            split_id = index_id + '#intersection'
            cr.start_split(split_id)
            if hasattr(rs, 'items') or hasattr(hits, 'items'):
                # Mappings are involved; weightedIntersection preserves
                # their values.
                _, rs = weightedIntersection(rs, hits)
            else:
                rs = intersection(rs, hits)
            cr.stop_split(split_id)

        # Stopping the index split here makes the intersection time part
        # of the index time.
        cr.stop_split(index_id, result=hits, limit=limit_result)

        return rs
Пример #9
0
    def search(self,
            query, sort_index=None, reverse=False, limit=None, merge=True):
        """Iterate through the indexes, applying the query to each one. If
        merge is true then return a lazy result set (sorted if appropriate)
        otherwise return the raw (possibly scored) results for later merging.
        Limit is used in conjunction with sorting or scored results to inform
        the catalog how many results you are really interested in. The catalog
        can then use optimizations to save time and memory. The number of
        results is not guaranteed to fall within the limit however, you should
        still slice or batch the results as usual.

        query is canonicalized with self.make_query() before use; its
        'b_start' and 'b_size' entries, if present, are read as batching
        arguments. sort_index is None, a single object with a getId()
        method, or a list of such objects; reverse may likewise be a single
        flag or a list of flags.

        Returns LazyCat([]) for an empty result, a LazyMap for unsorted or
        score-sorted results, a plain list of three-tuples for scored
        results when merge is false, or whatever self.sortResults() returns
        when sorting applies."""

        # Indexes fulfill a fairly large contract here. We hand each
        # index the query mapping we are given (which may be composed
        # of some combination of web request, kw mappings or plain old dicts)
        # and the index decides what to do with it. If the index finds work
        # for itself in the query, it returns the results and a tuple of
        # the attributes that were used. If the index finds nothing for it
        # to do then it returns None.

        # Canonicalize the request into a sensible query before passing it on
        query = self.make_query(query)

        cr = self.getCatalogPlan(query)
        cr.start()

        # Use the plan supplied by cr; fall back to our own ordering when
        # it is empty.
        plan = cr.plan()
        if not plan:
            plan = self._sorted_search_indexes(query)

        rs = None  # result set
        indexes = self.indexes.keys()
        for i in plan:
            if i not in indexes:
                # We can have bogus keys or the plan can contain index names
                # that have been removed in the meantime
                continue

            index = self.getIndex(i)
            # Skip indexes that do not offer _apply_index at all.
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue

            cr.start_split(i)
            # Only limited-result indexes are handed the current result set.
            limit_result = ILimitedResultIndex.providedBy(index)
            if limit_result:
                r = _apply_index(query, rs)
            else:
                r = _apply_index(query)

            if r is not None:
                # Unpack (result set, used attributes); u is not used below.
                r, u = r
                # Short circuit if empty result
                # BBB: We can remove the "r is not None" check in Zope 4
                # once we don't need to support the "return everything" case
                # anymore
                if r is not None and not r:
                    cr.stop_split(i, result=None, limit=limit_result)
                    # NOTE(review): this early return bypasses the
                    # cr.stop() call at the end of the method -- confirm
                    # that is intended.
                    return LazyCat([])

                # provide detailed info about the pure intersection time
                intersect_id = i + '#intersection'
                cr.start_split(intersect_id)
                # weightedIntersection preserves the values from any mappings
                # we get, as some indexes don't return simple sets
                if hasattr(rs, 'items') or hasattr(r, 'items'):
                    _, rs = weightedIntersection(rs, r)
                else:
                    rs = intersection(rs, r)

                cr.stop_split(intersect_id)

                # consider the time it takes to intersect the index result
                # with the total result set to be part of the index time
                cr.stop_split(i, result=r, limit=limit_result)
                if not rs:
                    break
            else:
                cr.stop_split(i, result=None, limit=limit_result)

        # Try to deduce the sort limit from batching arguments
        b_start = int(query.get('b_start', 0))
        b_size = query.get('b_size', None)
        if b_size is not None:
            b_size = int(b_size)

        # An explicit batch size overrides the limit argument; otherwise a
        # truthy limit doubles as the batch size. (In the elif branch
        # b_size is necessarily None already.)
        if b_size is not None:
            limit = b_start + b_size
        elif limit and b_size is None:
            b_size = limit

        # Build the split name under which sorting time is reported:
        # 'sort_on#<index ids>#<asc|desc>[#limit-<n>]'.
        if sort_index is None:
            sort_report_name = None
        else:
            if isinstance(sort_index, list):
                sort_name = '-'.join(i.getId() for i in sort_index)
            else:
                sort_name = sort_index.getId()
            if isinstance(reverse, list):
                reverse_name = '-'.join(
                    'desc' if r else 'asc' for r in reverse)
            else:
                reverse_name = 'desc' if reverse else 'asc'
            sort_report_name = 'sort_on#' + sort_name + '#' + reverse_name
            if limit is not None:
                sort_report_name += '#limit-%s' % limit

        if rs is None:
            # None of the indexes found anything to do with the query
            # We take this to mean that the query was empty (an empty filter)
            # and so we return everything in the catalog
            warnings.warn('Your query %s produced no query restriction. '
                          'Currently the entire catalog content is returned. '
                          'In Zope 4 this will result in an empty LazyCat '
                          'to be returned.' % repr(cr.make_key(query)),
                          DeprecationWarning, stacklevel=3)

            rlen = len(self)
            if sort_index is None:
                sequence, slen = self._limit_sequence(self.data.items(), rlen,
                    b_start, b_size)
                result = LazyMap(self.instantiate, sequence, slen,
                    actual_result_count=rlen)
            else:
                cr.start_split(sort_report_name)
                result = self.sortResults(
                    self.data, sort_index, reverse, limit, merge,
                        actual_result_count=rlen, b_start=b_start,
                        b_size=b_size)
                cr.stop_split(sort_report_name, None)
        elif rs:
            # We got some results from the indexes.
            # Sort and convert to sequences.
            # XXX: The check for 'values' is really stupid since we call
            # items() and *not* values()
            # NOTE(review): the first branch below tests for 'items' while
            # the second tests for 'values'; a result set with 'values'
            # but no 'items' and sort_index None would reach the final
            # sortResults branch -- confirm this cannot happen.
            rlen = len(rs)
            if sort_index is None and hasattr(rs, 'items'):
                # having 'items' means we have a data structure with
                # scores.  Build a new result set, sort it by score, reverse
                # it, compute the normalized score, and Lazify it.

                if not merge:
                    # Don't bother to sort here, return a list of
                    # three tuples to be passed later to mergeResults
                    # note that data_record_normalized_score_ cannot be
                    # calculated and will always be 1 in this case
                    getitem = self.__getitem__
                    result = [(score, (1, score, rid), getitem)
                            for rid, score in rs.items()]
                else:
                    cr.start_split('sort_on#score')

                    # sort it by score
                    rs = rs.byValue(0)
                    # Score of the first entry; used below as the
                    # denominator of the normalized score. The local name
                    # shadows the builtin max() inside this method.
                    max = float(rs[0][0])

                    # Here we define our getter function inline so that
                    # we can conveniently store the max value as a default arg
                    # and make the normalized score computation lazy
                    def getScoredResult(item, max=max, self=self):
                        """
                        Returns instances of self._v_brains, or whatever is
                        passed into self.useBrains.
                        """
                        score, key = item
                        data = self.data[key]
                        klass = self._v_result_class
                        schema_len = len(klass.__record_schema__)
                        norm_score = int(100.0 * score / max)
                        # When the schema has exactly three more fields
                        # than the stored record, pass id, score and
                        # normalized score positionally; otherwise set
                        # them as attributes afterwards.
                        if schema_len == len(data) + 3:
                            r = klass(tuple(data) + (key, score, norm_score))
                        else:
                            r = klass(data)
                            r.data_record_id_ = key
                            r.data_record_score_ = score
                            r.data_record_normalized_score_ = norm_score
                        r = r.__of__(aq_parent(self))
                        return r

                    sequence, slen = self._limit_sequence(rs, rlen, b_start,
                        b_size)
                    result = LazyMap(getScoredResult, sequence, slen,
                        actual_result_count=rlen)
                    cr.stop_split('sort_on#score', None)

            elif sort_index is None and not hasattr(rs, 'values'):
                # no scores
                if hasattr(rs, 'keys'):
                    rs = rs.keys()
                sequence, slen = self._limit_sequence(rs, rlen, b_start,
                    b_size)
                result = LazyMap(self.__getitem__, sequence, slen,
                    actual_result_count=rlen)
            else:
                # sort.  If there are scores, then this block is not
                # reached, therefore 'sort-on' does not happen in the
                # context of a text index query.  This should probably
                # sort by relevance first, then the 'sort-on' attribute.
                cr.start_split(sort_report_name)
                result = self.sortResults(rs, sort_index, reverse, limit,
                    merge, actual_result_count=rlen, b_start=b_start,
                    b_size=b_size)
                cr.stop_split(sort_report_name, None)
        else:
            # Empty result set
            result = LazyCat([])
        cr.stop()
        return result
Пример #10
0
    def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
        """Iterate through the indexes, applying the query to each one. If
        merge is true then return a lazy result set (sorted if appropriate)
        otherwise return the raw (possibly scored) results for later merging.
        Limit is used in conjunction with sorting or scored results to inform
        the catalog how many results you are really interested in. The catalog
        can then use optimizations to save time and memory. The number of
        results is not guaranteed to fall within the limit however, you should
        still slice or batch the results as usual.

        query is canonicalized with self.make_query() before use; its
        'b_start' and 'b_size' entries, if present, are read as batching
        arguments. sort_index, reverse, limit and merge are passed on to
        self.sortResults() when sorting applies.

        Returns LazyCat([]) for an empty result, a LazyMap for unsorted or
        score-sorted results, a plain list of three-tuples for scored
        results when merge is false, or whatever self.sortResults()
        returns when sorting applies."""

        rs = None  # resultset

        # Indexes fulfill a fairly large contract here. We hand each
        # index the query mapping we are given (which may be composed
        # of some combination of web request, kw mappings or plain old dicts)
        # and the index decides what to do with it. If the index finds work
        # for itself in the query, it returns the results and a tuple of
        # the attributes that were used. If the index finds nothing for it
        # to do then it returns None.

        # Canonicalize the request into a sensible query before passing it on
        query = self.make_query(query)

        cr = self.getCatalogPlan(query)
        cr.start()

        # Use the plan supplied by cr; fall back to our own ordering when
        # it is empty.
        plan = cr.plan()
        if not plan:
            plan = self._sorted_search_indexes(query)

        indexes = self.indexes.keys()
        for i in plan:
            if i not in indexes:
                # We can have bogus keys or the plan can contain index names
                # that have been removed in the meantime
                continue

            index = self.getIndex(i)
            # Skip indexes that do not offer _apply_index at all.
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue

            cr.start_split(i)
            # Only limited-result indexes are handed the current result set.
            limit_result = ILimitedResultIndex.providedBy(index)
            if limit_result:
                r = _apply_index(query, rs)
            else:
                r = _apply_index(query)

            if r is not None:
                # Unpack (result set, used attributes); u is not used below.
                r, u = r
                # Short circuit if empty result
                # BBB: We can remove the "r is not None" check in Zope 2.14
                # once we don't need to support the "return everything" case
                # anymore
                if r is not None and not r:
                    cr.stop_split(i, result=None, limit=limit_result)
                    # NOTE(review): this early return bypasses the
                    # cr.stop() call at the end of the method -- confirm
                    # that is intended.
                    return LazyCat([])

                # provide detailed info about the pure intersection time
                intersect_id = i + '#intersection'
                cr.start_split(intersect_id)
                # w (the first element of the returned pair) is not used.
                w, rs = weightedIntersection(rs, r)
                cr.stop_split(intersect_id)

                # consider the time it takes to intersect the index result with
                # the total resultset to be part of the index time
                cr.stop_split(i, result=r, limit=limit_result)
                if not rs:
                    break
            else:
                cr.stop_split(i, result=None, limit=limit_result)

        # Try to deduce the sort limit from batching arguments
        b_start = int(query.get('b_start', 0))
        b_size = query.get('b_size', None)
        if b_size is not None:
            b_size = int(b_size)

        # An explicit batch size overrides the limit argument; otherwise a
        # truthy limit doubles as the batch size. (In the elif branch
        # b_size is necessarily None already.)
        if b_size is not None:
            limit = b_start + b_size
        elif limit and b_size is None:
            b_size = limit

        if rs is None:
            # None of the indexes found anything to do with the query
            # We take this to mean that the query was empty (an empty filter)
            # and so we return everything in the catalog
            warnings.warn('Your query %s produced no query restriction. '
                          'Currently the entire catalog content is returned. '
                          'In Zope 2.14 this will result in an empty LazyCat '
                          'to be returned.' % repr(make_key(self, query)),
                          DeprecationWarning,
                          stacklevel=3)

            rlen = len(self)
            if sort_index is None:
                sequence, slen = self._limit_sequence(self.data.items(), rlen,
                                                      b_start, b_size)
                result = LazyMap(self.instantiate,
                                 sequence,
                                 slen,
                                 actual_result_count=rlen)
            else:
                # Sorting time is reported under the 'sort_on' split.
                cr.start_split('sort_on')
                result = self.sortResults(self.data,
                                          sort_index,
                                          reverse,
                                          limit,
                                          merge,
                                          actual_result_count=rlen,
                                          b_start=b_start,
                                          b_size=b_size)
                cr.stop_split('sort_on', None)
        elif rs:
            # We got some results from the indexes.
            # Sort and convert to sequences.
            # XXX: The check for 'values' is really stupid since we call
            # items() and *not* values()
            rlen = len(rs)
            if sort_index is None and hasattr(rs, 'values'):
                # having a 'values' means we have a data structure with
                # scores.  Build a new result set, sort it by score, reverse
                # it, compute the normalized score, and Lazify it.

                if not merge:
                    # Don't bother to sort here, return a list of
                    # three tuples to be passed later to mergeResults
                    # note that data_record_normalized_score_ cannot be
                    # calculated and will always be 1 in this case
                    getitem = self.__getitem__
                    result = [(score, (1, score, rid), getitem)
                              for rid, score in rs.items()]
                else:
                    cr.start_split('sort_on')

                    rs = rs.byValue(0)  # sort it by score
                    # Score of the first entry; used below as the
                    # denominator of the normalized score. The local name
                    # shadows the builtin max() inside this method.
                    max = float(rs[0][0])

                    # Here we define our getter function inline so that
                    # we can conveniently store the max value as a default arg
                    # and make the normalized score computation lazy
                    def getScoredResult(item, max=max, self=self):
                        """
                        Returns instances of self._v_brains, or whatever is
                        passed into self.useBrains.
                        """
                        score, key = item
                        r=self._v_result_class(self.data[key])\
                              .__of__(aq_parent(self))
                        r.data_record_id_ = key
                        r.data_record_score_ = score
                        r.data_record_normalized_score_ = int(100. * score /
                                                              max)
                        return r

                    sequence, slen = self._limit_sequence(
                        rs, rlen, b_start, b_size)
                    result = LazyMap(getScoredResult,
                                     sequence,
                                     slen,
                                     actual_result_count=rlen)
                    cr.stop_split('sort_on', None)

            elif sort_index is None and not hasattr(rs, 'values'):
                # no scores
                if hasattr(rs, 'keys'):
                    rs = rs.keys()
                sequence, slen = self._limit_sequence(rs, rlen, b_start,
                                                      b_size)
                result = LazyMap(self.__getitem__,
                                 sequence,
                                 slen,
                                 actual_result_count=rlen)
            else:
                # sort.  If there are scores, then this block is not
                # reached, therefore 'sort-on' does not happen in the
                # context of a text index query.  This should probably
                # sort by relevance first, then the 'sort-on' attribute.
                cr.start_split('sort_on')
                result = self.sortResults(rs,
                                          sort_index,
                                          reverse,
                                          limit,
                                          merge,
                                          actual_result_count=rlen,
                                          b_start=b_start,
                                          b_size=b_size)
                cr.stop_split('sort_on', None)
        else:
            # Empty result set
            result = LazyCat([])
        cr.stop()
        return result