Пример #1
0
    def _apply_index( self, request, cid='' ):
        """
            Apply the index to the query parameters found in 'request',
            which should be a mapping object.

            Return None when the parsed request carries no keys for this
            index.

            Otherwise return two objects.  The first is the set of record
            numbers selected for the first query key; the second is a
            tuple naming the two data fields used (the "since" field and
            the "until" field).
        """
        parsed = parseIndexRequest( request, self.getId() )
        if parsed.keys is None:
            return None

        when = self._convertDateTime( parsed.keys[0] )

        #   Collapse every bucket into one set up front, to avoid
        #   large-small union penalties.
        #   NOTE(review): assumes the four buckets are BTrees whose
        #   values( min, max ) takes key bounds -- confirm.
        only_until = multiunion( self._until_only.values( when ) )
        only_since = multiunion( self._since_only.values( None, when ) )
        until_ok = multiunion( self._until.values( when ) )
        since_ok = multiunion( self._since.values( None, when ) )

        #   Records present in both bounded buckets must pass both tests.
        both_ends = intersection( until_ok, since_ok )

        #   Merge from smallest to largest.
        matches = union( both_ends, only_until )
        matches = union( matches, only_since )
        matches = union( matches, self._always )

        return matches, ( self._since_field, self._until_field )
Пример #2
0
    def _apply_index(self, request, cid=''):
        """
            Apply the index to the query parameters found in 'request',
            which should be a mapping object.

            Return None when the parsed request carries no keys for this
            index.

            Otherwise return two objects.  The first is the set of record
            numbers selected for the first query key; the second is a
            tuple naming the two data fields used (the "since" field and
            the "until" field).
        """
        parsed = parseIndexRequest(request, self.getId())
        if parsed.keys is None:
            return None

        moment = self._convertDateTime(parsed.keys[0])

        # Aggregate each bucket separately first, to avoid large-small
        # union penalties.
        # NOTE(review): assumes the four buckets are BTrees whose
        # values(min, max) takes key bounds -- confirm.
        until_only_hits = multiunion(self._until_only.values(moment))
        since_only_hits = multiunion(self._since_only.values(None, moment))
        until_hits = multiunion(self._until.values(moment))
        since_hits = multiunion(self._since.values(None, moment))

        # Records present in both bounded buckets must pass both tests.
        bounded_hits = intersection(until_hits, since_hits)

        # Merge from smallest to largest.
        merged = union(bounded_hits, until_only_hits)
        merged = union(merged, since_only_hits)
        merged = union(merged, self._always)

        return merged, (self._since_field, self._until_field)
Пример #3
0
    def in_(self, arg): # in
        """Given a sequence, return the union of rids for each.

        The argument is a string of comma-separated tokens. It is split on
        the commas and the terms are whitespace-stripped to form a sequence
        of strings. When the index is not case-sensitive, the whole
        argument is lower-cased before it is split.

        Raises TypeError if arg is not a string, and ValueError if it is
        empty or contains no comma.

        """

        # Parse and validate.
        # ===================

        if not isinstance(arg, basestring):
            # Wrap in a tuple so a tuple argument is reported, not unpacked.
            raise TypeError("arg is not a string: '%s'" % (arg,))
        if not arg:
            raise ValueError("no arg given")
        if ',' not in arg:
            raise ValueError("malformed arg [no comma]: '%s'" % arg)
        if not self.case_sensitive:
            arg = arg.lower()


        # Build.
        # ======

        found = [self.values.get(token.strip(), IISet())
                 for token in arg.split(',')]

        # Optimization: merge smallest to largest. Sorting on the length
        # alone never compares two sets with each other, which sorting
        # (length, set) tuples does whenever two lengths tie.
        found.sort(key=len)
        return multiunion(found)
Пример #4
0
    def _apply_index(self, request):
        """Look up the entries whose since/until span overlaps the query.

        The parsed request must carry exactly two keys, the start and the
        end of the queried range. None is returned when the keys cannot be
        unpacked because of a TypeError (for instance when they are None).

        Returns a two-tuple: the multiunion of the ``self._index`` entries
        for every overlapping key, and ``(self.id,)``.
        """
        record = parseIndexRequest(request, self.id)
        try:
            qstart, qend = record.keys
        except TypeError:
            return None

        # Clamp both bounds to the minint/maxint limits of BTrees.family64.
        lowest = BTrees.family64.minint
        highest = BTrees.family64.maxint
        qstart = min(highest, max(lowest, qstart))
        qend = max(lowest, min(highest, qend))

        since_values = self._since_index.values
        until_values = self._until_index.values

        # Spans that contain the query start: since <= qstart <= until.
        covers_qstart = intersection(
            multiunion(since_values(max=qstart)),
            multiunion(until_values(min=qstart)))

        # Spans that contain the query end: since <= qend <= until.
        covers_qend = intersection(
            multiunion(since_values(max=qend)),
            multiunion(until_values(min=qend)))

        # Spans lying inside the query: qstart <= since and until <= qend.
        inside_query = intersection(
            multiunion(since_values(min=qstart)),
            multiunion(until_values(max=qend)))

        overlapping = union(covers_qstart, covers_qend)
        overlapping = union(overlapping, inside_query)

        hits = multiunion([self._index[key] for key in overlapping])
        return hits, (self.id,)
Пример #5
0
 def _eval(self,context):
     """Evaluate every subquery against *context* and return their union.

     If the 'empty' bucket of the classification is non-empty, all object
     ids of the context are returned instead. With no subqueries at all an
     empty IISet is returned.
     """
     buckets = self._classifySubqueries()
     if buckets['empty']:
         return context._getObjectIds()
     subqueries = (buckets['lookup'] + buckets['complex']
                   + buckets['indexed'] + buckets['notQ'])
     if not subqueries:
         return IISet()
     if len(subqueries) < 4:
         # Few operands: fold them pairwise.
         merged = None
         for sub in subqueries:
             merged = union(merged, sub._eval(context))
         return merged
     # Four or more operands: merge them in a single multiunion call.
     return multiunion([sub._eval(context) for sub in subqueries])
Пример #6
0
 def _eval(self, context):
     """Return the union of all subquery results for *context*.

     If the 'empty' bucket of the classification is non-empty, all object
     ids of the context are returned instead. With no subqueries at all an
     empty IISet is returned.
     """
     classified = self._classifySubqueries()
     if classified['empty']:
         return context._getObjectIds()

     parts = []
     for bucket in ('lookup', 'complex', 'indexed', 'notQ'):
         parts = parts + classified[bucket]
     if not parts:
         return IISet()

     if len(parts) >= 4:
         # Many operands: one multiunion call over all evaluated results.
         evaluated = [part._eval(context) for part in parts]
         return multiunion(evaluated)

     # Few operands: fold them pairwise, starting from None.
     total = None
     for part in parts:
         total = union(total, part._eval(context))
     return total
Пример #7
0
 def _apply_not(self, not_parm, resultset=None):
     """Return the union of the index entries stored for the given keys.

     Keys missing from ``self._index`` are skipped, and an entry stored as
     a bare int is wrapped in a one-element IISet first. ``resultset`` is
     accepted but not used here.
     """
     lookup = self._index.get
     collected = []
     for key in not_parm:
         entry = lookup(key, None)
         if entry is None:
             continue
         if isinstance(entry, int):
             entry = IISet((entry, ))
         collected.append(entry)
     return multiunion(collected)
Пример #8
0
 def _apply_not(self, not_parm, resultset=None):
     """Merge the index entries of every key in *not_parm* into one set.

     Keys without an entry in ``self._index`` contribute nothing, and a
     bare int entry is turned into a one-element IISet before merging.
     ``resultset`` is accepted but not used here.
     """
     entries = self._index
     pieces = []
     for key in not_parm:
         found = entries.get(key, None)
         if found is not None:
             if isinstance(found, int):
                 found = IISet((found, ))
             pieces.append(found)
     return multiunion(pieces)
Пример #9
0
    def _apply_index(self, request, resultset=None):
        """Query every source name of this index that the request mentions.

        For each name from ``getIndexSourceNames()`` that has a value in
        *request*, the value is used as lookup target directly when it is a
        ``str``, otherwise it is passed through ``IUUID`` first.

        Returns None when the request mentions none of the source names.
        Otherwise returns ``(set, names)``, where *names* is the tuple of
        source names that were queried.
        """
        matched = []
        used = []
        for reltype in self.getIndexSourceNames():
            query = request.get(reltype)
            if query is None:
                continue

            target = query if isinstance(query, str) else IUUID(query)

            used.append(reltype)
            hits = self._index[reltype].get(target)
            if hits is not None:
                matched.append(hits)

        if not used:
            return None

        if len(matched) == 1:
            # A single hit is handed back as stored, without merging.
            return matched[0], tuple(used)

        # If we already get a small result set passed in, intersecting
        # the various indexes with it and doing the union later is
        # faster than creating a multiunion first.
        if resultset is not None and len(resultset) < 200:
            matched = [intersection(resultset, hits) for hits in matched]
        merged = multiunion(matched)

        if merged is None:
            merged = IISet()
        return merged, tuple(used)
Пример #10
0
    def query_index(self, record, resultset=None):
        """Return the record ids selected by the first key of *record*.

        Without *resultset* the matching set is built directly. With a
        *resultset* the non-matching ids are collected instead and
        subtracted from it. Whichever set was built is stored in the
        request cache, when one is available.
        """
        cache = self.getRequestCache()
        use_cache = cache is not None
        if use_cache:
            cachekey = self.getRequestCacheKey(record, resultset)
            remembered = cache.get(cachekey, None)
            if remembered is not None:
                if resultset is None:
                    return remembered
                # NOTE(review): presumably the key differs between both
                # modes, so this entry is an inverse set -- confirm.
                return difference(resultset, remembered)

        term = self._convertDateTime(record.keys[0])

        if resultset is not None:
            # Compute the inverse and subtract it from the given set.
            ended_open = multiunion(self._until_only.values(None, term - 1))
            pending_open = multiunion(self._since_only.values(term + 1))
            ended = multiunion(self._until.values(None, term - 1))
            pending = multiunion(self._since.values(term + 1))

            inverse = multiunion([pending, pending_open, ended_open, ended])
            if use_cache:
                cache[cachekey] = inverse
            return difference(resultset, inverse)

        # Aggregate sets for each bucket separately, to avoid
        # large-small union penalties.
        open_start = multiunion(self._until_only.values(term))
        open_end = multiunion(self._since_only.values(None, term))
        until_ok = multiunion(self._until.values(term))
        since_ok = multiunion(self._since.values(None, term))
        closed = intersection(until_ok, since_ok)

        # Merge from smallest to largest.
        matches = multiunion([closed, open_start, open_end, self._always])
        if use_cache:
            cache[cachekey] = matches
        return matches
    def query_index(self, record, resultset=None):
        """Evaluate the first key of *record* against the date buckets.

        Without *resultset* the matching set is built and returned. With a
        *resultset* the set of non-matching ids is built and the
        difference between *resultset* and that set is returned. The set
        that was built is written to the request cache when there is one.
        """
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record, resultset)
            cached = cache.get(cachekey, None)
            if cached is not None:
                # NOTE(review): with a resultset the cached value is used
                # as an inverse set; this relies on the cache key keeping
                # both modes apart -- confirm.
                return (cached if resultset is None
                        else difference(resultset, cached))

        term = self._convertDateTime(record.keys[0])
        direct = resultset is None

        if direct:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            # Merge from smallest to largest.
            operands = [intersection(until, since), until_only, since_only,
                        self._always]
        else:
            # Compute the inverse, to be subtracted from the given set.
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))
            operands = [since, since_only, until_only, until]

        combined = multiunion(operands)
        if cache is not None:
            cache[cachekey] = combined

        if direct:
            return combined
        return difference(resultset, combined)
Пример #12
0
 def _eval(self, context):
     """Union the results of all subqueries evaluated against *context*.

     If the "empty" bucket of the classification is non-empty, all object
     ids of the context are returned instead. With no subqueries at all an
     empty IISet is returned.
     """
     groups = self._classifySubqueries()
     if groups["empty"]:
         return context._getObjectIds()

     queries = (
         groups["lookup"] + groups["complex"]
         + groups["indexed"] + groups["notQ"]
     )
     if not queries:
         return IISet()

     if len(queries) < 4:
         # Few operands: fold them pairwise, starting from None.
         acc = None
         for query in queries:
             acc = union(acc, query._eval(context))
         return acc

     # Four or more operands: merge them in a single multiunion call.
     return multiunion([query._eval(context) for query in queries])
Пример #13
0
 def __call__(self):
     """Write a small JSON statistics document to the response.

     The document holds the package version, the length of the portal
     catalog, the number of ids listed by ``source_users``, and the sizes
     of the 'modified' index entries from 30, 14 and 7 days before now
     onwards. Returns the response object.
     """
     site = self.context
     user_folder = aq_get(site, 'acl_users')
     catalog = getToolByName(site, 'portal_catalog')
     now = DateTime()
     modified = catalog._catalog.indexes['modified']
     values_from = modified._index.values

     # Convert the three cut-off dates into index keys first.
     week_key = modified._convert(now - 7)
     fortnight_key = modified._convert(now - 14)
     month_key = modified._convert(now - 30)

     def touched_since(key):
         # Size of the union of all index entries at or after *key*.
         # NOTE(review): assumes values(min) is a BTrees range query.
         return len(multiunion(values_from(key)))

     stats = {
         'version': config.package_version,
         'objects': len(catalog),
         'users': len(user_folder.source_users.listUserIds()),
         'modified_30': touched_since(month_key),
         'modified_14': touched_since(fortnight_key),
         'modified_7': touched_since(week_key),
     }

     response = self.request.response
     response.setHeader('content-type', 'application/json')
     response.setBody(json.dumps(stats))
     return response
Пример #14
0
def setOperation(op, sets, isearch):
    '''Combine *sets* with *op*; if *isearch*, return an incremental search.

    *op* may be '"and"' or '"or"'. A result of None stands for
    "all results".

    Uses 'IncrementalSearch', if available.
    '''
    is_and = op == 'and'

    if not sets:
        if is_and:
            return None  # None means all results
        if isearch:
            nothing = IOr()
            nothing.complete()
            return nothing
        return IISet()

    # Note: "multiunion" is *much* faster than "IOr"!
    if IAnd is not None and (isearch or (is_and and len(sets) > 1)):
        searches = []
        for operand in sets:
            if operand is None:
                # all results
                if is_and:
                    continue
                return None
            if not isinstance(operand, ISearch):
                operand = IBTree(operand)
            searches.append(operand)
        if is_and and not searches:
            return None  # empty 'and'
        if len(searches) == 1:
            # do not wrap a one element search
            search = searches[0]
        else:
            search = (IAnd if is_and else IOr)(*searches)
            search.complete()
        if isearch:
            return search
        return search.asSet()

    if op == 'or' and len(sets) > 3:
        return multiunion(sets)

    # Few operands (or an 'and'): fold them pairwise, starting from None.
    combine = intersection if is_and else union
    merged = None
    for operand in sets:
        merged = combine(merged, operand)
    if merged is None:
        if combine is not union:
            return None
        merged = IISet()
    if isearch:
        merged = IBTree(merged)
    return merged
Пример #15
0
    def _search(self, path, default_level=0):
        """ Run a path query and return the matching set.

        ``path``
            a string representing a relative URL, or a part of a relative URL,
            or a tuple ``(path, level)``.  In the first two cases, use
            ``default_level`` as the level for the search.

        ``default_level``
            the level to use for non-tuple queries.

        ``level >= 0`` =>  match ``path`` only at the given level.

        ``level <  0`` =>  match ``path`` at *any* level
        """
        if not isinstance(path, str):
            level = int(path[1])
            path = path[0]
        else:
            level = default_level

        if level < 0:
            # Search at every level, return the union of all results
            per_level = []
            for candidate in range(self._depth + 1):
                per_level.append(self._search(path, candidate))
            return multiunion(per_level)

        # Drop the empty components that leading, trailing or doubled
        # slashes produce.
        comps = [part for part in path.split('/') if part]

        if level + len(comps) - 1 > self._depth:
            # Our search is for a path longer than anything in the index
            return IISet()

        if not comps:
            return IISet(self._unindex.keys())

        # Walk the components from last to first, narrowing as we go.
        matches = None
        for offset in range(len(comps) - 1, -1, -1):
            by_level = self._index.get(comps[offset], None)
            if by_level is None:
                return IISet()
            at_level = by_level.get(level + offset, None)
            if at_level is None:
                return IISet()
            matches = intersection(matches, at_level)
        return matches
Пример #16
0
    def below(self, arg):
        """Find all resources at or below path, within the limits given.

        *arg* is split into a path and two optional limits by
        ``_path_and_limits``. Returns None when the path is not a key of
        ``self.path2rid``.
        """

        # Parse and validate.
        # ===================

        path, upper, lower = self._path_and_limits(arg)
        if self.path2rid.get(path, None) is None:
            return None


        # Build
        # =====
        # Intersect the rid sets registered for every (level, part) pair.

        parts = path.split(os.sep)
        rids = None
        for depth, part in enumerate(parts):
            rids = intersection(rids, self.parts[(depth, part)])
        if rids is None:
            return IISet() # short-cut

        # Index of the last path component; both limits are relative to it.
        base = len(parts) - 1


        # Limits
        # ======
        # With an upper limit, subtract the rids of the levels from base up
        # to, but not including, base + upper. With a lower limit, keep only
        # rids found at the levels from base up to, but not including,
        # base + lower. Either walk stops at the first level that is not in
        # self.levels.

        if upper is not None:
            upper += base
            for current in range(base, upper):
                if current not in self.levels:
                    break
                rids = difference(rids, self.levels[current])
        if lower is not None:
            lower += base
            allowed = []
            for current in range(base, lower):
                if current not in self.levels:
                    break
                allowed.append(self.levels[current])
            rids = intersection(rids, multiunion(allowed))

        return rids
Пример #17
0
    def _search(self, path, default_level=0):
        """ Perform the actual search.

        ``path``
            a string representing a relative URL, or a part of a relative URL,
            or a tuple ``(path, level)``.  In the first two cases, use
            ``default_level`` as the level for the search.

        ``default_level``
            the level to use for non-tuple queries.

        ``level >= 0`` =>  match ``path`` only at the given level.

        ``level <  0`` =>  match ``path`` at *any* level
        """
        if isinstance(path, str):
            level = default_level
        else:
            level = int(path[1])
            path = path[0]

        if level < 0:
            # Search at every level, return the union of all results
            return multiunion(
                [self._search(path, level)
                 for level in range(self._depth + 1)])

        # Build a real list: on Python 3 filter() returns an iterator,
        # which supports neither len() nor being walked twice.
        comps = [part for part in path.split('/') if part]

        if level + len(comps) - 1 > self._depth:
            # Our search is for a path longer than anything in the index
            return IISet()

        if len(comps) == 0:
            return IISet(self._unindex.keys())

        # Walk the components from last to first, narrowing as we go.
        results = None
        for i, comp in reversed(list(enumerate(comps))):
            tree = self._index.get(comp, None)
            if tree is None:
                return IISet()
            tree2 = tree.get(level + i, None)
            if tree2 is None:
                return IISet()
            results = intersection(results, tree2)
        return results
def setOperation(op, sets, isearch):
    '''Apply *op* to *sets*; if *isearch*, return an incremental search.

    *op* may be '"and"' or '"or"'. A result of None stands for
    "all results".

    Uses 'IncrementalSearch', if available.
    '''
    conjunction = op == 'and'

    if not sets:
        if conjunction:
            return None  # None means all results
        if isearch:
            empty_search = IOr()
            empty_search.complete()
            return empty_search
        return IISet()

    # Note: "multiunion" is *much* faster than "IOr"!
    if IAnd is not None and (isearch or (conjunction and len(sets) > 1)):
        wrapped = []
        for item in sets:
            if item is None:
                # all results
                if conjunction:
                    continue
                return None
            if not isinstance(item, ISearch):
                item = IBTree(item)
            wrapped.append(item)
        if conjunction and not wrapped:
            return None  # empty 'and'
        if len(wrapped) == 1:
            # do not wrap a one element search
            search = wrapped[0]
        else:
            search = (IAnd if conjunction else IOr)(*wrapped)
            search.complete()
        if isearch:
            return search
        if hasattr(search, 'asSet'):
            return search.asSet()
        # No 'asSet' on this search: fill a set from the iterated hits.
        materialized = IISet()
        materialized.__setstate__((tuple(search),))
        return materialized

    if op == 'or' and len(sets) > 5:
        return multiunion(sets)

    # Few operands (or an 'and'): fold them pairwise, starting from None.
    combine = intersection if conjunction else union
    folded = None
    for item in sets:
        folded = combine(folded, item)
    if folded is None:
        if combine is not union:
            return None
        folded = IISet()
    if isearch:
        folded = IBTree(folded)
    return folded
Пример #19
0
    def refresh(self):
        """Load the data set from the database.

        self.constraints contains a list of lists. Within each sublist, the
        terms are either ANDed or NOTed together; the results are then ORed.
        When self.constraints is None, the data set is the catalog's rids
        object itself.

        The result is stored on self.data; nothing is returned.

        The below could be optimized in a couple ways:

          - stop searching as soon as the result set proves empty
          - perform both levels of merge from smallest to largest

        See original ZCatalog code for implementation hints.

        """
        import dewey # avoid circular import
        all_rids = dewey.get_catalog().rids

        if self.constraints is None:
            results = all_rids
        else:
            results = []
            for grouping in self.constraints:
                # Start every grouping from scratch: otherwise an empty
                # grouping reads an unbound name (or re-adds the previous
                # grouping's result), and a grouping that opens with an
                # AND/NOT term silently builds on the previous grouping.
                result = None
                for operation, query, (call, arg) in grouping:
                    if (operation is None) and (call is None):       # OR
                        result = all_rids
                    elif (operation is None) and (call is not None): # OR ...
                        result = call(arg)
                    else:                                            # AND/NOT
                        assert None not in (operation, call) # safety net
                        result = operation(result, call(arg))
                if result is not None:
                    results.append(result)
            results = multiunion(results) # OR

            if results is None:
                results = IISet()

        self.data = results
Пример #20
0
    def above(self, arg):
        """Find all resources at or above path, within the limits given.

        Here we actually call below() on <path> and all of its ancestors,
        passing the limits straight through, with the exception that limits
        default to 0:1 rather than None:None. Use '0:' for the latter.

        Returns None when the path is not a key of self.path2rid.

        NOTE(review): the visible body ends right after the multiunion()
        call, without returning the merged rids -- the snippet looks
        truncated; confirm against the full source.

        """

        # Parse and validate.
        # ===================

        path, upper, lower = self._path_and_limits(arg)
        rid = self.path2rid.get(path, None)
        if rid is None:
            return


        # Build
        # =====

        # Template for the argument handed to below(): "<path> <limits>".
        tmpl = "%s "
        if (upper, lower) == (None, None):
            tmpl += '0:1' # default: breadcrumbs
        else:
            # Rebuild the "upper:lower" text, leaving a missing side empty.
            if upper is not None:
                tmpl += str(upper)
            tmpl += ":"
            if lower is not None:
                tmpl += str(lower)

        parts = path.split(os.sep)
        rids = []
        for level in range(len(parts)):
            # Path made of the first level+1 components; an empty result is
            # replaced by '/'.
            ancestor = os.sep.join(parts[:level+1])
            ancestor = ancestor and ancestor or '/'
            # NOTE(review): below() returns None for a path that is not in
            # path2rid; verify multiunion() copes with None entries.
            rids.append(self.below(tmpl % ancestor))
        rids = multiunion(rids)
Пример #21
0
    def in_(self, arg): # in
        """Given a sequence, return the union of rids for each.

        If the argument starts with a [ or (, it is evaled as a list or tuple.
        Otherwise, it is split on comma and stripped to form a sequence of
        strings.

        Raises TypeError if arg is not a string or does not evaluate to a
        list or tuple, and ValueError if it is empty or contains no comma.

        """

        # Parse and validate.
        # ===================

        if not isinstance(arg, basestring):
            # Wrap in a tuple so a tuple argument is reported, not unpacked.
            raise TypeError("arg is not a string: '%s'" % (arg,))
        elif not arg:
            raise ValueError("no arg given")
        elif ',' not in arg:
            raise ValueError("malformed arg [no comma]: '%s'" % arg)

        if arg[0] in '[(':
            # SECURITY NOTE(review): eval() runs arbitrary expressions. If
            # arg can ever come from untrusted input, switch to
            # ast.literal_eval (which accepts only literals).
            values = eval(arg)
            if not isinstance(values, (list, tuple)):
                raise TypeError("arg didn't define list or tuple")
        else:
            values = [v.strip() for v in arg.split(',')]


        # Build.
        # ======

        found = [self.rids.get(value, IISet()) for value in values]

        # Optimization: merge smallest to largest. Sorting on the length
        # alone never compares two sets with each other, which sorting
        # (length, set) tuples does whenever two lengths tie.
        found.sort(key=len)
        return multiunion(found)
def dateindex_apply_index(self, request, cid="", type=type, res=None):
    """Apply the date index to the query found in *request*.

    Returns None when the parsed request has no keys for this index.
    Otherwise returns ``(set, (self.id,))``.

    A range search is requested through the record's "range" option
    ("min", "max" or both) or through a "range:..." usage string; it
    returns the multiunion of all index entries between the bounds. In
    that branch neither *res* nor the operator is applied. Any other
    search looks up every key, narrows each non-int entry with *res*, and
    combines the entries with the requested operator: union for "or",
    intersection otherwise.

    Raises RuntimeError when the operator is not in ``self.operators``.
    ``cid`` and ``type`` are accepted but not used here.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    # A real list: the keys are walked by both min() and max() below, which
    # a one-shot map() iterator (Python 3) would not survive.
    keys = [self._convert(key) for key in record.keys]

    index = self._index
    r = None
    opr = None

    # experimental code for specifing the operator
    operator = record.get("operator", self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % operator)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    # range parameter
    range_arg = record.get("range", None)
    if range_arg:
        opr = "range"
        opr_args = []
        if range_arg.find("min") > -1:
            opr_args.append("min")
        if range_arg.find("max") > -1:
            opr_args.append("max")

    if record.get("usage", None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(":")
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = min(keys) if "min" in opr_args else None
        hi = max(keys) if "max" in opr_args else None

        # Test against None so that an upper bound of 0 is still honoured.
        if hi is not None:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)

        r = multiunion(setlist)

    else:  # not a range search
        for key in keys:
            entry = index.get(key, None)
            if entry is not None:
                if isinstance(entry, int):
                    entry = IISet((entry,))
                else:
                    # entry can't be bigger than res
                    entry = intersection(entry, res)
                r = set_func(r, entry)

    if isinstance(r, int):
        r = IISet((r,))

    if r is None:
        return IISet(), (self.id,)
    else:
        return r, (self.id,)
def dateindex_apply_index( self, request, cid='', type=type, res=None):
    """Apply the date index to the query found in *request*.

    Returns None when the parsed request has no keys for this index.
    Otherwise returns ``(set, (self.id,))``.

    A range search is requested through the record's 'range' option
    ('min', 'max' or both) or through a 'range:...' usage string; it
    returns the multiunion of all index entries between the bounds. In
    that branch neither *res* nor the operator is applied. Any other
    search looks up every key, narrows each non-int entry with *res*, and
    combines the entries with the requested operator: union for "or",
    intersection otherwise.

    Raises RuntimeError when the operator is not in ``self.operators``.
    ``cid`` and ``type`` are accepted but not used here.
    """
    record = parseIndexRequest( request, self.id, self.query_options )
    if record.keys is None:
        return None

    # A real list: the keys are walked by both min() and max() below, which
    # a one-shot map() iterator (Python 3) would not survive.
    keys = [ self._convert( key ) for key in record.keys ]

    index = self._index
    r = None
    opr = None

    #experimental code for specifing the operator
    operator = record.get( 'operator', self.useOperator )
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % operator)

    # depending on the operator we use intersection or union
    if operator=="or":
        set_func = union
    else:
        set_func = intersection

    # range parameter
    range_arg = record.get('range',None)
    if range_arg:
        opr = "range"
        opr_args = []
        if range_arg.find("min") > -1:
            opr_args.append("min")
        if range_arg.find("max") > -1:
            opr_args.append("max")

    if record.get('usage',None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr=="range":   # range search
        lo = min(keys) if 'min' in opr_args else None
        hi = max(keys) if 'max' in opr_args else None

        # Test against None so that an upper bound of 0 is still honoured.
        if hi is not None:
            setlist = index.values(lo,hi)
        else:
            setlist = index.values(lo)

        r = multiunion(setlist)

    else: # not a range search
        for key in keys:
            entry = index.get(key, None)
            if entry is not None:
                if isinstance(entry, int):
                    entry = IISet((entry,))
                else:
                    # entry can't be bigger than res
                    entry = intersection(entry, res)
                r = set_func(r, entry)

    if isinstance(r, int):
        r = IISet((r,))

    if r is None:
        return IISet(), (self.id,)
    else:
        return r, (self.id,)
Пример #24
0
    def _apply_index(self, request, cid='', type=type):
        """Find the objects whose start/end span overlaps a queried period.

        The request argument should be a mapping object. It is parsed for
        the "id" of this index instance; None is returned when the parsed
        record has no keys.

        The period is read from the record's 'start' and 'end' options. If
        either one is missing, an empty set is returned.

        The actual lookups are delegated to the two catalog indexes named
        by ``self.startindex`` and ``self.endindex``.

        Returns a two-tuple of the matching set and ``(self.id,)``.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        q_start = record.get('start', None)
        q_end = record.get('end', None)

        if q_start is None or q_end is None:
            return IISet(), (self.id,)

        # get both indexes: for start and end
        zcatalog = self._getCatalog()
        i_start = zcatalog.getIndex(self.startindex)
        i_end = zcatalog.getIndex(self.endindex)

        def ranged(index, name, bound, side):
            # One-sided range query ('min' or 'max') against a sibling
            # index; only the result set of its answer is kept.
            return index._apply_index(
                {name: {'query': bound, 'range': side}})[0]

        # search:
        #
        #       q_start|--------------------|q_end
        #
        # 1) i_start|---------------------------|i_end
        #
        # 2) i_start|---------------|i_end
        #
        # 3)           i_start|-----------------|i_end
        #
        # 4)           i_start|-----|i_end

        # 1) starting at or before q_start and ending at or after q_end
        spans_query = intersection(
            ranged(i_start, self.startindex, q_start, 'max'),
            ranged(i_end, self.endindex, q_end, 'min'))

        # 2) ending somewhere between q_start and q_end
        ends_inside = intersection(
            ranged(i_end, self.endindex, q_start, 'min'),
            ranged(i_end, self.endindex, q_end, 'max'))

        # 3) starting somewhere between q_start and q_end
        starts_inside = intersection(
            ranged(i_start, self.startindex, q_start, 'min'),
            ranged(i_start, self.startindex, q_end, 'max'))

        # 4) objects lying fully inside are already found by 2) and 3)

        # union the three results
        overlapping = multiunion([spans_query, ends_inside, starts_inside])
        return overlapping, (self.id,)
Пример #25
0
def extendedpathindex_search(self,
                             path,
                             default_level=0,
                             depth=-1,
                             navtree=0,
                             navtree_start=0,
                             tmpres=None):
    """
    Search the index for path and return the set of matching record ids.

    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    default_level specifies the level to use when no more specific
    level has been passed in with the path.

    level >= 0  starts searching at the given level
    level <  0  finds matches at *any* level

    depth lets you limit the results to items at most depth levels deeper
    than the matched path. depth == 0 means no subitems are included at all,
    with depth == 1 only direct children are included, etc. depth == -1, the
    default, returns all children at any depth.

    navtree is treated as a boolean; if it evaluates to True, not only the
    query match is returned, but also each container in the path. If depth
    is greater than 0, also all siblings of those containers, as well as the
    siblings of the match are included as well, plus *all* documents at the
    starting level.

    navtree_start limits what containers are included in a navtree search.
    If greater than 0, only containers (and possibly their siblings) at that
    level and up will be included in the resultset.

    tmpres is the initial set the per-component results are intersected
    with in the general (non-shortcut) code path; None means no restriction.
    """
    if isinstance(path, basestring):
        level = default_level
    else:
        level = int(path[1])
        path = path[0]

    if level < 0:
        # Search at every level, return the union of all results.
        # NOTE(review): tmpres is not forwarded to the per-level searches.
        return multiunion([
            self.search(path, lvl, depth, navtree, navtree_start)
            for lvl in xrange(self._depth + 1)
        ])

    # NOTE(review): len(comps) and slicing below rely on filter() returning
    # a list, as it does on Python 2.
    comps = filter(None, path.split('/'))

    if navtree and depth == -1:  # Navtrees don't do recursive
        depth = 1

    #
    # Optimisations
    #

    pathlength = level + len(comps) - 1
    if navtree and navtree_start > min(pathlength + depth, self._depth):
        # This navtree_start excludes all items that match the depth
        return IISet()
    if pathlength > self._depth:
        # Our search is for a path longer than anything in the index
        return IISet()

    if level == 0 and depth in (0, 1):
        # We have easy indexes for absolute paths where
        # we are looking for depth 0 or 1 result sets
        if navtree:
            # Optimized absolute path navtree and breadcrumbs cases
            if depth == 1:
                # Navtree case, all sibling elements along the path
                convert = multiunion
                index = self._index_parents
            else:
                # Breadcrumbs case, all direct elements along the path
                convert = IISet
                index = self._index_items
            # Collect all results along the path, skipping unknown paths
            result = []
            for i in range(len(comps), navtree_start - 1, -1):
                parent_path = '/' + '/'.join(comps[:i])
                hit = index.get(parent_path)
                if hit is not None:
                    result.append(hit)
            return convert(result)

        if not path.startswith('/'):
            path = '/' + path
        if depth == 0:
            # Specific object search.  Test against None explicitly: a
            # stored id of 0 is falsy but still a valid match.
            res = self._index_items.get(path)
            if res is None:
                return IISet()
            return IISet([res])
        else:
            # Single depth search
            return self._index_parents.get(path, IISet())

    # Avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about all indexed values having the
    # same common base path
    if level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        minlength = min(len(indexpath), len(comps))
        # Truncate path to first different element
        for i in xrange(minlength):
            if indexpath[i] != comps[i]:
                break
            level += 1
        comps = comps[level:]

    if not comps and depth == -1:
        # Recursive search for everything
        return IISet(self._unindex)

    #
    # Core application of the indexes
    #

    pathset = tmpres  # Same as pathindex
    depthset = None  # For limiting depth

    if navtree and depth > 0:
        # Include the elements up to the matching path
        depthset = multiunion([
            self._index.get(None, {}).get(i, IISet())
            for i in range(min(navtree_start, level),
                           max(navtree_start, level) + 1)
        ])

    indexedcomps = enumerate(comps)
    if not navtree:
        # Optimize relative-path searches by starting with the
        # presumed smaller sets at the end of the path first
        # We can't do this for the navtree case because it needs
        # the bigger rootset to include siblings along the way.
        indexedcomps = list(indexedcomps)
        indexedcomps.reverse()

    for i, comp in indexedcomps:
        # Find all paths that have comp at the given level
        res = self._index.get(comp, {}).get(i + level)
        if res is None:  # Non-existing path; navtree is inverse, keep going
            pathset = IISet()
            if not navtree:
                return pathset
        pathset = intersection(pathset, res)

        if navtree and i + level >= navtree_start:
            depthset = union(
                depthset,
                intersection(pathset,
                             self._index.get(None, {}).get(i + level)))

    if depth >= 0:
        # Limit results to those that terminate within depth levels
        start = len(comps) - 1
        if navtree:
            start = max(start, (navtree_start - level))
        depthset = multiunion(
            filter(None, [depthset] + [
                intersection(pathset,
                             self._index.get(None, {}).get(i + level))
                for i in xrange(start, start + depth + 1)
            ]))

    if navtree or depth >= 0:
        return depthset
    return pathset
Пример #26
0
    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in the argument

        Normalize the 'query' arguments into integer values at minute
        precision before querying.

        Returns None when 'request' holds no keys for this index,
        otherwise a ``(result_set, (self.id,))`` tuple.  In the non-range
        case every looked-up set is intersected with 'resultset' (None
        means no restriction).

        Raises RuntimeError when the requested operator is not listed in
        ``self.operators``.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        # Build a real list: the keys are read by both min() and max()
        # below, which a one-shot iterator from map() would not survive.
        keys = [self._convert(key) for key in record.keys]

        index = self._index
        r = None
        opr = None

        # experimental code for specifying the operator
        operator = record.get('operator', self.useOperator)
        if operator not in self.operators:
            raise RuntimeError("operator not valid: %s" % operator)

        # depending on the operator we use intersection or union
        if operator == "or":
            set_func = union
        else:
            set_func = intersection

        # range parameter
        range_arg = record.get('range', None)
        if range_arg:
            opr = "range"
            opr_args = []
            if range_arg.find("min") > -1:
                opr_args.append("min")
            if range_arg.find("max") > -1:
                opr_args.append("max")

        if record.get('usage', None):
            # see if any usage params are sent to field
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":   # range search
            if 'min' in opr_args:
                lo = min(keys)
            else:
                lo = None

            if 'max' in opr_args:
                hi = max(keys)
            else:
                hi = None

            # Compare against None so that a falsy upper bound such as 0
            # still limits the range instead of being ignored.
            if hi is not None:
                setlist = index.values(lo, hi)
            else:
                setlist = index.values(lo)

            r = multiunion(setlist)

        else:  # not a range search
            for key in keys:
                docs = index.get(key, None)
                if docs is None:
                    continue
                if isinstance(docs, int):
                    # A bare int is wrapped into a one-element set
                    docs = IISet((docs,))
                else:
                    # set can't be bigger than resultset
                    docs = intersection(docs, resultset)
                r = set_func(r, docs)

        if isinstance(r, int):
            r = IISet((r,))

        if r is None:
            return IISet(), (self.id,)
        return r, (self.id,)
def unindex_apply_index(self, request, cid='', type=type, res=None):
    """Apply the index to the query parameters found in 'request'.

    Returns None when 'request' holds no keys for this index, otherwise
    a ``(result_set, (self.id,))`` tuple.

    'res', when given, is an already computed result set: 'and' queries
    start their intersection from it, and small ones (fewer than 200
    entries) are used to pre-filter 'or' queries.  The single-key
    shortcuts return the stored set without intersecting it with 'res'.

    'cid' and 'type' are accepted for signature compatibility but are not
    used in this function.

    Raises RuntimeError when the requested operator is not listed in
    ``self.operators``.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    r = None
    opr = None

    # experimental code for specifying the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        # Call form of raise: valid on Python 2 and Python 3 alike
        raise RuntimeError("operator not valid: %s" % escape(operator))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":   # range search
        if 'min' in opr_args:
            lo = min(record.keys)
        else:
            lo = None
        if 'max' in opr_args:
            hi = max(record.keys)
        else:
            hi = None
        # Compare against None so that a falsy upper bound such as 0
        # still limits the range instead of being ignored.
        if hi is not None:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)

        # If we only use 1 key (default setting), return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))
            return result, (self.id,)

        if operator == 'or':
            r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s,))
                tmp.append(s)
            if len(tmp) > 2:
                setlist = sorted(tmp, key=len)
            else:
                setlist = tmp
            r = res
            for s in setlist:
                r = intersection(r, s)

    else:  # not a range search
        # Filter duplicates
        keys = set(record.keys)
        setlist = []
        for k in keys:
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # A missing key adds nothing to a union
                    continue
                # One missing key makes the whole intersection empty
                return IISet(), (self.id,)
            elif isinstance(s, int):
                s = IISet((s,))
            setlist.append(s)

        # If we only use 1 key (default setting), return immediately.
        # Entries of setlist were already normalised to sets above.
        if len(setlist) == 1:
            return setlist[0], (self.id,)

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is faster
            # than creating a multiunion first.
            if res is not None and len(res) < 200:
                smalllist = []
                for s in setlist:
                    smalllist.append(intersection(res, s))
                r = multiunion(smalllist)
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = res
            for s in setlist:
                r = intersection(r, s)

    if isinstance(r, int):
        r = IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    return r, (self.id,)
Пример #28
0
    def query_index(self, record, resultset=None):
        """Search the index with the given IndexQuery object.

        If not `None`, the resultset argument
        indicates that the search result is relevant only on this set,
        i.e. everything outside resultset is of no importance.
        The index can use this information for optimizations.

        Returns the set of matching record ids; an empty ``IISet`` when
        nothing matches.  When a request cache and a cache key are
        available, 'or' queries cache their final union, while 'and'
        queries cache the list of per-key sets, because their
        intersection depends on 'resultset'.
        """
        index = self._index
        r = None
        opr = None

        # not / exclude parameter
        not_parm = record.get('not', None)

        operator = record.operator

        cachekey = None
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record)
            if cachekey is not None:
                cached = None
                if operator == 'or':
                    cached = cache.get(cachekey, None)
                else:
                    cached_setlist = cache.get(cachekey, None)
                    if cached_setlist is not None:
                        r = resultset
                        for s in cached_setlist:
                            # the result is bound by the resultset
                            r = intersection(r, s)
                            # Once the intersection is empty it cannot
                            # shrink any further
                            if not r:
                                break
                        cached = r

                if cached is not None:
                    if isinstance(cached, int):
                        cached = IISet((cached, ))

                    if not_parm:
                        not_parm = list(map(self._convert, not_parm))
                        exclude = self._apply_not(not_parm, resultset)
                        cached = difference(cached, exclude)

                    return cached

        if not record.keys and not_parm:
            # convert into indexed format
            not_parm = list(map(self._convert, not_parm))
            # we have only a 'not' query
            record.keys = [k for k in index.keys() if k not in not_parm]
        else:
            # convert query arguments into indexed format
            # NOTE(review): not_parm is left unconverted on this path,
            # whereas the cached path above converts it before calling
            # _apply_not -- confirm whether that difference is intended.
            record.keys = list(map(self._convert, record.keys))

        # Range parameter
        range_parm = record.get('range', None)
        if range_parm:
            opr = 'range'
            opr_args = []
            if range_parm.find('min') > -1:
                opr_args.append('min')
            if range_parm.find('max') > -1:
                opr_args.append('max')

        if record.get('usage', None):
            # see if any usage params are sent to field
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr == 'range':  # range search
            if 'min' in opr_args:
                lo = min(record.keys)
            else:
                lo = None
            if 'max' in opr_args:
                hi = max(record.keys)
            else:
                hi = None
            # Compare against None so that a falsy upper bound such as 0
            # still limits the range instead of being ignored.
            if hi is not None:
                setlist = index.values(lo, hi)
            else:
                setlist = index.values(lo)

            # If we only use one key, return immediately
            if len(setlist) == 1:
                result = setlist[0]
                if isinstance(result, int):
                    result = IISet((result,))

                if cachekey is not None:
                    if operator == 'or':
                        cache[cachekey] = result
                    else:
                        cache[cachekey] = [result]

                if not_parm:
                    exclude = self._apply_not(not_parm, resultset)
                    result = difference(result, exclude)
                return result

            # Wrap bare ints into one-element sets; both operators need it
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s,))
                tmp.append(s)

            if operator == 'or':
                r = multiunion(tmp)

                if cachekey is not None:
                    cache[cachekey] = r
            else:
                # For intersection, sort with smallest data set first
                if len(tmp) > 2:
                    setlist = sorted(tmp, key=len)
                else:
                    setlist = tmp

                # 'r' is not invariant of resultset. Thus, we
                # have to remember 'setlist'
                if cachekey is not None:
                    cache[cachekey] = setlist

                r = resultset
                for s in setlist:
                    # the result is bound by the resultset
                    r = intersection(r, s)
                    # Once the intersection is empty it cannot shrink
                    # any further
                    if not r:
                        break

        else:  # not a range search
            setlist = []
            for k in record.keys:
                if k is None:
                    # Prevent None from being looked up. None doesn't
                    # have a valid ordering definition compared to any
                    # other object. BTrees 4.0+ will throw a TypeError
                    # "object has default comparison".
                    continue
                try:
                    s = index.get(k, None)
                except TypeError:
                    # key is not valid for this Btree so the value is None
                    LOG.error(
                        '%(context)s: query_index tried '
                        'to look up key %(key)r from index %(index)r '
                        'but key was of the wrong type.', dict(
                            context=self.__class__.__name__,
                            key=k,
                            index=self.id,
                        )
                    )
                    s = None
                # If None, try to bail early
                if s is None:
                    if operator == 'or':
                        # A missing key adds nothing to a union
                        continue
                    # One missing key makes the whole intersection empty
                    if cachekey is not None:
                        # If operator is 'and', we have to cache a list of
                        # IISet objects
                        cache[cachekey] = [IISet()]
                    return IISet()
                elif isinstance(s, int):
                    s = IISet((s,))
                setlist.append(s)

            # If we only use one key return immediately.  Entries of
            # setlist were already normalised to sets above.
            if len(setlist) == 1:
                result = setlist[0]

                if cachekey is not None:
                    if operator == 'or':
                        cache[cachekey] = result
                    else:
                        cache[cachekey] = [result]

                if not_parm:
                    exclude = self._apply_not(not_parm, resultset)
                    result = difference(result, exclude)
                return result

            if operator == 'or':
                # If we already get a small result set passed in, intersecting
                # the various indexes with it and doing the union later is
                # faster than creating a multiunion first.

                if resultset is not None and len(resultset) < 200:
                    smalllist = []
                    for s in setlist:
                        smalllist.append(intersection(resultset, s))
                    r = multiunion(smalllist)

                    # 'r' is not invariant of resultset.  Thus, we
                    # have to remember the union of 'setlist'. But
                    # this is maybe a performance killer. So we do not cache.
                    # if cachekey is not None:
                    #    cache[cachekey] = multiunion(setlist)

                else:
                    r = multiunion(setlist)
                    if cachekey is not None:
                        cache[cachekey] = r
            else:
                # For intersection, sort with smallest data set first
                if len(setlist) > 2:
                    setlist = sorted(setlist, key=len)

                # 'r' is not invariant of resultset. Thus, we
                # have to remember 'setlist' itself
                if cachekey is not None:
                    cache[cachekey] = setlist

                r = resultset
                for s in setlist:
                    r = intersection(r, s)
                    # Once the intersection is empty it cannot shrink
                    # any further
                    if not r:
                        break

        if isinstance(r, int):
            r = IISet((r, ))
        if r is None:
            return IISet()
        if not_parm:
            exclude = self._apply_not(not_parm, resultset)
            r = difference(r, exclude)
        return r
Пример #29
0
 def numObjects(self):
     """Return how many ids the union of all filtered sets contains."""
     id_sets = [fset.getIds() for fset in self.filteredSets.values()]
     return len(multiunion(id_sets))
    def _apply_index(self, request, resultset=None):
        """Query the index with the parameters found in the 'request' mapping.

        Returns None when 'request' carries no keys for this index.

        Otherwise returns a pair: first the set of matching record
        numbers, second a tuple naming the data fields that were used
        (the since field and the until field).

        Without a 'resultset' the matches are computed directly.  With a
        'resultset' the non-matching records are computed instead and
        subtracted from it.  When a REQUEST and a catalog can be reached,
        the computed set is stored in a per-request cache and reused.
        """
        iid = self.id
        record = parseIndexRequest(request, iid, self.query_options)
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])
        inverse = resultset is not None

        def answer(docs):
            # Pair the record ids with the names of the fields consulted.
            return (docs, (self._since_field, self._until_field))

        REQUEST = aq_get(self, 'REQUEST', None)
        if REQUEST is not None:
            catalog = aq_parent(aq_parent(aq_inner(self)))
            if catalog is not None:
                key = self._cache_key(catalog)
                cache = REQUEST.get(key, None)
                tid = isinstance(term, int) and term / 10 or 'None'
                # Direct and inverse results live under separate keys.
                if inverse:
                    cachekey = '_daterangeindex_inverse_%s_%s' % (iid, tid)
                else:
                    cachekey = '_daterangeindex_%s_%s' % (iid, tid)
                if cache is not None:
                    cached = cache.get(cachekey, None)
                    if cached is not None:
                        if inverse:
                            return answer(difference(resultset, cached))
                        return answer(cached)
                else:
                    # First use in this request: create the cache.
                    cache = RequestCache()
                    REQUEST[key] = cache

        # 'catalog' is only bound when REQUEST is set; the short-circuit
        # keeps it from being read otherwise.
        cacheable = REQUEST is not None and catalog is not None

        if inverse:
            # Collect everything that does NOT match, then subtract it
            # from the given resultset.
            ended_open = multiunion(self._until_only.values(None, term - 1))
            started_open = multiunion(self._since_only.values(term + 1))
            ended = multiunion(self._until.values(None, term - 1))
            started = multiunion(self._since.values(term + 1))

            excluded = multiunion([started, started_open, ended_open, ended])
            if cacheable:
                cache[cachekey] = excluded

            return answer(difference(resultset, excluded))

        # Direct computation: aggregate each bucket on its own, to avoid
        # large-small union penalties.
        ends_only = multiunion(self._until_only.values(term))
        starts_only = multiunion(self._since_only.values(None, term))
        ends = multiunion(self._until.values(term))

        # Total result is bound by resultset
        if REQUEST is None:
            ends = intersection(resultset, ends)

        starts = multiunion(self._since.values(None, term))
        both = intersection(ends, starts)

        # Merge from smallest to largest.
        matches = multiunion([both, ends_only, starts_only, self._always])
        if cacheable:
            cache[cachekey] = matches

        return answer(matches)
def daterangeindex_apply_index(self, request, cid='', res=None):
    """Apply the date range index to ``request`` with per-request caching.

    The first query key is converted with ``self._convertDateTime`` and
    looked up in the since/until buckets of the index.

    ``request`` is handed to ``parseIndexRequest`` together with the index
    id.  ``cid`` is accepted but not used by this function.  ``res`` is an
    optional, already computed result set: when it is given, the *inverse*
    of the query is computed and subtracted from ``res`` instead of
    computing the matching set directly.

    Returns ``None`` when the request carries no keys for this index,
    otherwise a 2-tuple of the resulting set and
    ``(self._since_field, self._until_field)``.
    """
    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])
    fields = (self._since_field, self._until_field)

    # The cache is stored on the request under a key made of the catalog
    # id and its counter.  ``cache`` stays None when there is no request
    # or no catalog, which disables caching below.
    cache = cachekey = None
    REQUEST = getattr(self, 'REQUEST', None)
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
        if catalog is not None:
            key = '%s_%s' % (catalog.getId(), catalog.getCounter())
            cache = REQUEST.get(key, None)
            # Integer terms are bucketed by floor division.  A conditional
            # expression is used so that bucket 0 is not mistaken for a
            # missing term, and ``//`` keeps the bucket an integer even
            # when true division is in effect.
            # NOTE(review): terms in the same bucket of 10 share a cache
            # entry although the cached set was computed for one exact
            # term -- confirm this coarseness is intended.
            tid = term // 10 if isinstance(term, int) else 'None'
            index_id = self.getId()
            if res is None:
                cachekey = '_daterangeindex_%s_%s' % (index_id, tid)
            else:
                cachekey = '_daterangeindex_inverse_%s_%s' % (index_id, tid)
            if cache is None:
                cache = REQUEST[key] = RequestCache()
            else:
                cached = cache.get(cachekey, None)
                if cached is not None:
                    if res is None:
                        return cached, fields
                    # The inverse set is cached; subtract it from ``res``.
                    return difference(res, cached), fields

    if res is None:
        # Aggregate sets for each bucket separately, then merge them with
        # a single multiunion.
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)
        result = multiunion([bounded, until_only, since_only, self._always])

        if cache is not None:
            cache[cachekey] = result

        return result, fields

    # Compute the inverse and subtract it from res
    until_only = multiunion(self._until_only.values(None, term - 1))
    since_only = multiunion(self._since_only.values(term + 1))
    until = multiunion(self._until.values(None, term - 1))
    since = multiunion(self._since.values(term + 1))

    result = multiunion([until_only, since_only, until, since])
    if cache is not None:
        cache[cachekey] = result
    return difference(res, result), fields
    def search(self, path, default_level=0, depth=-1, navtree=0,
                                                             navtree_start=0):
        """
        Look up ``path`` in the index and return the matching set.

        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  not implemented yet

        default_level is the start level used when path is a plain
        string.  depth defaults to -1, in which case the value of navtree
        is used as depth.  navtree enables the navigation-tree style
        branches below, and navtree_start is the lower bound of the
        component positions visited by the two absolute-path navtree
        branches.

        Depending on the branch taken, the result is a freshly built set
        or, for the single depth search, the object stored in
        self._index_parents itself.
        """

        # A non-string path is indexed as (path, level)
        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')

        # Drop the empty components produced by leading, trailing or
        # doubled slashes
        comps = filter(None, path.split('/'))

        # Copy of the components with a leading '' so that joining them
        # with '/' yields an absolute path again
        orig_comps = [''] + comps[:]
        # Optimization - avoid using the root set
        # as it is common for all objects anyway and add overhead
        # There is an assumption about catalog/index having
        # the same container as content
        if default_level == 0:
            indexpath = list(filter(None, self.getPhysicalPath()))
            # Strip the leading components that the query path shares with
            # the physical path of the index, raising startlevel for each
            while min(len(indexpath), len(comps)):
                if indexpath[0] == comps[0]:
                    del indexpath[0]
                    del comps[0]
                    startlevel += 1
                else:
                    break

        # Nothing left to match and no depth/navtree restriction:
        # return every key of self._unindex
        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            # "0 or navtree" evaluates to navtree
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level==0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                # Joining only the leading '' gives '', which stands for '/'
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    # Parent paths missing from the mapping are skipped
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level==0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                # Joining only the leading '' gives '', which stands for '/'
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    # Paths missing from the mapping are skipped
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            try:
                # The stored object is returned as is, not a copy
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None # Same as pathindex
            navset  = None # For collecting siblings along the way
            depthset = None # For limiting depth

            # Seed navset with the entry stored under the None key for the
            # start level, if there is one
            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]

            # Walk the levels covered by the path components plus 'depth'
            # additional levels
            for level in range(startlevel, startlevel+len(comps) + depth):
                if level-startlevel < len(comps):
                    # Still inside the path: narrow pathset by the
                    # component expected at this level
                    comp = comps[level-startlevel]
                    if not self._index.has_key(comp) or not self._index[comp].has_key(level):
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        pathset = intersection(pathset,
                                                     self._index[comp][level])
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset  = union(navset, intersection(pathset,
                                              self._index[None][level+depth]))
                if level-startlevel >= len(comps) or navtree:
                    # Past the end of the path (or in navtree mode):
                    # collect the None-key entries of this level that are
                    # also in pathset
                    if self._index.has_key(None) and self._index[None].has_key(level):
                        depthset = union(depthset, intersection(pathset,
                                                    self._index[None][level]))

            # The accumulators may still be None or empty; fall back to an
            # empty IISet in that case
            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            # Negative start level: try to match the components at every
            # level from 0 up to self._depth and merge the hits
            results = IISet()
            for level in range(0,self._depth + 1):
                ids = None
                error = 0
                for cn in range(0,len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids,self._index[comp][level+cn])
                    except KeyError:
                        # A missing component or level rules out this
                        # start level
                        error = 1
                if error==0:
                    results = union(results,ids)
            return results
Пример #33
0
            if 'max' in opr_args:
                hi = max(keys)
            else:
                hi = None

            if hi:
                setlist = index.values(lo,hi)
            else:
                setlist = index.values(lo)

            #for k, set in setlist:
                #if type(set) is IntType:
                    #set = IISet((set,))
                #r = set_func(r, set)
            # XXX: Use multiunion!
            r = multiunion(setlist)

        else: # not a range search
            for key in keys:
                set = index.get(key, None)
                if set is not None:
                    if type(set) is IntType:
                        set = IISet((set,))
                    r = set_func(r, set)

        if type(r) is IntType:
            r = IISet((r,))

        if r is None:
            return IISet(), (self.id,)
        else:
Пример #34
0
    def search(self,
               path,
               default_level=0,
               depth=-1,
               navtree=0,
               navtree_start=0):
        """
        Look up ``path`` in the index and return the matching set.

        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  not implemented yet

        default_level is the start level used when path is a plain
        string.  depth defaults to -1, in which case the value of navtree
        is used as depth.  navtree enables the navigation-tree style
        branches below, and navtree_start is the lower bound of the
        component positions visited by the two absolute-path navtree
        branches.

        Depending on the branch taken, the result is a freshly built set
        or, for the single depth search, the object stored in
        self._index_parents itself.
        """

        # A non-string path is indexed as (path, level)
        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')

        # Drop the empty components produced by leading, trailing or
        # doubled slashes
        comps = filter(None, path.split('/'))

        # Copy of the components with a leading '' so that joining them
        # with '/' yields an absolute path again
        orig_comps = [''] + comps[:]
        # Optimization - avoid using the root set
        # as it is common for all objects anyway and add overhead
        # There is an assumption about catalog/index having
        # the same container as content
        if default_level == 0:
            indexpath = list(filter(None, self.getPhysicalPath()))
            # Strip the leading components that the query path shares with
            # the physical path of the index, raising startlevel for each
            while min(len(indexpath), len(comps)):
                if indexpath[0] == comps[0]:
                    del indexpath[0]
                    del comps[0]
                    startlevel += 1
                else:
                    break

        # Nothing left to match and no depth/navtree restriction:
        # return every key of self._unindex
        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            # "0 or navtree" evaluates to navtree
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level == 0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                # Joining only the leading '' gives '', which stands for '/'
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    # Parent paths missing from the mapping are skipped
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level == 0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                # Joining only the leading '' gives '', which stands for '/'
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    # Paths missing from the mapping are skipped
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            try:
                # The stored object is returned as is, not a copy
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        elif startlevel >= 0:

            pathset = None  # Same as pathindex
            navset = None  # For collecting siblings along the way
            depthset = None  # For limiting depth

            # Seed navset with the entry stored under the None key for the
            # start level, if there is one
            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]

            # Walk the levels covered by the path components plus 'depth'
            # additional levels
            for level in range(startlevel, startlevel + len(comps) + depth):
                if level - startlevel < len(comps):
                    # Still inside the path: narrow pathset by the
                    # component expected at this level
                    comp = comps[level - startlevel]
                    if not self._index.has_key(
                            comp) or not self._index[comp].has_key(level):
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        pathset = intersection(pathset,
                                               self._index[comp][level])
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset = union(
                            navset,
                            intersection(pathset,
                                         self._index[None][level + depth]))
                if level - startlevel >= len(comps) or navtree:
                    # Past the end of the path (or in navtree mode):
                    # collect the None-key entries of this level that are
                    # also in pathset
                    if self._index.has_key(None) and self._index[None].has_key(
                            level):
                        depthset = union(
                            depthset,
                            intersection(pathset, self._index[None][level]))

            # The accumulators may still be None or empty; fall back to an
            # empty IISet in that case
            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            # Negative start level: try to match the components at every
            # level from 0 up to self._depth and merge the hits
            results = IISet()
            for level in range(0, self._depth + 1):
                ids = None
                error = 0
                for cn in range(0, len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids, self._index[comp][level + cn])
                    except KeyError:
                        # A missing component or level rules out this
                        # start level
                        error = 1
                if error == 0:
                    results = union(results, ids)
            return results
def daterangeindex_apply_index(self, request, cid='', res=None):
    """Apply the date range index to ``request`` with per-request caching.

    The first query key is converted with ``self._convertDateTime`` and
    looked up in the since/until buckets of the index.

    ``request`` is handed to ``parseIndexRequest`` together with the index
    id.  ``cid`` is accepted but not used by this function.  ``res`` is an
    optional, already computed result set: when it is given, the *inverse*
    of the query is computed and subtracted from ``res`` instead of
    computing the matching set directly.

    Returns ``None`` when the request carries no keys for this index,
    otherwise a 2-tuple of the resulting set and
    ``(self._since_field, self._until_field)``.
    """
    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])
    fields = (self._since_field, self._until_field)

    # The cache is stored on the request under a key made of the catalog
    # id and its counter.  ``cache`` stays None when there is no request
    # or no catalog, which disables caching below.
    cache = cachekey = None
    REQUEST = getattr(self, 'REQUEST', None)
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
        if catalog is not None:
            key = '%s_%s' % (catalog.getId(), catalog.getCounter())
            cache = REQUEST.get(key, None)
            # Integer terms are bucketed by floor division.  A conditional
            # expression is used so that bucket 0 is not mistaken for a
            # missing term, and ``//`` keeps the bucket an integer even
            # when true division is in effect.
            # NOTE(review): terms in the same bucket of 10 share a cache
            # entry although the cached set was computed for one exact
            # term -- confirm this coarseness is intended.
            tid = term // 10 if isinstance(term, int) else 'None'
            index_id = self.getId()
            if res is None:
                cachekey = '_daterangeindex_%s_%s' % (index_id, tid)
            else:
                cachekey = '_daterangeindex_inverse_%s_%s' % (index_id, tid)
            if cache is None:
                cache = REQUEST[key] = RequestCache()
            else:
                cached = cache.get(cachekey, None)
                if cached is not None:
                    if res is None:
                        return cached, fields
                    # The inverse set is cached; subtract it from ``res``.
                    return difference(res, cached), fields

    if res is None:
        # Aggregate sets for each bucket separately, then merge them with
        # a single multiunion.
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)
        result = multiunion([bounded, until_only, since_only, self._always])

        if cache is not None:
            cache[cachekey] = result

        return result, fields

    # Compute the inverse and subtract it from res
    until_only = multiunion(self._until_only.values(None, term - 1))
    since_only = multiunion(self._since_only.values(term + 1))
    until = multiunion(self._until.values(None, term - 1))
    since = multiunion(self._since.values(term + 1))

    result = multiunion([until_only, since_only, until, since])
    if cache is not None:
        cache[cachekey] = result
    return difference(res, result), fields
Пример #36
0
    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in the argument

        The query keys are passed through ``self._convert`` before the
        index is consulted.

        ``request`` is parsed with ``parseIndexRequest`` using this
        index's id and query options.  ``resultset`` is an optional set
        computed so far; in a non-range search every stored set is
        intersected with it before being combined.

        Returns ``None`` when the request holds no keys for this index,
        otherwise a 2-tuple of the resulting set and ``(self.id, )``.

        Raises ``RuntimeError`` when the requested operator is not listed
        in ``self.operators``.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        # Materialize the converted keys: they are read by min()/max() or
        # iterated below.
        keys = list(map(self._convert, record.keys))

        index = self._index
        r = None
        opr = None

        # experimental code for specifying the operator
        operator = record.get('operator', self.useOperator)
        if operator not in self.operators:
            raise RuntimeError("operator not valid: %s" % operator)

        # depending on the operator we use intersection or union
        if operator == "or":
            set_func = union
        else:
            set_func = intersection

        # range parameter
        range_arg = record.get('range', None)
        if range_arg:
            opr = "range"
            opr_args = []
            if range_arg.find("min") > -1:
                opr_args.append("min")
            if range_arg.find("max") > -1:
                opr_args.append("max")

        if record.get('usage', None):
            # see if any usage params are sent to field; this overrides
            # whatever the 'range' parameter selected above
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":  # range search
            if 'min' in opr_args:
                lo = min(keys)
            else:
                lo = None

            if 'max' in opr_args:
                hi = max(keys)
            else:
                hi = None

            # A bound of None means "unbounded" for values(), so both
            # bounds can always be passed.  Testing the truth value of
            # 'hi' instead would silently drop an upper bound of 0.
            r = multiunion(index.values(lo, hi))

        else:  # not a range search
            for key in keys:
                docs = index.get(key, None)
                if docs is not None:
                    if isinstance(docs, int):
                        # A bare integer entry is wrapped in a set
                        docs = IISet((docs, ))
                    else:
                        # set can't be bigger than resultset
                        docs = intersection(docs, resultset)
                    r = set_func(r, docs)

        if isinstance(r, int):
            r = IISet((r, ))

        if r is None:
            return IISet(), (self.id, )
        else:
            return r, (self.id, )
Пример #37
0
 def multiunion(self, *args):
     """Forward ``args`` to ``BTrees.LFBTree.multiunion``.

     The BTrees module is imported when the method is called.
     """
     import BTrees.LFBTree
     return BTrees.LFBTree.multiunion(*args)
Пример #38
0
    def query_index(self, record, resultset=None):
        """Search the index with the given IndexQuery object.

        If the query has a key which matches the 'id' of
        the index instance, one of a few things can happen:

          - if the value is a string, turn the value into
            a single-element sequence, and proceed.

          - if the value is a sequence, return a union search.

          - If the value is a dict and contains a key of the form
            '<index>_operator' this overrides the default method
            ('or') to combine search results. Valid values are 'or'
            and 'and'.

        ``record`` supplies the keys, the operator and the optional
        'not', 'range' and 'usage' parameters; its ``keys`` attribute is
        replaced by the converted keys.  ``resultset`` is an optional set
        computed so far; 'and' queries are intersected with it.

        Results are stored in and served from the cache returned by
        ``self.getRequestCache()`` when both a cache and a cache key are
        available.  For 'or' the combined set is cached, for 'and' the
        list of sets is cached because the combined result depends on
        ``resultset``.

        Returns the resulting set; an empty ``IISet`` when nothing
        matched.
        """
        index = self._index
        r = None
        opr = None

        # not / exclude parameter.  It is converted into indexed format
        # exactly once, here, so that every path below hands converted
        # keys to _apply_not (cache hit, cache miss, with or without
        # query keys).
        not_parm = record.get('not', None)
        if not_parm:
            not_parm = list(map(self._convert, not_parm))

        operator = record.operator

        cachekey = None
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record)
            if cachekey is not None:
                cached = None
                if operator == 'or':
                    cached = cache.get(cachekey, None)
                else:
                    cached_setlist = cache.get(cachekey, None)
                    if cached_setlist is not None:
                        r = resultset
                        for s in cached_setlist:
                            # the result is bound by the resultset
                            r = intersection(r, s)
                            # If intersection, we can't possibly get a
                            # smaller result
                            if not r:
                                break
                        cached = r

                if cached is not None:
                    if isinstance(cached, int):
                        cached = IISet((cached, ))

                    if not_parm:
                        exclude = self._apply_not(not_parm, resultset)
                        cached = difference(cached, exclude)

                    return cached

        if not record.keys and not_parm:
            # we have only a 'not' query
            record.keys = [k for k in index.keys() if k not in not_parm]
        else:
            # convert query arguments into indexed format
            record.keys = list(map(self._convert, record.keys))

        # Range parameter
        range_parm = record.get('range', None)
        if range_parm:
            opr = "range"
            opr_args = []
            if range_parm.find("min") > -1:
                opr_args.append("min")
            if range_parm.find("max") > -1:
                opr_args.append("max")

        if record.get('usage', None):
            # see if any usage params are sent to field
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":  # range search
            if 'min' in opr_args:
                lo = min(record.keys)
            else:
                lo = None
            if 'max' in opr_args:
                hi = max(record.keys)
            else:
                hi = None
            # A bound of None means "unbounded" for values(), so both
            # bounds can always be passed.  Testing the truth value of
            # 'hi' instead would silently drop a falsy upper bound such
            # as 0.
            setlist = index.values(lo, hi)

            # If we only use one key, intersect and return immediately
            if len(setlist) == 1:
                result = setlist[0]
                if isinstance(result, int):
                    result = IISet((result, ))

                if cachekey is not None:
                    if operator == 'or':
                        cache[cachekey] = result
                    else:
                        cache[cachekey] = [result]

                if not_parm:
                    exclude = self._apply_not(not_parm, resultset)
                    result = difference(result, exclude)
                return result

            if operator == 'or':
                tmp = []
                for s in setlist:
                    if isinstance(s, int):
                        s = IISet((s, ))
                    tmp.append(s)
                r = multiunion(tmp)

                if cachekey is not None:
                    cache[cachekey] = r
            else:
                # For intersection, sort with smallest data set first
                tmp = []
                for s in setlist:
                    if isinstance(s, int):
                        s = IISet((s, ))
                    tmp.append(s)
                if len(tmp) > 2:
                    setlist = sorted(tmp, key=len)
                else:
                    setlist = tmp

                # 'r' is not invariant of resultset. Thus, we
                # have to remember 'setlist'
                if cachekey is not None:
                    cache[cachekey] = setlist

                r = resultset
                for s in setlist:
                    # the result is bound by the resultset
                    r = intersection(r, s)
                    # If intersection, we can't possibly get a smaller result
                    if not r:
                        break

        else:  # not a range search
            # Filter duplicates
            setlist = []
            for k in record.keys:
                if k is None:
                    # Prevent None from being looked up. None doesn't
                    # have a valid ordering definition compared to any
                    # other object. BTrees 4.0+ will throw a TypeError
                    # "object has default comparison".
                    continue
                s = index.get(k, None)
                # If None, try to bail early
                if s is None:
                    if operator == 'or':
                        # If union, we can possibly get a bigger result
                        continue
                    # If intersection, we can't possibly get a smaller result
                    if cachekey is not None:
                        # If operator is 'and', we have to cache a list of
                        # IISet objects
                        cache[cachekey] = [IISet()]
                    return IISet()
                elif isinstance(s, int):
                    s = IISet((s, ))
                setlist.append(s)

            # If we only use one key return immediately
            if len(setlist) == 1:
                result = setlist[0]
                if isinstance(result, int):
                    result = IISet((result, ))

                if cachekey is not None:
                    if operator == 'or':
                        cache[cachekey] = result
                    else:
                        cache[cachekey] = [result]

                if not_parm:
                    exclude = self._apply_not(not_parm, resultset)
                    result = difference(result, exclude)
                return result

            if operator == 'or':
                # If we already get a small result set passed in, intersecting
                # the various indexes with it and doing the union later is
                # faster than creating a multiunion first.

                if resultset is not None and len(resultset) < 200:
                    smalllist = []
                    for s in setlist:
                        smalllist.append(intersection(resultset, s))
                    r = multiunion(smalllist)

                    # 'r' is not invariant of resultset.  Thus, we
                    # have to remember the union of 'setlist'. But
                    # this is maybe a performance killer. So we do not cache.
                    # if cachekey is not None:
                    #    cache[cachekey] = multiunion(setlist)

                else:
                    r = multiunion(setlist)
                    if cachekey is not None:
                        cache[cachekey] = r
            else:
                # For intersection, sort with smallest data set first
                if len(setlist) > 2:
                    setlist = sorted(setlist, key=len)

                # 'r' is not invariant of resultset. Thus, we
                # have to remember the union of 'setlist'
                if cachekey is not None:
                    cache[cachekey] = setlist

                r = resultset
                for s in setlist:
                    r = intersection(r, s)
                    # If intersection, we can't possibly get a smaller result
                    if not r:
                        break

        if isinstance(r, int):
            r = IISet((r, ))
        if r is None:
            return IISet()
        if not_parm:
            exclude = self._apply_not(not_parm, resultset)
            r = difference(r, exclude)
        return r
Пример #39
0
 def multiunion(self, *args):
     """Forward ``args`` to ``BTrees.IIBTree.multiunion``.

     The BTrees module is imported when the method is called.
     """
     import BTrees.IIBTree
     return BTrees.IIBTree.multiunion(*args)
Пример #40
0
    def _apply_index(self, request, resultset=None):
        """Resolve the start/end query found in 'request' against the index.

        'request' should be a mapping object.  When it has no entry for
        this index's id, all keys of the uid-to-end mapping are returned
        together with an empty tuple of used fields.

        Otherwise the 'start' and 'end' positions are read from the
        request and two candidate sets are built: the uids whose end lies
        after the queried start (plus the uids stored under the None end
        key), and the uids whose start is not later than the queried end.
        Their intersection, the possibly converted start and end values
        and the names of the fields used are handed to
        self._finalize_index, whose result is returned.

        When one of the lookups finds the underlying mapping empty, an
        empty IITreeSet is returned with the fields collected so far.

        The resultset argument contains the resultset, as already
        calculated by ZCatalog's search method; it is not used here.
        """
        if not request.has_key(self._id):  # 'in' doesn't work with this object
            return IITreeSet(self._uid2end.keys()), ()

        start = self._get_position(request, 'start')
        end = self._get_position(request, 'end')

        used_fields = ()

        # Events ending before the queried start are unwanted: keep those
        # whose end >= start, and those stored under None (None means
        # infinite recurrence).
        try:
            latest_end = self._end2uid.maxKey()
        except ValueError:
            # The end mapping holds no events at all
            return IITreeSet(), used_fields

        if start is not None and latest_end is not None:
            used_fields = used_fields + (self.start_attr,)
            start = start.utctimetuple()
            try:
                first_end = self._end2uid.minKey(start)
                # An event ending exactly when the search period starts
                # must not be included, hence the exclusive lower bound
                # in that case.
                candidates = self._end2uid.values(
                    first_end, latest_end, excludemin=(first_end == start))
                start_uids = multiunion(candidates)
            except ValueError:
                # Nothing ends at or after the queried start
                start_uids = IITreeSet()

            # Add the open ended events, if there are any
            if self._end2uid.has_key(None):
                start_uids = union(start_uids, self._end2uid[None])
        else:
            # Either no start was given, so the search begins at the very
            # beginning, or every event has infinite recurrence: all uids
            # qualify.
            start_uids = IITreeSet(self._uid2end.keys())

        # XXX Intersecting with the resultset at this point might pay off,
        # as it would avoid calculating the recurrence of ids that are not
        # going to be returned.  It could equally be done after combining
        # with end_uids below; performance tests will tell.

        if end is None:
            # No end specified, take everything collected so far
            return self._finalize_index(start_uids, start, end, used_fields)

        # Events starting after the queried end are unwanted as well:
        # keep those where start <= end.
        end = end.utctimetuple()
        try:
            earliest_start = self._start2uid.minKey()
            end_uids = multiunion(self._start2uid.values(earliest_start, end))
            used_fields = used_fields + (self.end_attr,)
        except ValueError:
            # The start mapping holds no events
            return IITreeSet(), used_fields

        return self._finalize_index(intersection(start_uids, end_uids),
                                    start, end, used_fields)
Пример #41
0
    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in the request arg.

        The request argument should be a mapping object.

        If the request does not have a key which matches the "id" of
        the index instance, then None is returned.

        If the request *does* have a key which matches the "id" of
        the index instance, one of a few things can happen:

          - if the value is a blank string, None is returned (in
            order to support requests from web forms where
            you can't tell a blank string from empty).

          - if the value is a nonblank string, turn the value into
            a single-element sequence, and proceed.

          - if the value is a sequence, return a union search.

          - If the value is a dict and contains a key of the form
            '<index>_operator' this overrides the default method
            ('or') to combine search results. Valid values are "or"
            and "and".

        Further query options handled here:

          - 'not': keys whose records are subtracted from the result.
            When the query consists only of 'not' keys, the search starts
            from every indexed key that is not excluded.

          - 'range' (containing "min" and/or "max"), or a 'usage' option
            of the form 'range:min:max': search between min(keys) and/or
            max(keys) instead of looking each key up individually.

        If None is not returned as a result of the abovementioned
        constraints, two objects are returned.  The first object is a
        ResultSet containing the record numbers of the matching
        records.  The second object is a tuple containing the names of
        all data fields used.

        The optional resultset is the set of record numbers matched so
        far; it bounds 'and' searches, pre-filters 'or' searches when it
        holds fewer than 200 entries, and is handed on to _apply_not.

        Raises RuntimeError when the operator is not in self.operators and
        TypeError when a (non-range) query key is None.

        FAQ answer:  to search a Field Index for documents that
        have a blank string as their value, wrap the request value
        up in a tuple ala: request = {'id':('',)}
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index
        r = None
        opr = None

        # not / exclude parameter
        not_parm = record.get('not', None)
        if not record.keys and not_parm:
            # Convert into indexed format.  Real lists (rather than map())
            # are used because the values are scanned more than once.
            not_parm = [self._convert(k) for k in not_parm]
            # we have only a 'not' query
            record.keys = [k for k in index.keys() if k not in not_parm]
        else:
            # convert query arguments into indexed format
            record.keys = [self._convert(k) for k in record.keys]

        def finish(result):
            # Subtract the 'not' matches (if any) and build the return pair.
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id,)

        # experimental code for specifing the operator
        operator = record.get('operator', self.useOperator)
        if operator not in self.operators:
            raise RuntimeError("operator not valid: %s" % escape(operator))

        # Range parameter
        range_parm = record.get('range', None)
        if range_parm:
            opr = "range"
            opr_args = [bound for bound in ("min", "max")
                        if range_parm.find(bound) > -1]

        if record.get('usage', None):
            # see if any usage params are sent to field
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":  # range search
            lo = min(record.keys) if 'min' in opr_args else None
            hi = max(record.keys) if 'max' in opr_args else None
            # Test against None, not truthiness: a falsy maximum such as 0
            # is a legitimate upper bound and must not be dropped.
            if hi is not None:
                setlist = index.values(lo, hi)
            else:
                setlist = index.values(lo)

            # An index entry may be a bare int instead of a set; normalise
            # once so every branch below only ever sees sets.
            setlist = [IISet((s,)) if isinstance(s, int) else s
                       for s in setlist]

            # If we only use one key, return immediately
            if len(setlist) == 1:
                return finish(setlist[0])

            if operator == 'or':
                r = multiunion(setlist)
            else:
                # For intersection, sort with smallest data set first
                if len(setlist) > 2:
                    setlist = sorted(setlist, key=len)
                r = resultset
                for s in setlist:
                    # the result is bound by the resultset
                    r = intersection(r, s)

        else:  # not a range search
            setlist = []
            for k in record.keys:
                if k is None:
                    raise TypeError('None cannot be in an index.')
                s = index.get(k, None)
                # If None, try to bail early
                if s is None:
                    if operator == 'or':
                        # A missing key adds nothing to a union
                        continue
                    # A missing key makes an intersection empty
                    return IISet(), (self.id,)
                elif isinstance(s, int):
                    s = IISet((s,))
                setlist.append(s)

            # If we only use one key return immediately
            if len(setlist) == 1:
                return finish(setlist[0])

            if operator == 'or':
                # If we already get a small result set passed in, intersecting
                # the various indexes with it and doing the union later is
                # faster than creating a multiunion first.
                if resultset is not None and len(resultset) < 200:
                    r = multiunion(
                        [intersection(resultset, s) for s in setlist])
                else:
                    r = multiunion(setlist)
            else:
                # For intersection, sort with smallest data set first
                if len(setlist) > 2:
                    setlist = sorted(setlist, key=len)
                r = resultset
                for s in setlist:
                    r = intersection(r, s)

        if isinstance(r, int):
            r = IISet((r,))
        if r is None:
            return IISet(), (self.id,)
        return finish(r)
Пример #42
0
    def search(self,
               path,
               default_level=0,
               depth=-1,
               navtree=0,
               navtree_start=0):
        """
        Return the set of record ids indexed under ``path``.

        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  not implemented yet

        A positive ``depth`` is rejected with ValueError.  After an optional
        leading virtual-root component and a leading 'zport' component are
        stripped, the first remaining path component must be 'dmd',
        otherwise ValueError is raised.  An empty path is treated as 'dmd'.

        ``navtree`` and ``navtree_start`` select the navigation-tree and
        breadcrumb variants of the lookup; those are only taken for paths
        that start with '/' and when ``default_level`` is 0.
        """

        # Accept either a plain path string or a (path, level) pair.
        if isinstance(path, basestring):
            startlevel = default_level
        else:
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')
        # Drop empty components produced by leading/trailing/double slashes.
        comps = filter(None, path.split('/'))

        # Keep the unstripped components (with a leading '' so that joining
        # them yields an absolute path) for the navtree/breadcrumb lookups.
        orig_comps = [''] + comps[:]

        if depth > 0:
            raise ValueError("Can't do depth searches anymore")
        if not comps:
            comps = ['dmd']
            startlevel = 1
        else:
            # Strip a leading virtual-root component, then a leading 'zport'.
            # NOTE(review): if stripping leaves comps empty (e.g. the path is
            # only 'zport'), the comps[0] lookups below would raise
            # IndexError rather than ValueError -- confirm this is intended.
            if comps[0] == getCSEConf().get('virtualroot',
                                            '').replace('/', ''):
                comps = comps[1:]
            if comps[0] == 'zport':
                comps = comps[1:]

        if comps[0] != 'dmd':
            raise ValueError("Depth searches must start with 'dmd'")
        # The start level is always the number of remaining components; this
        # overwrites whatever was derived from default_level / the tuple.
        startlevel = len(comps)

        # NOTE(review): comps[0] was already read above, so comps cannot be
        # empty here and this branch looks unreachable -- confirm.
        if len(comps) == 0:
            if depth == -1 and not navtree:
                return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            # "0 or navtree" evaluates to navtree itself.
            depth = 0 or navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level == 0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                # An empty join result stands for the root, i.e. '/'.
                parent_path = parent_path and parent_path or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    # Parent path not indexed: contributes nothing.
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level == 0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i])
                # An empty join result stands for the root, i.e. '/'.
                parent_path = parent_path and parent_path or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    # Path itself not indexed: skip it.
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        # NOTE(review): depth > 0 raised ValueError above, so orig_depth == 1
        # does not appear to be reachable here -- confirm.
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        # NOTE(review): startlevel is len(comps) at this point, so this
        # condition always holds and the final "else" below looks dead.
        elif startlevel >= 0:

            pathset = None  # Same as pathindex
            navset = None  # For collecting siblings along the way
            depthset = None  # For limiting depth

            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]
            for level in range(startlevel, startlevel + len(comps)):
                # Only the first iteration (level == len(comps)) satisfies
                # this test; there the lookup key is the full joined path.
                if level <= len(comps):
                    comp = "/".join(comps[:level])
                    if (not self._index.has_key(comp)
                            or not self._index[comp].has_key(level)):
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        # Exact hit: return the stored set as-is.
                        return self._index[comp][level]
                    # Only reached in navtree mode with a missing path, so
                    # pathset is the empty IISet assigned just above.
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level+depth):
                        navset = union(
                            navset,
                            intersection(pathset,
                                         self._index[None][level + depth]))
                # Inside this range, level - startlevel is always smaller
                # than len(comps), so only the navtree half can be true.
                if level - startlevel >= len(comps) or navtree:
                    if (self._index.has_key(None)
                            and self._index[None].has_key(level)):
                        depthset = union(
                            depthset,
                            intersection(pathset, self._index[None][level]))

            # Substitute an empty IISet when the chosen result is None or
            # otherwise falsy.
            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            # Search at every level and collect the matches for which all
            # components were found at consecutive levels.
            results = IISet()
            for level in range(0, self._depth + 1):
                ids = None
                error = 0
                for cn in range(0, len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids, self._index[comp][level + cn])
                    except KeyError:
                        # Component missing at this level: discard the level.
                        error = 1
                if error == 0:
                    results = union(results, ids)
            return results
Пример #43
0
def unindex_apply_index(self, request, cid='', type=type, res=None):
    """Apply the index to the query found in ``request``.

    ``request`` is parsed with parseIndexRequest using the index id and
    its query options.  If the parsed record has no keys, None is
    returned.  Otherwise a pair ``(result, (self.id,))`` is returned,
    where ``result`` is the set of matching record numbers.

    The record may carry an 'operator' ("or"/"and", defaulting to
    self.useOperator) and either a 'range' option containing "min" and/or
    "max" or a 'usage' option of the form 'range:min:max'; a range query
    searches between min(keys) and/or max(keys) instead of looking each
    key up individually.

    ``res`` is an optional set of record numbers matched so far.  It
    bounds 'and' searches and pre-filters 'or' searches when it holds
    fewer than 200 entries.  ``cid`` and ``type`` are accepted for
    signature compatibility and are not used.

    Raises RuntimeError when the operator is not in self.operators.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    r = None
    opr = None

    # experimental code for specifing the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % escape(operator))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = [bound for bound in ("min", "max")
                    if range_parm.find(bound) > -1]

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = min(record.keys) if 'min' in opr_args else None
        hi = max(record.keys) if 'max' in opr_args else None
        # Test against None, not truthiness: a falsy maximum such as 0 is
        # a legitimate upper bound and must not be dropped.
        if hi is not None:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)

        # An index entry may be a bare int instead of a set; normalise once
        # so that both the union and the intersection only ever see sets.
        setlist = [IISet((s, )) if isinstance(s, int) else s
                   for s in setlist]

        # If we only use 1 key (default setting), return immediately
        if len(setlist) == 1:
            return setlist[0], (self.id, )

        if operator == 'or':
            r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = res
            for s in setlist:
                r = intersection(r, s)

    else:  # not a range search
        # Filter duplicate keys before looking them up
        setlist = []
        for k in set(record.keys):
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # A missing key adds nothing to a union
                    continue
                # A missing key makes an intersection empty
                return IISet(), (self.id, )
            elif isinstance(s, int):
                s = IISet((s, ))
            setlist.append(s)

        # If we only use 1 key (default setting), return immediately
        if len(setlist) == 1:
            return setlist[0], (self.id, )

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is faster
            # than creating a multiunion first.
            if res is not None and len(res) < 200:
                r = multiunion([intersection(res, s) for s in setlist])
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = res
            for s in setlist:
                r = intersection(r, s)

    if isinstance(r, int):
        r = IISet((r, ))
    if r is None:
        return IISet(), (self.id, )
    return r, (self.id, )
Пример #44
0
    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in the request arg.

        The request argument should be a mapping object.

        If the request does not have a key which matches the "id" of
        the index instance, then None is returned.

        If the request *does* have a key which matches the "id" of
        the index instance, one of a few things can happen:

          - if the value is a blank string, None is returned (in
            order to support requests from web forms where
            you can't tell a blank string from empty).

          - if the value is a nonblank string, turn the value into
            a single-element sequence, and proceed.

          - if the value is a sequence, return a union search.

          - If the value is a dict and contains a key of the form
            '<index>_operator' this overrides the default method
            ('or') to combine search results. Valid values are "or"
            and "and".

        Further query options handled here:

          - 'not': keys whose records are subtracted from the result.
            When the query consists only of 'not' keys, the search starts
            from every indexed key that is not excluded.

          - 'range' (containing "min" and/or "max"), or a 'usage' option
            of the form 'range:min:max': search between min(keys) and/or
            max(keys) instead of looking each key up individually.

        If None is not returned as a result of the abovementioned
        constraints, two objects are returned.  The first object is a
        ResultSet containing the record numbers of the matching
        records.  The second object is a tuple containing the names of
        all data fields used.

        The optional resultset is the set of record numbers matched so
        far; it bounds 'and' searches, pre-filters 'or' searches when it
        holds fewer than 200 entries, and is handed on to _apply_not.

        Raises RuntimeError when the operator is not in self.operators.

        FAQ answer:  to search a Field Index for documents that
        have a blank string as their value, wrap the request value
        up in a tuple ala: request = {'id':('',)}
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index
        used = (self.id, )
        r = None
        opr = None

        # not / exclude parameter
        not_parm = record.get('not', None)
        if not record.keys and not_parm:
            # Convert into indexed format.  Real lists (rather than map())
            # are used because the values are scanned more than once.
            not_parm = [self._convert(k) for k in not_parm]
            # we have only a 'not' query
            record.keys = [k for k in index.keys() if k not in not_parm]
        else:
            # convert query arguments into indexed format
            record.keys = [self._convert(k) for k in record.keys]

        def without_excluded(matches):
            # Subtract the 'not' matches (if any) and build the return pair.
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                matches = difference(matches, exclude)
            return matches, used

        # experimental code for specifing the operator
        operator = record.get('operator', self.useOperator)
        if operator not in self.operators:
            raise RuntimeError("operator not valid: %s" % escape(operator))

        # Range parameter
        range_parm = record.get('range', None)
        if range_parm:
            opr = "range"
            opr_args = [bound for bound in ("min", "max")
                        if range_parm.find(bound) > -1]

        if record.get('usage', None):
            # see if any usage params are sent to field
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":  # range search
            lo = min(record.keys) if 'min' in opr_args else None
            hi = max(record.keys) if 'max' in opr_args else None
            # Test against None, not truthiness: a falsy maximum such as 0
            # is a legitimate upper bound and must not be dropped.
            if hi is not None:
                setlist = index.values(lo, hi)
            else:
                setlist = index.values(lo)

            # An index entry may be a bare int instead of a set; normalise
            # once so every branch below only ever sees sets.
            setlist = [IISet((s, )) if isinstance(s, int) else s
                       for s in setlist]

            # If we only use one key, return immediately
            if len(setlist) == 1:
                return without_excluded(setlist[0])

            if operator == 'or':
                r = multiunion(setlist)
            else:
                # For intersection, sort with smallest data set first
                if len(setlist) > 2:
                    setlist = sorted(setlist, key=len)
                r = resultset
                for s in setlist:
                    # the result is bound by the resultset
                    r = intersection(r, s)

        else:  # not a range search
            setlist = []
            for k in record.keys:
                s = index.get(k, None)
                # If None, try to bail early
                if s is None:
                    if operator == 'or':
                        # A missing key adds nothing to a union
                        continue
                    # A missing key makes an intersection empty
                    return IISet(), used
                elif isinstance(s, int):
                    s = IISet((s, ))
                setlist.append(s)

            # If we only use one key return immediately
            if len(setlist) == 1:
                return without_excluded(setlist[0])

            if operator == 'or':
                # If we already get a small result set passed in, intersecting
                # the various indexes with it and doing the union later is
                # faster than creating a multiunion first.
                if resultset is not None and len(resultset) < 200:
                    r = multiunion(
                        [intersection(resultset, s) for s in setlist])
                else:
                    r = multiunion(setlist)
            else:
                # For intersection, sort with smallest data set first
                if len(setlist) > 2:
                    setlist = sorted(setlist, key=len)
                r = resultset
                for s in setlist:
                    r = intersection(r, s)

        if isinstance(r, int):
            r = IISet((r, ))
        if r is None:
            return IISet(), used
        return without_excluded(r)
Пример #45
0
    def _apply_index(self, request, resultset=None):
        """
            Apply the index to query parameters given in 'request', which
            should be a mapping object.

            If the request does not contain the needed parameters, then
            return None.

            Otherwise return two objects.  The first object is a ResultSet
            containing the record numbers of the matching records.  The
            second object is a tuple containing the names of all data fields
            used.

            Only the first query key is used; it is converted with
            _convertDateTime.  Without a 'resultset' the matching records
            are computed directly.  With a 'resultset' the records that do
            *not* match are computed and subtracted from it.

            When a REQUEST and a catalog can be acquired, the computed set
            is stored in a per-request cache under a key built from the
            index id and the term divided by ten, and reused on later calls.
        """
        iid = self.id
        record = parseIndexRequest(request, iid, self.query_options)
        if record.keys is None:
            return None

        fields = (self._since_field, self._until_field)
        term = self._convertDateTime(record.keys[0])

        # 'cache' stays None when no request-level cache is available.
        cache = None
        cachekey = None
        REQUEST = aq_get(self, 'REQUEST', None)
        if REQUEST is not None:
            catalog = aq_parent(aq_parent(aq_inner(self)))
            if catalog is not None:
                key = self._cache_key(catalog)
                cache = REQUEST.get(key, None)
                # A conditional expression (not "and/or") so that a
                # quotient of 0 is kept instead of collapsing to 'None';
                # "//" keeps the integer division explicit.
                tid = term // 10 if isinstance(term, int) else 'None'
                if resultset is None:
                    cachekey = '_daterangeindex_%s_%s' % (iid, tid)
                else:
                    cachekey = '_daterangeindex_inverse_%s_%s' % (iid, tid)
                if cache is None:
                    cache = REQUEST[key] = RequestCache()
                else:
                    cached = cache.get(cachekey, None)
                    if cached is not None:
                        if resultset is None:
                            return (cached, fields)
                        return (difference(resultset, cached), fields)

        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            result = multiunion(
                [bounded, until_only, since_only, self._always])
            if cache is not None:
                cache[cachekey] = result

            return (result, fields)

        # Compute the inverse (everything that cannot match the term) and
        # subtract it from the resultset.
        until_only = multiunion(self._until_only.values(None, term - 1))
        since_only = multiunion(self._since_only.values(term + 1))
        until = multiunion(self._until.values(None, term - 1))
        since = multiunion(self._since.values(term + 1))

        result = multiunion([until_only, since_only, until, since])
        if cache is not None:
            cache[cachekey] = result

        return (difference(resultset, result), fields)
def extendedpathindex_search(self, path, default_level=0, depth=-1, navtree=0,
           navtree_start=0, tmpres=None):
    """
    Search the path index and return the set of matching entries.

    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    default_level specifies the level to use when no more specific
    level has been passed in with the path.

    level >= 0  starts searching at the given level
    level <  0  finds matches at *any* level

    depth lets you limit the results to items at most depth levels deeper
    than the matched path. depth == 0 means no subitems are included at all,
    with depth == 1 only direct children are included, etc. depth == -1, the
    default, returns all children at any depth.

    navtree is treated as a boolean; if it evaluates to True, not only the
    query match is returned, but also each container in the path. If depth
    is greater than 0, also all siblings of those containers, as well as the
    siblings of the match are included as well, plus *all* documents at the
    starting level.

    navtree_start limits what containers are included in a navtree search.
    If greater than 0, only containers (and possibly their siblings) at that
    level and up will be included in the resultset.

    tmpres is accepted as part of the signature but is not used by this
    implementation.

    The return value is always a set: either a freshly built IISet, the
    result of a BTrees set operation, or (for the single-depth shortcut) the
    set stored in ``self._index_parents`` itself.
    """
    if isinstance(path, basestring):
        level = default_level
    else:
        level = int(path[1])
        path = path[0]

    if level < 0:
        # Search at every level, return the union of all results
        return multiunion(
            [self.search(path, lvl, depth, navtree, navtree_start)
             for lvl in xrange(self._depth + 1)])

    comps = [comp for comp in path.split('/') if comp]

    if navtree and depth == -1:  # Navtrees don't do recursive
        depth = 1

    #
    # Optimisations
    #

    pathlength = level + len(comps) - 1
    if navtree and navtree_start > min(pathlength + depth, self._depth):
        # This navtree_start excludes all items that match the depth
        return IISet()
    if pathlength > self._depth:
        # Our search is for a path longer than anything in the index
        return IISet()

    if level == 0 and depth in (0, 1):
        # We have easy indexes for absolute paths where
        # we are looking for depth 0 or 1 result sets
        if navtree:
            # Optimized absolute path navtree and breadcrumbs cases
            if depth == 1:
                # Navtree case, all sibling elements along the path
                convert = multiunion
                index = self._index_parents
            else:
                # Breadcrumbs case, all direct elements along the path
                convert = IISet
                index = self._index_items
            # Collect all results along the path, skipping prefixes that
            # are not present in the chosen index
            result = []
            for i in range(len(comps), navtree_start - 1, -1):
                parent_path = '/' + '/'.join(comps[:i])
                found = index.get(parent_path)
                if found is not None:
                    result.append(found)
            return convert(result)

        if not path.startswith('/'):
            path = '/' + path
        if depth == 0:
            # Specific object search.  Compare against None explicitly so
            # that an entry stored under the id 0 is still reported.
            res = self._index_items.get(path)
            if res is None:
                return IISet()
            return IISet([res])
        # Single depth search
        return self._index_parents.get(path, IISet())

    # Avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about all indexed values having the
    # same common base path
    if level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        minlength = min(len(indexpath), len(comps))
        # Truncate path to first different element
        for i in xrange(minlength):
            if indexpath[i] != comps[i]:
                break
            level += 1
        comps = comps[level:]

    if not comps and depth == -1:
        # Recursive search for everything
        return IISet(self._unindex)

    #
    # Core application of the indexes
    #

    pathset = None
    depthset = None  # For limiting depth

    if navtree and depth > 0:
        # Include the elements up to the matching path
        depthset = multiunion([
            self._index.get(None, {}).get(i, IISet())
            for i in range(min(navtree_start, level),
                           max(navtree_start, level) + 1)])

    indexedcomps = enumerate(comps)
    if not navtree:
        # Optimize relative-path searches by starting with the
        # presumed smaller sets at the end of the path first
        # We can't do this for the navtree case because it needs
        # the bigger rootset to include siblings along the way.
        indexedcomps = list(indexedcomps)
        indexedcomps.reverse()

    for i, comp in indexedcomps:
        # Find all paths that have comp at the given level
        res = self._index.get(comp, {}).get(i + level)
        if res is None:  # Non-existing path; navtree is inverse, keep going
            pathset = IISet()
            if not navtree:
                return pathset
        pathset = intersection(pathset, res)

        if navtree and i + level >= navtree_start:
            # NOTE(review): when no set is stored under the None key for
            # this level, .get() yields None.  BTrees' intersection is
            # documented to return the other operand in that case, which
            # would be the whole pathset -- confirm that this is intended.
            depthset = union(depthset, intersection(pathset,
                self._index.get(None, {}).get(i + level)))

    if depth >= 0:
        # Limit results to those that terminate within depth levels
        start = len(comps) - 1
        if navtree:
            start = max(start, (navtree_start - level))
        candidates = [depthset] + [
            intersection(pathset, self._index.get(None, {}).get(i + level))
            for i in xrange(start, start + depth + 1)]
        # Drop None and empty sets before merging
        depthset = multiunion([found for found in candidates if found])

    if navtree or depth >= 0:
        return depthset
    return pathset
    def search(self, path, default_level=0, depth=-1, navtree=0,
               navtree_start=0):
        """
        Look up ``path`` in the index and return the matching set.

        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        Only paths whose first component is 'dmd' (optionally preceded by
        'zport', which is stripped) are accepted; an empty path is treated
        as 'dmd'.  The start level is always recomputed as the number of
        remaining path components, so ``default_level`` and the level of a
        (path, level) tuple do not select the level here; ``default_level``
        only gates the absolute-path shortcuts, which require it to be 0.

        Raises ValueError when ``depth`` is greater than 0, or when the path
        does not start with 'zport' or 'dmd'.

        The return value is always a set: a new IISet, the result of a
        BTrees set operation, or a set stored in the index itself.
        """

        if isinstance(path, basestring):
            startlevel = default_level
        else:
            # The level is still coerced with int() so that a malformed
            # tuple fails here; the value itself is overwritten below.
            startlevel = int(path[1])
            path = path[0]

        absolute_path = isinstance(path, basestring) and path.startswith('/')
        comps = [comp for comp in path.split('/') if comp]

        # Components of the path as given, with a leading '' so that
        # '/'.join() of a prefix yields an absolute path.
        orig_comps = [''] + comps

        if depth > 0:
            raise ValueError("Can't do depth searches anymore")

        if not comps:
            comps = ['dmd']
        elif comps[0] == 'zport':
            comps = comps[1:]
        elif comps[0] != 'dmd':
            raise ValueError("Depth searches must start with 'dmd'")
        startlevel = len(comps)

        if not comps and depth == -1 and not navtree:
            return IISet(self._unindex.keys())

        # Make sure that we get depth = 1 if in navtree mode
        # unless specified otherwise

        orig_depth = depth
        if depth == -1:
            depth = navtree

        # Optimized navtree starting with absolute path
        if absolute_path and navtree and depth == 1 and default_level == 0:
            set_list = []
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i]) or '/'
                try:
                    set_list.append(self._index_parents[parent_path])
                except KeyError:
                    # Prefixes missing from the index are simply skipped
                    pass
            return multiunion(set_list)
        # Optimized breadcrumbs
        elif absolute_path and navtree and depth == 0 and default_level == 0:
            item_list = IISet()
            # Insert root element
            if navtree_start >= len(orig_comps):
                navtree_start = 0
            # create a set of parent paths to search
            for i in range(len(orig_comps), navtree_start, -1):
                parent_path = '/'.join(orig_comps[:i]) or '/'
                try:
                    item_list.insert(self._index_items[parent_path])
                except KeyError:
                    # Prefixes missing from the index are simply skipped
                    pass
            return item_list
        # Specific object search
        elif absolute_path and orig_depth == 0 and default_level == 0:
            try:
                return IISet([self._index_items[path]])
            except KeyError:
                return IISet()
        # Single depth search
        # NOTE(review): cannot be reached for an integer depth, because
        # depth > 0 already raised ValueError above.
        elif absolute_path and orig_depth == 1 and default_level == 0:
            # only get objects contained in requested folder
            try:
                return self._index_parents[path]
            except KeyError:
                return IISet()
        # Sitemaps, relative paths, and depth queries
        # (startlevel is a len() result, so this condition always holds)
        elif startlevel >= 0:

            pathset = None  # Same as pathindex
            navset = None  # For collecting siblings along the way
            depthset = None  # For limiting depth

            if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(startlevel):
                navset = self._index[None][startlevel]
            for level in range(startlevel, startlevel + len(comps)):
                # Only the first iteration (level == len(comps)) passes this
                # test; the index is keyed by the whole joined path there.
                if level <= len(comps):
                    comp = "/".join(comps[:level])
                    if (not self._index.has_key(comp)
                        or not self._index[comp].has_key(level)):
                        # Navtree is inverse, keep going even for
                        # nonexisting paths
                        if navtree:
                            pathset = IISet()
                        else:
                            return IISet()
                    else:
                        # Direct hit: hand back the stored set as-is
                        return self._index[comp][level]
                    if navtree and depth and \
                           self._index.has_key(None) and \
                           self._index[None].has_key(level + depth):
                        navset = union(navset, intersection(pathset,
                                              self._index[None][level + depth]))
                if level - startlevel >= len(comps) or navtree:
                    if (self._index.has_key(None)
                        and self._index[None].has_key(level)):
                        depthset = union(depthset, intersection(pathset,
                                                    self._index[None][level]))

            if navtree:
                return union(depthset, navset) or IISet()
            elif depth:
                return depthset or IISet()
            else:
                return pathset or IISet()

        else:
            # NOTE(review): unreachable while startlevel is always
            # len(comps); kept unchanged from the any-level search.
            results = IISet()
            for level in range(0, self._depth + 1):
                ids = None
                error = 0
                for cn in range(0, len(comps)):
                    comp = comps[cn]
                    try:
                        ids = intersection(ids, self._index[comp][level + cn])
                    except KeyError:
                        error = 1
                if error == 0:
                    results = union(results, ids)
            return results