def _apply_index(self, request, cid=''):
    """Apply the index to the query found in 'request'.

    'request' should be a mapping object.  When it holds no query for
    this index, None is returned.

    Otherwise two objects are returned: the set of matching record
    numbers, and a tuple with the names of the data fields used.
    """
    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])

    # Collapse each bucket into one set up front, so that we never pay
    # the penalty of unioning a large set with many small ones.
    until_only = multiunion(self._until_only.values(term))
    since_only = multiunion(self._since_only.values(None, term))
    bounded = intersection(
        multiunion(self._until.values(term)),
        multiunion(self._since.values(None, term)),
    )

    # Merge from smallest to largest.
    result = bounded
    for bucket in (until_only, since_only, self._always):
        result = union(result, bucket)

    return result, (self._since_field, self._until_field)
def _apply_index(self, request, cid=''):
    """Look up the records matching the date queried in 'request'.

    'request' should be a mapping object; None is returned when it does
    not contain a query for this index.

    On a hit, returns ``(rids, fields)`` where ``rids`` is the set of
    matching record numbers and ``fields`` names the since/until fields.
    """
    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None
    term = self._convertDateTime(record.keys[0])

    # One aggregated set per bucket; this avoids large-small unions.
    open_start = multiunion(self._until_only.values(term))
    open_end = multiunion(self._since_only.values(None, term))
    ends_at_or_after = multiunion(self._until.values(term))
    starts_at_or_before = multiunion(self._since.values(None, term))
    closed = intersection(ends_at_or_after, starts_at_or_before)

    # Union order goes from the smallest set to the largest.
    matches = union(closed, open_start)
    matches = union(matches, open_end)
    matches = union(matches, self._always)

    fields_used = (self._since_field, self._until_field)
    return matches, fields_used
def in_(self, arg): # in
    """Given a sequence, return the union of rids for each.

    The argument is a string of comma-separated tokens.  It is split on
    the commas and the terms are whitespace-stripped to form a sequence
    of strings.  Unless the index is case sensitive, the whole argument
    is lower-cased first.

    Raises TypeError when 'arg' is not a string, and ValueError when it
    is empty or contains no comma.
    """

    # Parse and validate.
    # ===================

    if not isinstance(arg, basestring):
        # Wrap in a tuple so that a tuple argument is formatted, not
        # unpacked, by the % operator.
        raise TypeError("arg is not a string: '%s'" % (arg,))
    if not arg:
        raise ValueError("no arg given")
    if ',' not in arg:
        raise ValueError("malformed arg [no comma]: '%s'" % arg)
    if not self.case_sensitive:
        arg = arg.lower()

    # Build.
    # ======

    results = [
        self.values.get(token.strip(), IISet())
        for token in arg.split(',')
    ]
    # Smallest to largest.  Sorting on the length alone avoids comparing
    # the set objects themselves when two lengths tie.
    results.sort(key=len)
    return multiunion(results)
def _apply_index(self, request):
    """Return the entries overlapping the queried ``(start, end)`` pair.

    Returns None when the request holds no key pair for this index,
    otherwise ``(result_set, (self.id,))``.
    """
    record = parseIndexRequest(request, self.id)
    try:
        qstart, qend = record.keys
    except TypeError:
        return None

    # Clamp both ends of the query into the 64-bit integer key range.
    lowest = BTrees.family64.minint
    highest = BTrees.family64.maxint
    qstart = min(highest, max(lowest, qstart))
    qend = max(lowest, min(highest, qend))

    # since <= qstart <= until
    covers_qstart = intersection(
        multiunion(self._since_index.values(max=qstart)),
        multiunion(self._until_index.values(min=qstart)),
    )

    # since <= qend <= until
    covers_qend = intersection(
        multiunion(self._since_index.values(max=qend)),
        multiunion(self._until_index.values(min=qend)),
    )

    # qstart <= since and until <= qend
    inside_query = intersection(
        multiunion(self._since_index.values(min=qstart)),
        multiunion(self._until_index.values(max=qend)),
    )

    matched = union(union(covers_qstart, covers_qend), inside_query)
    return multiunion(map(self._index.__getitem__, matched)), (self.id,)
def _eval(self, context):
    """Evaluate all classified subqueries and return their union.

    When the classification flags 'empty', every object id of 'context'
    is returned.  With no subqueries at all, an empty set is returned.
    """
    classified = self._classifySubqueries()
    if classified['empty']:
        return context._getObjectIds()

    subqueries = (classified['lookup'] + classified['complex']
                  + classified['indexed'] + classified['notQ'])
    if not subqueries:
        return IISet()

    partials = [sq._eval(context) for sq in subqueries]
    if len(partials) >= 4:
        # From four sets on, a single multiunion is used.
        return multiunion(partials)

    combined = None
    for partial in partials:
        combined = union(combined, partial)
    return combined
def _eval(self, context):
    """Return the union of the results of all subqueries.

    An 'empty' classification short-circuits to all object ids of
    'context'; no subqueries at all yields an empty set.
    """
    parts = self._classifySubqueries()
    if parts['empty']:
        return context._getObjectIds()

    queries = (
        parts['lookup'] + parts['complex'] + parts['indexed'] + parts['notQ']
    )
    if not queries:
        return IISet()

    if len(queries) < 4:
        # Few sets: fold them pairwise.
        merged = None
        for query in queries:
            merged = union(merged, query._eval(context))
        return merged

    return multiunion([query._eval(context) for query in queries])
def _apply_not(self, not_parm, resultset=None):
    """Return the union of the index entries for every key in 'not_parm'.

    Keys missing from the index are skipped.  An entry stored as a bare
    integer is wrapped into a one-element set first.  'resultset' is
    accepted but not used here.
    """
    lookup = self._index.get
    collected = []
    for key in not_parm:
        entry = lookup(key, None)
        if entry is not None:
            if isinstance(entry, int):
                entry = IISet((entry, ))
            collected.append(entry)
    return multiunion(collected)
def _apply_index(self, request, resultset=None):
    """Query every source index named in 'request' and union the hits.

    Returns None when 'request' addresses none of the source indexes,
    otherwise ``(result_set, names_of_indexes_used)``.
    """
    matches = []
    used = []
    for reltype in self.getIndexSourceNames():
        query = request.get(reltype)
        if query is None:
            continue

        # A string is used as the target directly; anything else is
        # adapted through IUUID.
        target = query if isinstance(query, str) else IUUID(query)
        used.append(reltype)

        rows = self._index[reltype].get(target)
        if rows is not None:
            matches.append(rows)

    if not used:
        return

    names = tuple(used)
    if len(matches) == 1:
        return matches[0], names

    if resultset is not None and len(resultset) < 200:
        # If we already get a small result set passed in, intersecting
        # the various indexes with it and doing the union later is
        # faster than creating a multiunion first.
        combined = multiunion(
            [intersection(resultset, rows) for rows in matches])
    else:
        combined = multiunion(matches)

    if combined is None:
        combined = IISet()
    return combined, names
def query_index(self, record, resultset=None):
    """Return the record ids matching the date in 'record'.

    Without 'resultset' the matching set is computed directly.  With a
    'resultset', the non-matching ids are computed instead and
    subtracted from it.  Whichever set was computed is stored in the
    request cache, when one is available.
    """
    cache = self.getRequestCache()
    if cache is not None:
        cachekey = self.getRequestCacheKey(record, resultset)
        cached = cache.get(cachekey, None)
        if cached is not None:
            if resultset is None:
                return cached
            return difference(resultset, cached)

    term = self._convertDateTime(record.keys[0])

    if resultset is None:
        # Aggregate sets for each bucket separately, to avoid
        # large-small union penalties.
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)
        computed = multiunion(
            [bounded, until_only, since_only, self._always])
    else:
        # Compute the inverse; it is subtracted from 'resultset' below.
        until_only = multiunion(self._until_only.values(None, term - 1))
        since_only = multiunion(self._since_only.values(term + 1))
        until = multiunion(self._until.values(None, term - 1))
        since = multiunion(self._since.values(term + 1))
        computed = multiunion([since, since_only, until_only, until])

    if cache is not None:
        cache[cachekey] = computed

    if resultset is None:
        return computed
    return difference(resultset, computed)
def query_index(self, record, resultset=None):
    """Evaluate the date query in 'record', optionally against 'resultset'.

    With 'resultset' given, the ids that do *not* match are computed and
    removed from it; otherwise the matching ids are returned directly.
    The computed set is written to the request cache if there is one.
    """
    cache = self.getRequestCache()
    use_cache = cache is not None
    if use_cache:
        cachekey = self.getRequestCacheKey(record, resultset)
        hit = cache.get(cachekey, None)
        if hit is not None:
            return hit if resultset is None else difference(resultset, hit)

    term = self._convertDateTime(record.keys[0])

    if resultset is not None:
        # Inverse query: collect what falls outside the term.
        ended_open = multiunion(self._until_only.values(None, term - 1))
        pending_open = multiunion(self._since_only.values(term + 1))
        ended = multiunion(self._until.values(None, term - 1))
        pending = multiunion(self._since.values(term + 1))
        excluded = multiunion([pending, pending_open, ended_open, ended])
        if use_cache:
            cache[cachekey] = excluded
        return difference(resultset, excluded)

    # Direct query.  Each bucket is aggregated on its own to avoid
    # large-small union penalties.
    open_start = multiunion(self._until_only.values(term))
    open_end = multiunion(self._since_only.values(None, term))
    until = multiunion(self._until.values(term))
    since = multiunion(self._since.values(None, term))
    closed = intersection(until, since)
    matching = multiunion([closed, open_start, open_end, self._always])
    if use_cache:
        cache[cachekey] = matching
    return matching
def _eval(self, context):
    """Union the results of every subquery evaluated against 'context'."""
    csq = self._classifySubqueries()
    if csq["empty"]:
        result = context._getObjectIds()
    else:
        sqs = csq["lookup"] + csq["complex"] + csq["indexed"] + csq["notQ"]
        if not sqs:
            result = IISet()
        elif len(sqs) >= 4:
            # Four or more sets are merged in one multiunion call.
            result = multiunion([sq._eval(context) for sq in sqs])
        else:
            result = None
            for sq in sqs:
                result = union(result, sq._eval(context))
    return result
def __call__(self):
    """Write a JSON summary of the site to the response and return it.

    The summary holds the package version, the catalog length, the
    number of user ids in ``source_users`` and, for 30, 14 and 7 days
    back from now, how many catalog entries have a 'modified' index key
    at or after that point.
    """
    context = self.context
    acl_users = aq_get(context, 'acl_users')
    catalog = getToolByName(context, 'portal_catalog')

    now = DateTime()
    modified = catalog._catalog.indexes['modified']
    values_from = modified._index.values

    output = {
        'version': config.package_version,
        'objects': len(catalog),
        'users': len(acl_users.source_users.listUserIds()),
    }
    for key, days in (('modified_30', 30),
                      ('modified_14', 14),
                      ('modified_7', 7)):
        cutoff = modified._convert(now - days)
        output[key] = len(multiunion(values_from(cutoff)))

    response = self.request.response
    response.setHeader('content-type', 'application/json')
    response.setBody(json.dumps(output))
    return response
def setOperation(op, sets, isearch):
    '''perform *op* on *sets*. if *isearch*, return an incremental search.

    *op* may be '"and"' or '"or"'.

    Uses 'IncrementalSearch', if available.

    A result of 'None' stands for "all results"; a 'None' entry in
    *sets* is treated the same way.
    '''
    if not sets:
        if op == 'and':
            return  # None means all results
        if isearch:
            # an 'or' over nothing: an empty, completed incremental search
            search = IOr()
            search.complete()
            return search
        return IISet()
    # Note: "multiunion" is *much* faster than "IOr"!
    # Hence a non-incremental 'or' deliberately skips this branch.
    #if IAnd is not None and (isearch or len(sets) > 1):
    if IAnd is not None and (isearch or (op == 'and' and len(sets) > 1)):
        isets = []
        for set in sets:
            if set is None:
                # all results
                if op == 'and':
                    continue  # "all" is neutral for an intersection
                else:
                    return  # "all" absorbs a union
            if not isinstance(set, ISearch):
                set = IBTree(set)
            isets.append(set)
        if op == 'and' and not isets:
            return  # empty 'and'
        cl = op == 'and' and IAnd or IOr
        if len(isets) == 1:
            # do not wrap a one element search
            search = isets[0]
        else:
            search = cl(*isets)
            search.complete()
        if isearch:
            return search
        return search.asSet()
    if op == 'or' and len(sets) > 3:
        r = multiunion(sets)
    else:
        combine = op == 'and' and intersection or union
        r = None
        for set in sets:
            r = combine(r, set)
        if r is None:
            # every input was 'None'
            if combine is union:
                r = IISet()
            else:
                return
    # NOTE(review): only reachable with *isearch* true when 'IAnd' is
    # None -- confirm 'IBTree' is usable in that configuration.
    if isearch:
        r = IBTree(r)
    return r
def _search(self, path, default_level=0):
    """ Perform the actual search.

    ``path``
        a string representing a relative URL, or a part of a relative
        URL, or a tuple ``(path, level)``.  For the string forms,
        ``default_level`` is the level searched.

    ``default_level``
        the level to use for non-tuple queries.

    ``level >= 0``  =>  match ``path`` only at the given level.

    ``level <  0``  =>  match ``path`` at *any* level
    """
    if isinstance(path, str):
        level = default_level
    else:
        path, level = path[0], int(path[1])

    if level < 0:
        # Any level: run the search once per level and union the hits.
        per_level = [
            self._search(path, candidate)
            for candidate in range(self._depth + 1)
        ]
        return multiunion(per_level)

    comps = [part for part in path.split('/') if part]

    if level + len(comps) - 1 > self._depth:
        # The path is longer than anything in the index.
        return IISet()

    if not comps:
        return IISet(self._unindex.keys())

    results = None
    # Walk the components from the last one back to the first.
    for offset in range(len(comps) - 1, -1, -1):
        by_level = self._index.get(comps[offset], None)
        if by_level is None:
            return IISet()
        hits = by_level.get(level + offset, None)
        if hits is None:
            return IISet()
        results = intersection(results, hits)
    return results
def below(self, arg):
    """Find all resources at or below path, within the limits given.

    Returns None when the path is not known to the index.
    """

    # Parse and validate.
    # ===================

    path, upper, lower = self._path_and_limits(arg)
    if self.path2rid.get(path, None) is None:
        return


    # Build
    # =====
    # Intersect the rid sets registered for each (level, part) pair.

    segments = path.split(os.sep)
    rids = None
    for level, segment in enumerate(segments):
        rids = intersection(rids, self.parts[(level, segment)])
    if rids is None:
        return IISet() # short-cut


    # Limits
    # ======
    # Both limits are relative to the level of the requested path,
    # which is the last value 'level' took in the loop above.

    if upper is not None:
        # Drop the rids of every level from the path's own up to the
        # upper limit.
        for depth in range(level, upper + level):
            if depth not in self.levels:
                break
            rids = difference(rids, self.levels[depth])
    if lower is not None:
        # Keep only rids found in one of the levels up to the lower limit.
        allowed = []
        for depth in range(level, lower + level):
            if depth not in self.levels:
                break
            allowed.append(self.levels[depth])
        rids = intersection(rids, multiunion(allowed))

    return rids
def _search(self, path, default_level=0):
    """ Perform the actual search.

    ``path``
        a string representing a relative URL, or a part of a relative
        URL, or a tuple ``(path, level)``.  In the first two cases, use
        ``default_level`` as the level for the search.

    ``default_level``
        the level to use for non-tuple queries.

    ``level >= 0``  =>  match ``path`` only at the given level.

    ``level <  0``  =>  match ``path`` at *any* level

    Returns the set of matching record ids; an empty set when nothing
    matches.
    """
    if isinstance(path, str):
        level = default_level
    else:
        level = int(path[1])
        path = path[0]

    if level < 0:
        # Search at every level, return the union of all results
        return multiunion(
            [self._search(path, level) for level in range(self._depth + 1)])

    # Build a real list: on Python 3 ``filter`` returns a lazy iterator,
    # which supports neither ``len()`` nor repeated iteration.
    comps = [comp for comp in path.split('/') if comp]

    if level + len(comps) - 1 > self._depth:
        # Our search is for a path longer than anything in the index
        return IISet()

    if not comps:
        return IISet(self._unindex.keys())

    results = None
    # Start with the last component and work backwards.
    for i, comp in reversed(list(enumerate(comps))):
        tree = self._index.get(comp, None)
        if tree is None:
            return IISet()
        tree2 = tree.get(level + i, None)
        if tree2 is None:
            return IISet()
        results = intersection(results, tree2)
    return results
def setOperation(op, sets, isearch):
    '''perform *op* on *sets*. if *isearch*, return an incremental search.

    *op* may be '"and"' or '"or"'.

    Uses 'IncrementalSearch', if available.

    A result of 'None' stands for "all results"; a 'None' entry in
    *sets* is treated the same way.
    '''
    if not sets:
        if op == 'and': return # None means all results
        # an 'or' over nothing: an empty, completed incremental search
        if isearch: search = IOr(); search.complete(); return search
        return IISet()
    # Note: "multiunion" is *much* faster than "IOr"!
    # Hence a non-incremental 'or' deliberately skips this branch.
    #if IAnd is not None and (isearch or len(sets) > 1):
    if IAnd is not None and (isearch or (op == 'and' and len(sets) > 1)):
        isets = []
        for set in sets:
            if set is None:
                # all results
                if op == 'and': continue  # neutral for an intersection
                else: return  # absorbs a union
            if not isinstance(set, ISearch): set = IBTree(set)
            isets.append(set)
        if op == 'and' and not isets: return # empty 'and'
        cl = op == 'and' and IAnd or IOr
        if len(isets) == 1:
            # do not wrap a one element search
            search = isets[0]
        else: search = cl(*isets); search.complete()
        if isearch: return search
        # Use 'asSet' when the search provides it; otherwise build the
        # set from the search's elements.
        if hasattr(search, 'asSet'): r = search.asSet()
        else: r = IISet(); r.__setstate__((tuple(search),))
        return r
    if op == 'or' and len(sets) > 5: r = multiunion(sets)
    else:
        combine = op == 'and' and intersection or union
        r= None
        for set in sets: r= combine(r,set)
        if r is None:
            # every input was 'None'
            if combine is union: r = IISet()
            else: return
    # NOTE(review): only reachable with *isearch* true when 'IAnd' is
    # None -- confirm 'IBTree' is usable in that configuration.
    if isearch: r = IBTree(r)
    return r
def refresh(self):
    """Load the data set from the database.

    self.constraints contains a list of lists. Within each sublist, the
    terms are either ANDed or NOTed together; the results are then ORed.
    Without any constraints, all rids of the catalog are used.

    The below could be optimized in a couple ways:

        - stop searching as soon as the result set proves empty
        - perform both levels of merge from smallest to largest

    See original ZCatalog code for implementation hints.
    """
    import dewey # avoid circular import
    everything = dewey.get_catalog().rids

    if self.constraints is None:
        data = everything
    else:
        per_group = []
        for grouping in self.constraints:
            for operation, query, (call, arg) in grouping:
                if operation is None:
                    # An OR term: either everything, or whatever the
                    # call yields.
                    if call is None:
                        result = everything
                    else:
                        result = call(arg)
                else:
                    # AND/NOT: refine what this group has found so far.
                    assert None not in (operation, call) # safety net
                    result = operation(result, call(arg))
            if result is not None:
                per_group.append(result)
        data = multiunion(per_group) # OR

    if data is None:
        data = IISet()
    self.data = data
def above(self, arg):
    """Find all resources at or above path, within the limits given.

    Here we actually call below() on <path> and all of its ancestors,
    passing the limits straight through, with the exception that limits
    default to 0:1 rather than None:None. Use '0:' for the latter.

    Returns the union of the rids found for the path and each of its
    ancestors, or None when the path is not known to the index.
    """

    # Parse and validate.
    # ===================

    path, upper, lower = self._path_and_limits(arg)
    rid = self.path2rid.get(path, None)
    if rid is None:
        return


    # Build
    # =====
    # Each below() call gets "<ancestor> <upper>:<lower>" as argument.

    tmpl = "%s "
    if (upper, lower) == (None, None):
        tmpl += '0:1' # default: breadcrumbs
    else:
        if upper is not None:
            tmpl += str(upper)
        tmpl += ":"
        if lower is not None:
            tmpl += str(lower)

    parts = path.split(os.sep)
    rids = []
    for level in range(len(parts)):
        # An empty join result is replaced by '/'.
        ancestor = os.sep.join(parts[:level+1]) or '/'
        found = self.below(tmpl % ancestor)
        # below() returns None for a path it does not know; skip those.
        if found is not None:
            rids.append(found)

    return multiunion(rids)
def in_(self, arg): # in
    """Given a sequence, return the union of rids for each.

    If the argument starts with a [ or (, it is evaled as a list or
    tuple. Otherwise, it is split on comma and stripped to form a
    sequence of strings.

    Raises TypeError when 'arg' is not a string or does not evaluate to
    a list or tuple, and ValueError when it is empty or has no comma.
    """

    # Parse and validate.
    # ===================

    if not isinstance(arg, basestring):
        # Wrap in a tuple so that a tuple argument is formatted, not
        # unpacked, by the % operator.
        raise TypeError("arg is not a string: '%s'" % (arg,))
    if not arg:
        raise ValueError("no arg given")
    if ',' not in arg:
        raise ValueError("malformed arg [no comma]: '%s'" % arg)

    if arg[0] in '[(':
        # SECURITY(review): eval() runs arbitrary code.  If 'arg' can
        # come from an untrusted source, this should become
        # ast.literal_eval() -- confirm that callers only pass literals.
        values = eval(arg)
        if not isinstance(values, (list, tuple)):
            raise TypeError("arg didn't define list or tuple")
    else:
        values = [v.strip() for v in arg.split(',')]


    # Build.
    # ======

    results = [self.rids.get(value, IISet()) for value in values]
    # Smallest to largest.  Sorting on the length alone avoids comparing
    # the set objects themselves when two lengths tie.
    results.sort(key=len)
    return multiunion(results)
def dateindex_apply_index(self, request, cid="", type=type, res=None):
    """Apply the date index to the query given in 'request'.

    Returns None when 'request' holds no query for this index,
    otherwise ``(result_set, (self.id,))``.  'res' is an optional
    earlier result set used to narrow the per-key sets.

    Raises RuntimeError for an operator that is not in
    ``self.operators``.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    # A real list: the keys are read by min(), max() and the key loop.
    keys = [self._convert(key) for key in record.keys]

    index = self._index
    r = None
    opr = None

    # experimental code for specifying the operator
    operator = record.get("operator", self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % operator)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    # range parameter
    range_arg = record.get("range", None)
    if range_arg:
        opr = "range"
        opr_args = []
        if "min" in range_arg:
            opr_args.append("min")
        if "max" in range_arg:
            opr_args.append("max")

    if record.get("usage", None):
        # see if any usage params are sent to field
        usage = record.usage.lower().split(":")
        opr, opr_args = usage[0], usage[1:]

    if opr == "range":  # range search
        lo = min(keys) if "min" in opr_args else None
        hi = max(keys) if "max" in opr_args else None
        # A bound of None means "open".  Passing 'hi' straight through
        # keeps an upper bound that happens to be falsy (e.g. 0).
        r = multiunion(index.values(lo, hi))
    else:  # not a range search
        for key in keys:
            found = index.get(key, None)
            if found is None:
                continue
            if isinstance(found, int):
                found = IISet((found,))
            else:
                # set can't be bigger than res
                found = intersection(found, res)
            r = set_func(r, found)

    if isinstance(r, int):
        r = IISet((r,))

    if r is None:
        return IISet(), (self.id,)
    return r, (self.id,)
def dateindex_apply_index(self, request, cid='', type=type, res=None):
    """Query the date index with the parameters found in 'request'.

    Returns None when 'request' carries no query for this index,
    otherwise a ``(result_set, (self.id,))`` pair.  'res', when given,
    is an earlier result set that the per-key sets are intersected with.

    Raises RuntimeError when the requested operator is not one of
    ``self.operators``.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    # Materialised, because min(), max() and the loop below all read it.
    keys = list(map(self._convert, record.keys))

    index = self._index
    r = None
    opr = None

    # experimental code for specifying the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % operator)

    # depending on the operator we use intersection or union
    set_func = union if operator == "or" else intersection

    # range parameter
    range_arg = record.get('range', None)
    if range_arg:
        opr = "range"
        opr_args = [bound for bound in ("min", "max") if bound in range_arg]

    if record.get('usage', None):
        # see if any usage params are sent to field
        usage = record.usage.lower().split(':')
        opr, opr_args = usage[0], usage[1:]

    if opr == "range":
        # range search
        lo = None
        hi = None
        if 'min' in opr_args:
            lo = min(keys)
        if 'max' in opr_args:
            hi = max(keys)
        # None leaves that side of the range open; a falsy but real
        # upper bound such as 0 is still honoured.
        r = multiunion(index.values(lo, hi))
    else:
        # not a range search
        for key in keys:
            rows = index.get(key, None)
            if rows is not None:
                if isinstance(rows, int):
                    rows = IISet((rows,))
                else:
                    # set can't be bigger than res
                    rows = intersection(rows, res)
                r = set_func(r, rows)

    if isinstance(r, int):
        r = IISet((r,))

    if r is None:
        return IISet(), (self.id,)
    return r, (self.id,)
def _apply_index(self, request, cid='', type=type):
    """Find the objects whose start/end range overlaps the queried one.

    The request argument should be a mapping object.  If it holds no
    query for this index, None is returned.  If the query lacks either
    its 'start' or its 'end' value, an empty set is returned.

    Otherwise the start and end indexes of the catalog are queried and
    the combined set is returned together with ``(self.id,)``.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    q_start = record.get('start', None)
    q_end = record.get('end', None)
    if q_start is None or q_end is None:
        return IISet(), (self.id,)

    # get both indexes: for start and end
    zcatalog = self._getCatalog()
    i_start = zcatalog.getIndex(self.startindex)
    i_end = zcatalog.getIndex(self.endindex)

    def started(bound, direction):
        # Result set of a one-sided range query on the start index.
        query = {self.startindex: {'query': bound, 'range': direction}}
        return i_start._apply_index(query)[0]

    def ended(bound, direction):
        # Result set of a one-sided range query on the end index.
        query = {self.endindex: {'query': bound, 'range': direction}}
        return i_end._apply_index(query)[0]

    # search:
    #
    #       q_start|--------------------|q_end
    #
    # 1) i_start|---------------------------|i_end
    #
    # 2) i_start|---------------|i_end
    #
    # 3)           i_start|-----------------|i_end
    #
    # 4)           i_start|-----|i_end

    # 1) starts at or before q_start and ends at or after q_end
    spanning = intersection(started(q_start, 'max'), ended(q_end, 'min'))

    # 2) ends between q_start and q_end
    ending_inside = intersection(ended(q_start, 'min'), ended(q_end, 'max'))

    # 3) starts between q_start and q_end
    starting_inside = intersection(
        started(q_start, 'min'), started(q_end, 'max'))

    # 4) objects lying completely inside are already found by 2) and 3)

    result = multiunion([spanning, ending_inside, starting_inside])
    return result, (self.id,)
def extendedpathindex_search(self, path, default_level=0, depth=-1,
                             navtree=0, navtree_start=0, tmpres=None):
    """Search the path index and return the set of matching record ids.

    path is either a string representing a relative URL or a part of a
    relative URL or a tuple (path, level).

    default_level specifies the level to use when no more specific
    level has been passed in with the path.

    level >= 0  starts searching at the given level
    level <  0  finds matches at *any* level

    depth lets you limit the results to items at most depth levels
    deeper than the matched path. depth == 0 means no subitems are
    included at all, with depth == 1 only direct children are included,
    etc. depth == -1, the default, returns all children at any depth.

    navtree is treated as a boolean; if it evaluates to True, not only
    the query match is returned, but also each container in the path. If
    depth is greater than 0, also all siblings of those containers, as
    well as the siblings of the match are included as well, plus *all*
    documents at the starting level.

    navtree_start limits what containers are included in a navtree
    search. If greater than 0, only containers (and possibly their
    siblings) at that level and up will be included in the resultset.

    tmpres, when given, is the set the per-component results are
    intersected with.
    """
    if isinstance(path, basestring):
        level = default_level
    else:
        level = int(path[1])
        path = path[0]

    if level < 0:
        # Search at every level, return the union of all results
        return multiunion([
            self.search(path, level, depth, navtree, navtree_start)
            for level in xrange(self._depth + 1)
        ])

    comps = filter(None, path.split('/'))

    if navtree and depth == -1:  # Navtrees don't do recursive
        depth = 1

    #
    # Optimisations
    #

    pathlength = level + len(comps) - 1
    if navtree and navtree_start > min(pathlength + depth, self._depth):
        # This navtree_start excludes all items that match the depth
        return IISet()

    if pathlength > self._depth:
        # Our search is for a path longer than anything in the index
        return IISet()

    if level == 0 and depth in (0, 1):
        # We have easy indexes for absolute paths where
        # we are looking for depth 0 or 1 result sets
        if navtree:
            # Optimized absolute path navtree and breadcrumbs cases
            result = []
            # Append a lookup result to 'result', skipping misses.
            add = lambda x: x is not None and result.append(x)
            if depth == 1:
                # Navtree case, all sibling elements along the path
                convert = multiunion
                index = self._index_parents
            else:
                # Breadcrumbs case, all direct elements along the path
                convert = IISet
                index = self._index_items
            # Collect all results along the path
            for i in range(len(comps), navtree_start - 1, -1):
                parent_path = '/' + '/'.join(comps[:i])
                add(index.get(parent_path))
            return convert(result)

        if not path.startswith('/'):
            path = '/' + path
        if depth == 0:
            # Specific object search
            res = self._index_items.get(path)
            # NOTE(review): a falsy 'res' (e.g. 0) yields the empty set
            # here -- confirm that 0 is never stored as a value.
            return res and IISet([res]) or IISet()
        else:
            # Single depth search
            return self._index_parents.get(path, IISet())

    # Avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about all indexed values having the
    # same common base path
    if level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        minlength = min(len(indexpath), len(comps))
        # Truncate path to first different element
        for i in xrange(minlength):
            if indexpath[i] != comps[i]:
                break
            level += 1
        comps = comps[level:]

    if not comps and depth == -1:
        # Recursive search for everything
        return IISet(self._unindex)

    #
    # Core application of the indexes
    #

    pathset = tmpres  # Same as pathindex
    depthset = None  # For limiting depth

    if navtree and depth > 0:
        # Include the elements up to the matching path
        depthset = multiunion([
            self._index.get(None, {}).get(i, IISet())
            for i in range(min(navtree_start, level),
                           max(navtree_start, level) + 1)
        ])

    indexedcomps = enumerate(comps)
    if not navtree:
        # Optimize relative-path searches by starting with the
        # presumed smaller sets at the end of the path first
        # We can't do this for the navtree case because it needs
        # the bigger rootset to include siblings along the way.
        indexedcomps = list(indexedcomps)
        indexedcomps.reverse()

    for i, comp in indexedcomps:
        # Find all paths that have comp at the given level
        res = self._index.get(comp, {}).get(i + level)
        if res is None:
            # Non-existing path; navtree is inverse, keep going
            pathset = IISet()
            if not navtree:
                return pathset
        pathset = intersection(pathset, res)

        if navtree and i + level >= navtree_start:
            # Add the entries stored under the None key for this level
            # that are also in the path set so far.
            depthset = union(
                depthset,
                intersection(pathset,
                             self._index.get(None, {}).get(i + level)))

    if depth >= 0:
        # Limit results to those that terminate within depth levels
        start = len(comps) - 1
        if navtree:
            start = max(start, (navtree_start - level))
        depthset = multiunion(
            filter(None, [depthset] + [
                intersection(pathset,
                             self._index.get(None, {}).get(i + level))
                for i in xrange(start, start + depth + 1)
            ]))

    if navtree or depth >= 0:
        return depthset
    return pathset
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in the argument

    The 'query' arguments are passed through ``self._convert`` before
    the index is queried.

    Returns None when 'request' holds no query for this index,
    otherwise ``(result_set, (self.id,))``.  'resultset', when given,
    is intersected with the per-key sets of a non-range search.

    Raises RuntimeError for an operator that is not in
    ``self.operators``.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    # Must be a real list: on Python 3 a bare map() iterator would be
    # exhausted by min() before max() or the key loop gets to read it.
    keys = list(map(self._convert, record.keys))

    index = self._index
    r = None
    opr = None

    # experimental code for specifying the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % operator)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    # range parameter
    range_arg = record.get('range', None)
    if range_arg:
        opr = "range"
        opr_args = []
        if "min" in range_arg:
            opr_args.append("min")
        if "max" in range_arg:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        usage = record.usage.lower().split(':')
        opr, opr_args = usage[0], usage[1:]

    if opr == "range":  # range search
        lo = min(keys) if 'min' in opr_args else None
        hi = max(keys) if 'max' in opr_args else None
        # None means "unbounded"; passing 'hi' through unchanged keeps
        # an upper bound that happens to be falsy (e.g. 0).
        r = multiunion(index.values(lo, hi))
    else:  # not a range search
        for key in keys:
            rows = index.get(key, None)
            if rows is None:
                continue
            if isinstance(rows, int):
                rows = IISet((rows,))
            else:
                # set can't be bigger than resultset
                rows = intersection(rows, resultset)
            r = set_func(r, rows)

    if isinstance(r, int):
        r = IISet((r,))

    if r is None:
        return IISet(), (self.id,)
    return r, (self.id,)
def unindex_apply_index(self, request, cid='', type=type, res=None):
    """Apply the index to the query parameters given in ``request``.

    ``res`` is an optional, already computed result set; it is used as
    the starting point of 'and' queries and to pre-filter small 'or'
    queries.  ``cid`` and ``type`` are accepted for signature
    compatibility and are not used here.

    Returns ``None`` when the request carries no keys for this index,
    otherwise a ``(result_set, (self.id,))`` tuple.

    Raises ``RuntimeError`` when the requested operator is not listed in
    ``self.operators``.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    r = None
    opr = None

    # experimental code for specifying the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        # Call syntax instead of the Python-2-only ``raise E, msg`` form.
        raise RuntimeError("operator not valid: %s" % escape(operator))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = hi = None
        if 'min' in opr_args:
            lo = min(record.keys)
        if 'max' in opr_args:
            hi = max(record.keys)
        # None is an open bound; passing ``hi`` through unconditionally
        # keeps falsy-but-valid upper bounds such as 0 or ''.
        setlist = index.values(lo, hi)

        # If only one key matched, return its set immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))
            return result, (self.id,)

        if operator == 'or':
            r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s,))
                tmp.append(s)
            if len(tmp) > 2:
                setlist = sorted(tmp, key=len)
            else:
                setlist = tmp
            r = res
            for s in setlist:
                r = intersection(r, s)

    else:  # not a range search
        # Filter duplicate keys
        keys = set(record.keys)
        setlist = []
        for k in keys:
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # A missing key adds nothing to a union
                    continue
                # A missing key makes the whole intersection empty
                return IISet(), (self.id,)
            elif isinstance(s, int):
                s = IISet((s,))
            setlist.append(s)

        # If only one key matched, return its set immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))
            return result, (self.id,)

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is
            # faster than creating a multiunion first.
            if res is not None and len(res) < 200:
                r = multiunion([intersection(res, s) for s in setlist])
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = res
            for s in setlist:
                r = intersection(r, s)

    if isinstance(r, int):
        r = IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    return r, (self.id,)
def query_index(self, record, resultset=None):
    """Search the index with the given IndexQuery object.

    If not `None`, the resultset argument indicates that the search
    result is relevant only on this set, i.e. everything outside
    resultset is of no importance.  The index can use this information
    for optimizations.

    Returns the set of matching document ids.  When a request cache and
    cache key are available, 'or' queries cache their result and 'and'
    queries cache the list of sets to intersect, because the
    intersection itself depends on ``resultset``.
    """
    index = self._index
    r = None
    opr = None

    # not / exclude parameter.  Convert it into indexed format once, up
    # front, so that the cached and the uncached paths exclude the same
    # keys.  NOTE(review): assumes ``_apply_not`` looks the keys up
    # without converting them itself - confirm.
    not_parm = record.get('not', None)
    if not_parm:
        not_parm = list(map(self._convert, not_parm))

    operator = record.operator

    cachekey = None
    cache = self.getRequestCache()
    if cache is not None:
        cachekey = self.getRequestCacheKey(record)
        if cachekey is not None:
            cached = None
            if operator == 'or':
                cached = cache.get(cachekey, None)
            else:
                cached_setlist = cache.get(cachekey, None)
                if cached_setlist is not None:
                    r = resultset
                    for s in cached_setlist:
                        # the result is bound by the resultset
                        r = intersection(r, s)
                        # An empty intersection cannot grow again
                        if not r:
                            break
                    cached = r

            if cached is not None:
                if isinstance(cached, int):
                    cached = IISet((cached, ))

                if not_parm:
                    exclude = self._apply_not(not_parm, resultset)
                    cached = difference(cached, exclude)

                return cached

    if not record.keys and not_parm:
        # we have only a 'not' query
        record.keys = [k for k in index.keys() if k not in not_parm]
    else:
        # convert query arguments into indexed format
        record.keys = list(map(self._convert, record.keys))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = 'range'
        opr_args = []
        if range_parm.find('min') > -1:
            opr_args.append('min')
        if range_parm.find('max') > -1:
            opr_args.append('max')

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == 'range':  # range search
        lo = hi = None
        if 'min' in opr_args:
            lo = min(record.keys)
        if 'max' in opr_args:
            hi = max(record.keys)
        # None is an open bound; passing ``hi`` through unconditionally
        # keeps falsy-but-valid upper bounds such as 0.
        setlist = index.values(lo, hi)

        # If only one key matched, return its set immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))

            if cachekey is not None:
                if operator == 'or':
                    cache[cachekey] = result
                else:
                    cache[cachekey] = [result]

            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result

        # Wrap bare document ids so that every entry is a set
        tmp = []
        for s in setlist:
            if isinstance(s, int):
                s = IISet((s,))
            tmp.append(s)

        if operator == 'or':
            r = multiunion(tmp)
            if cachekey is not None:
                cache[cachekey] = r
        else:
            # For intersection, sort with smallest data set first
            if len(tmp) > 2:
                setlist = sorted(tmp, key=len)
            else:
                setlist = tmp

            # 'r' is not invariant of resultset. Thus, we
            # have to remember 'setlist'
            if cachekey is not None:
                cache[cachekey] = setlist

            r = resultset
            for s in setlist:
                # the result is bound by the resultset
                r = intersection(r, s)
                # An empty intersection cannot grow again
                if not r:
                    break

    else:  # not a range search
        setlist = []
        for k in record.keys:
            if k is None:
                # Prevent None from being looked up. None doesn't
                # have a valid ordering definition compared to any
                # other object. BTrees 4.0+ will throw a TypeError
                # "object has default comparison".
                continue
            try:
                s = index.get(k, None)
            except TypeError:
                # key is not valid for this Btree so the value is None
                LOG.error(
                    '%(context)s: query_index tried '
                    'to look up key %(key)r from index %(index)r '
                    'but key was of the wrong type.',
                    dict(
                        context=self.__class__.__name__,
                        key=k,
                        index=self.id,
                    )
                )
                s = None
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # A missing key adds nothing to a union
                    continue
                # A missing key makes the whole intersection empty
                if cachekey is not None:
                    # If operator is 'and', we have to cache a list of
                    # IISet objects
                    cache[cachekey] = [IISet()]
                return IISet()
            elif isinstance(s, int):
                s = IISet((s,))
            setlist.append(s)

        # If only one key matched, return its set immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result,))

            if cachekey is not None:
                if operator == 'or':
                    cache[cachekey] = result
                else:
                    cache[cachekey] = [result]

            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is
            # faster than creating a multiunion first.
            if resultset is not None and len(resultset) < 200:
                r = multiunion([intersection(resultset, s) for s in setlist])
                # 'r' is not invariant of resultset here, so only the
                # full union of 'setlist' could be cached.  Computing it
                # just for the cache may be a performance killer, so
                # nothing is cached on this path.
            else:
                r = multiunion(setlist)
                if cachekey is not None:
                    cache[cachekey] = r
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)

            # 'r' is not invariant of resultset. Thus, we
            # have to remember 'setlist' itself
            if cachekey is not None:
                cache[cachekey] = setlist

            r = resultset
            for s in setlist:
                r = intersection(r, s)
                # An empty intersection cannot grow again
                if not r:
                    break

    if isinstance(r, int):
        r = IISet((r, ))

    if r is None:
        return IISet()

    if not_parm:
        exclude = self._apply_not(not_parm, resultset)
        r = difference(r, exclude)
    return r
def numObjects(self):
    """Return the number of indexed objects.

    The count is the size of the union of the ids held by every
    filtered set, so an object present in several sets counts once.
    """
    id_sets = [filtered.getIds() for filtered in self.filteredSets.values()]
    return len(multiunion(id_sets))
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in 'request', which
    should be a mapping object.

    If the request does not contain the needed parameters, then
    return None.

    Otherwise return two objects.  The first object is a ResultSet
    containing the record numbers of the matching records.  The
    second object is a tuple containing the names of all data fields
    used.

    Without ``resultset`` the matching set is computed directly.  With
    ``resultset`` the non-matching ("inverse") set is computed and
    subtracted from it.  When a REQUEST and a catalog are reachable, the
    computed set is stored in a per-request cache under a key derived
    from the index id and the term divided by ten.
    """
    iid = self.id
    fields = (self._since_field, self._until_field)

    record = parseIndexRequest(request, iid, self.query_options)
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])

    # ``cache`` stays None unless both a REQUEST and a catalog are found.
    cache = None
    cachekey = None
    REQUEST = aq_get(self, 'REQUEST', None)
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
        if catalog is not None:
            key = self._cache_key(catalog)
            cache = REQUEST.get(key, None)
            # Floor division keeps the bucket an int on Python 3, and the
            # explicit branch stops terms 0..9 from sharing the 'None'
            # key that is meant for non-integer terms.
            if isinstance(term, int):
                tid = term // 10
            else:
                tid = 'None'
            if resultset is None:
                cachekey = '_daterangeindex_%s_%s' % (iid, tid)
            else:
                cachekey = '_daterangeindex_inverse_%s_%s' % (iid, tid)
            if cache is None:
                cache = REQUEST[key] = RequestCache()
            else:
                cached = cache.get(cachekey, None)
                if cached is not None:
                    if resultset is None:
                        return (cached, fields)
                    return (difference(resultset, cached), fields)

    if resultset is None:
        # Aggregate sets for each bucket separately, to avoid
        # large-small union penalties.
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)

        result = multiunion([bounded, until_only, since_only, self._always])
        if cache is not None:
            cache[cachekey] = result
        return (result, fields)

    # Compute the inverse and subtract from resultset
    until_only = multiunion(self._until_only.values(None, term - 1))
    since_only = multiunion(self._since_only.values(term + 1))
    until = multiunion(self._until.values(None, term - 1))
    since = multiunion(self._since.values(term + 1))

    result = multiunion([since, since_only, until_only, until])
    if cache is not None:
        cache[cachekey] = result
    return (difference(resultset, result), fields)
def daterangeindex_apply_index(self, request, cid='', res=None):
    """Apply the date range index to the query given in ``request``.

    Returns ``None`` when the request carries no keys for this index,
    otherwise a ``(result_set, (since_field, until_field))`` tuple.

    Without ``res`` the matching set is computed directly.  With ``res``
    the non-matching ("inverse") set is computed and subtracted from it.
    When a REQUEST and a catalog are reachable, the computed set is
    stored in a per-request cache keyed by catalog id and counter.
    ``cid`` is accepted for signature compatibility and is not used.
    """
    fields = (self._since_field, self._until_field)

    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])

    # ``cache`` stays None unless both a REQUEST and a catalog are found.
    cache = None
    cachekey = None
    REQUEST = getattr(self, 'REQUEST', None)
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
        if catalog is not None:
            key = '%s_%s' % (catalog.getId(), catalog.getCounter())
            cache = REQUEST.get(key, None)
            # Floor division keeps the bucket an int on Python 3, and the
            # explicit branch stops terms 0..9 from sharing the 'None'
            # key that is meant for non-integer terms.
            if isinstance(term, int):
                tid = term // 10
            else:
                tid = 'None'
            index_id = self.getId()
            if res is None:
                cachekey = '_daterangeindex_%s_%s' % (index_id, tid)
            else:
                cachekey = '_daterangeindex_inverse_%s_%s' % (index_id, tid)
            if cache is None:
                cache = REQUEST[key] = RequestCache()
            else:
                cached = cache.get(cachekey, None)
                if cached is not None:
                    if res is None:
                        return cached, fields
                    return difference(res, cached), fields

    if res is None:
        # Aggregate sets for each bucket separately, to avoid
        # large-small union penalties.
        # XXX Does this apply for multiunion?
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)

        result = multiunion([bounded, until_only, since_only, self._always])
        if cache is not None:
            cache[cachekey] = result
        return result, fields

    # Compute the inverse and subtract from res
    until_only = multiunion(self._until_only.values(None, term - 1))
    since_only = multiunion(self._since_only.values(term + 1))
    until = multiunion(self._until.values(None, term - 1))
    since = multiunion(self._since.values(term + 1))

    result = multiunion([until_only, since_only, until, since])
    if cache is not None:
        cache[cachekey] = result
    return difference(res, result), fields
def search(self, path, default_level=0, depth=-1, navtree=0,
           navtree_start=0):
    """Return the set of document ids matching a path query.

    path is either a string representing a relative URL or a part of a
    relative URL, or a tuple (path, level).

    level >= 0  starts searching at the given level
    level <  0  tries the path components at every level from 0 up to
                ``self._depth``

    ``depth`` limits how far below the path results may lie (-1 means
    unlimited, or 1 in navtree mode).  ``navtree`` switches on the
    navigation-tree style query that also collects entries along the
    path, starting at ``navtree_start``.
    """
    if isinstance(path, basestring):
        startlevel = default_level
    else:
        startlevel = int(path[1])
        path = path[0]

    absolute_path = isinstance(path, basestring) and path.startswith('/')

    # A real list is needed: comps is sliced, measured and mutated below.
    comps = list(filter(None, path.split('/')))
    orig_comps = [''] + comps[:]

    # Optimization - avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about catalog/index having
    # the same container as content
    if default_level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        while indexpath and comps:
            if indexpath[0] != comps[0]:
                break
            del indexpath[0]
            del comps[0]
            startlevel += 1

    if not comps and depth == -1 and not navtree:
        return IISet(self._unindex.keys())

    # Make sure that we get depth = 1 if in navtree mode
    # unless specified otherwise
    orig_depth = depth
    if depth == -1:
        depth = navtree

    # Optimized navtree starting with absolute path
    if absolute_path and navtree and depth == 1 and default_level == 0:
        set_list = []
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i]) or '/'
            try:
                set_list.append(self._index_parents[parent_path])
            except KeyError:
                pass
        return multiunion(set_list)

    # Optimized breadcrumbs
    elif absolute_path and navtree and depth == 0 and default_level == 0:
        item_list = IISet()
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i]) or '/'
            try:
                item_list.insert(self._index_items[parent_path])
            except KeyError:
                pass
        return item_list

    # Specific object search
    elif absolute_path and orig_depth == 0 and default_level == 0:
        try:
            return IISet([self._index_items[path]])
        except KeyError:
            return IISet()

    # Single depth search
    elif absolute_path and orig_depth == 1 and default_level == 0:
        # only get objects contained in requested folder
        try:
            return self._index_parents[path]
        except KeyError:
            return IISet()

    # Sitemaps, relative paths, and depth queries
    elif startlevel >= 0:
        pathset = None   # Same as pathindex
        navset = None    # For collecting siblings along the way
        depthset = None  # For limiting depth

        if navtree and depth and \
                None in self._index and \
                startlevel in self._index[None]:
            navset = self._index[None][startlevel]

        for level in range(startlevel, startlevel + len(comps) + depth):
            if level - startlevel < len(comps):
                comp = comps[level - startlevel]
                if comp not in self._index or \
                        level not in self._index[comp]:
                    # Navtree is inverse, keep going even for
                    # nonexisting paths
                    if navtree:
                        pathset = IISet()
                    else:
                        return IISet()
                else:
                    pathset = intersection(pathset,
                                           self._index[comp][level])
                if navtree and depth and \
                        None in self._index and \
                        (level + depth) in self._index[None]:
                    navset = union(
                        navset,
                        intersection(pathset,
                                     self._index[None][level + depth]))
            if level - startlevel >= len(comps) or navtree:
                if None in self._index and level in self._index[None]:
                    depthset = union(
                        depthset,
                        intersection(pathset, self._index[None][level]))

        if navtree:
            return union(depthset, navset) or IISet()
        elif depth:
            return depthset or IISet()
        else:
            return pathset or IISet()

    else:
        results = IISet()
        for level in range(self._depth + 1):
            ids = None
            try:
                for offset, comp in enumerate(comps):
                    ids = intersection(ids,
                                       self._index[comp][level + offset])
            except KeyError:
                # One missing component rules this level out; there is
                # no point in intersecting the remaining ones.
                continue
            results = union(results, ids)
        return results
if 'max' in opr_args: hi = max(keys) else: hi = None if hi: setlist = index.values(lo,hi) else: setlist = index.values(lo) #for k, set in setlist: #if type(set) is IntType: #set = IISet((set,)) #r = set_func(r, set) # XXX: Use multiunion! r = multiunion(setlist) else: # not a range search for key in keys: set = index.get(key, None) if set is not None: if type(set) is IntType: set = IISet((set,)) r = set_func(r, set) if type(r) is IntType: r = IISet((r,)) if r is None: return IISet(), (self.id,) else:
def search(self, path, default_level=0, depth=-1, navtree=0,
           navtree_start=0):
    """Return the set of document ids matching a path query.

    path is either a string representing a relative URL or a part of a
    relative URL, or a tuple (path, level).

    level >= 0  starts searching at the given level
    level <  0  tries the path components at every level from 0 up to
                ``self._depth``

    ``depth`` limits how far below the path results may lie (-1 means
    unlimited, or 1 in navtree mode).  ``navtree`` switches on the
    navigation-tree style query that also collects entries along the
    path, starting at ``navtree_start``.
    """
    if isinstance(path, basestring):
        startlevel = default_level
    else:
        startlevel = int(path[1])
        path = path[0]

    absolute_path = isinstance(path, basestring) and path.startswith('/')

    # A real list is needed: comps is sliced, measured and mutated below.
    comps = list(filter(None, path.split('/')))
    orig_comps = [''] + comps[:]

    # Optimization - avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about catalog/index having
    # the same container as content
    if default_level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        while indexpath and comps:
            if indexpath[0] != comps[0]:
                break
            del indexpath[0]
            del comps[0]
            startlevel += 1

    if not comps and depth == -1 and not navtree:
        return IISet(self._unindex.keys())

    # Make sure that we get depth = 1 if in navtree mode
    # unless specified otherwise
    orig_depth = depth
    if depth == -1:
        depth = navtree

    # Optimized navtree starting with absolute path
    if absolute_path and navtree and depth == 1 and default_level == 0:
        set_list = []
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i]) or '/'
            try:
                set_list.append(self._index_parents[parent_path])
            except KeyError:
                pass
        return multiunion(set_list)

    # Optimized breadcrumbs
    elif absolute_path and navtree and depth == 0 and default_level == 0:
        item_list = IISet()
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i]) or '/'
            try:
                item_list.insert(self._index_items[parent_path])
            except KeyError:
                pass
        return item_list

    # Specific object search
    elif absolute_path and orig_depth == 0 and default_level == 0:
        try:
            return IISet([self._index_items[path]])
        except KeyError:
            return IISet()

    # Single depth search
    elif absolute_path and orig_depth == 1 and default_level == 0:
        # only get objects contained in requested folder
        try:
            return self._index_parents[path]
        except KeyError:
            return IISet()

    # Sitemaps, relative paths, and depth queries
    elif startlevel >= 0:
        pathset = None   # Same as pathindex
        navset = None    # For collecting siblings along the way
        depthset = None  # For limiting depth

        if navtree and depth and \
                None in self._index and \
                startlevel in self._index[None]:
            navset = self._index[None][startlevel]

        for level in range(startlevel, startlevel + len(comps) + depth):
            if level - startlevel < len(comps):
                comp = comps[level - startlevel]
                if comp not in self._index or \
                        level not in self._index[comp]:
                    # Navtree is inverse, keep going even for
                    # nonexisting paths
                    if navtree:
                        pathset = IISet()
                    else:
                        return IISet()
                else:
                    pathset = intersection(pathset,
                                           self._index[comp][level])
                if navtree and depth and \
                        None in self._index and \
                        (level + depth) in self._index[None]:
                    navset = union(
                        navset,
                        intersection(pathset,
                                     self._index[None][level + depth]))
            if level - startlevel >= len(comps) or navtree:
                if None in self._index and level in self._index[None]:
                    depthset = union(
                        depthset,
                        intersection(pathset, self._index[None][level]))

        if navtree:
            return union(depthset, navset) or IISet()
        elif depth:
            return depthset or IISet()
        else:
            return pathset or IISet()

    else:
        results = IISet()
        for level in range(self._depth + 1):
            ids = None
            try:
                for offset, comp in enumerate(comps):
                    ids = intersection(ids,
                                       self._index[comp][level + offset])
            except KeyError:
                # One missing component rules this level out; there is
                # no point in intersecting the remaining ones.
                continue
            results = union(results, ids)
        return results
def daterangeindex_apply_index(self, request, cid='', res=None):
    """Apply the date range index to the query given in ``request``.

    Returns ``None`` when the request carries no keys for this index,
    otherwise a ``(result_set, (since_field, until_field))`` tuple.

    Without ``res`` the matching set is computed directly.  With ``res``
    the non-matching ("inverse") set is computed and subtracted from it.
    When a REQUEST and a catalog are reachable, the computed set is
    stored in a per-request cache keyed by catalog id and counter.
    ``cid`` is accepted for signature compatibility and is not used.
    """
    fields = (self._since_field, self._until_field)

    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])

    # ``cache`` stays None unless both a REQUEST and a catalog are found.
    cache = None
    cachekey = None
    REQUEST = getattr(self, 'REQUEST', None)
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
        if catalog is not None:
            key = '%s_%s' % (catalog.getId(), catalog.getCounter())
            cache = REQUEST.get(key, None)
            # Floor division keeps the bucket an int on Python 3, and the
            # explicit branch stops terms 0..9 from sharing the 'None'
            # key that is meant for non-integer terms.
            if isinstance(term, int):
                tid = term // 10
            else:
                tid = 'None'
            index_id = self.getId()
            if res is None:
                cachekey = '_daterangeindex_%s_%s' % (index_id, tid)
            else:
                cachekey = '_daterangeindex_inverse_%s_%s' % (index_id, tid)
            if cache is None:
                cache = REQUEST[key] = RequestCache()
            else:
                cached = cache.get(cachekey, None)
                if cached is not None:
                    if res is None:
                        return cached, fields
                    return difference(res, cached), fields

    if res is None:
        # Aggregate sets for each bucket separately, to avoid
        # large-small union penalties.
        # XXX Does this apply for multiunion?
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        since = multiunion(self._since.values(None, term))
        bounded = intersection(until, since)

        result = multiunion([bounded, until_only, since_only, self._always])
        if cache is not None:
            cache[cachekey] = result
        return result, fields

    # Compute the inverse and subtract from res
    until_only = multiunion(self._until_only.values(None, term - 1))
    since_only = multiunion(self._since_only.values(term + 1))
    until = multiunion(self._until.values(None, term - 1))
    since = multiunion(self._since.values(term + 1))

    result = multiunion([until_only, since_only, until, since])
    if cache is not None:
        cache[cachekey] = result
    return difference(res, result), fields
def _apply_index(self, request, resultset=None):
    """Apply the index to the query parameters given in ``request``.

    The query keys are passed through ``self._convert`` before the index
    is consulted (the original documentation describes this as
    normalizing them to integer values at minute precision).

    ``resultset``, when given, is intersected with every stored set that
    is looked up in a non-range search.

    Returns ``None`` when the request carries no keys for this index,
    otherwise a ``(result_set, (self.id,))`` tuple.

    Raises ``RuntimeError`` when the requested operator is not listed in
    ``self.operators``.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    # Materialize the converted keys: they are read by min(), max() and
    # by the lookup loop, which a one-shot iterator would not survive.
    keys = list(map(self._convert, record.keys))

    index = self._index
    r = None
    opr = None

    # experimental code for specifying the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % operator)

    # depending on the operator we use intersection or union
    if operator == "or":
        set_func = union
    else:
        set_func = intersection

    # range parameter
    range_arg = record.get('range', None)
    if range_arg:
        opr = "range"
        opr_args = []
        if range_arg.find("min") > -1:
            opr_args.append("min")
        if range_arg.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = hi = None
        if 'min' in opr_args:
            lo = min(keys)
        if 'max' in opr_args:
            hi = max(keys)
        # A bound of None means "open ended", so both bounds are passed
        # straight through; testing the truth of ``hi`` would silently
        # drop a legitimate upper bound of 0.
        r = multiunion(index.values(lo, hi))
    else:  # not a range search
        for key in keys:
            found = index.get(key, None)
            if found is None:
                continue
            if isinstance(found, int):
                found = IISet((found, ))
            else:
                # the stored set can't be bigger than resultset
                found = intersection(found, resultset)
            r = set_func(r, found)

    if isinstance(r, int):
        r = IISet((r, ))

    if r is None:
        return IISet(), (self.id, )
    return r, (self.id, )
def multiunion(self, *args):
    """Forward all positional arguments to ``BTrees.LFBTree.multiunion``.

    The import is done at call time, inside the method.
    """
    from BTrees.LFBTree import multiunion as _lf_multiunion
    merged = _lf_multiunion(*args)
    return merged
def query_index(self, record, resultset=None):
    """Search the index with the given IndexQuery object.

    If the query has a key which matches the 'id' of
    the index instance, one of a few things can happen:

      - if the value is a string, turn the value into
        a single-element sequence, and proceed.

      - if the value is a sequence, return a union search.

      - If the value is a dict and contains a key of the form
        '<index>_operator' this overrides the default method
        ('or') to combine search results. Valid values are 'or'
        and 'and'.

    ``resultset``, when given, bounds the result of 'and' queries and is
    used to pre-filter small 'or' queries.  Returns the set of matching
    document ids.
    """
    index = self._index
    r = None
    opr = None

    # not / exclude parameter.  Convert it into indexed format once, up
    # front, so that the cached and the uncached paths exclude the same
    # keys.  NOTE(review): assumes ``_apply_not`` looks the keys up
    # without converting them itself - confirm.
    not_parm = record.get('not', None)
    if not_parm:
        not_parm = list(map(self._convert, not_parm))

    operator = record.operator

    cachekey = None
    cache = self.getRequestCache()
    if cache is not None:
        cachekey = self.getRequestCacheKey(record)
        if cachekey is not None:
            cached = None
            if operator == 'or':
                cached = cache.get(cachekey, None)
            else:
                cached_setlist = cache.get(cachekey, None)
                if cached_setlist is not None:
                    r = resultset
                    for s in cached_setlist:
                        # the result is bound by the resultset
                        r = intersection(r, s)
                        # An empty intersection cannot grow again
                        if not r:
                            break
                    cached = r

            if cached is not None:
                if isinstance(cached, int):
                    cached = IISet((cached, ))

                if not_parm:
                    exclude = self._apply_not(not_parm, resultset)
                    cached = difference(cached, exclude)

                return cached

    if not record.keys and not_parm:
        # we have only a 'not' query
        record.keys = [k for k in index.keys() if k not in not_parm]
    else:
        # convert query arguments into indexed format
        record.keys = list(map(self._convert, record.keys))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = hi = None
        if 'min' in opr_args:
            lo = min(record.keys)
        if 'max' in opr_args:
            hi = max(record.keys)
        # None is an open bound; passing ``hi`` through unconditionally
        # keeps falsy-but-valid upper bounds such as 0.
        setlist = index.values(lo, hi)

        # If only one key matched, return its set immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result, ))

            if cachekey is not None:
                if operator == 'or':
                    cache[cachekey] = result
                else:
                    cache[cachekey] = [result]

            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result

        # Wrap bare document ids so that every entry is a set
        tmp = []
        for s in setlist:
            if isinstance(s, int):
                s = IISet((s, ))
            tmp.append(s)

        if operator == 'or':
            r = multiunion(tmp)
            if cachekey is not None:
                cache[cachekey] = r
        else:
            # For intersection, sort with smallest data set first
            if len(tmp) > 2:
                setlist = sorted(tmp, key=len)
            else:
                setlist = tmp

            # 'r' is not invariant of resultset. Thus, we
            # have to remember 'setlist'
            if cachekey is not None:
                cache[cachekey] = setlist

            r = resultset
            for s in setlist:
                # the result is bound by the resultset
                r = intersection(r, s)
                # An empty intersection cannot grow again
                if not r:
                    break

    else:  # not a range search
        setlist = []
        for k in record.keys:
            if k is None:
                # Prevent None from being looked up. None doesn't
                # have a valid ordering definition compared to any
                # other object. BTrees 4.0+ will throw a TypeError
                # "object has default comparison".
                continue
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # A missing key adds nothing to a union
                    continue
                # A missing key makes the whole intersection empty
                if cachekey is not None:
                    # If operator is 'and', we have to cache a list of
                    # IISet objects
                    cache[cachekey] = [IISet()]
                return IISet()
            elif isinstance(s, int):
                s = IISet((s, ))
            setlist.append(s)

        # If only one key matched, return its set immediately
        if len(setlist) == 1:
            result = setlist[0]
            if isinstance(result, int):
                result = IISet((result, ))

            if cachekey is not None:
                if operator == 'or':
                    cache[cachekey] = result
                else:
                    cache[cachekey] = [result]

            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is
            # faster than creating a multiunion first.
            if resultset is not None and len(resultset) < 200:
                r = multiunion([intersection(resultset, s) for s in setlist])
                # 'r' is not invariant of resultset here, so only the
                # full union of 'setlist' could be cached.  Computing it
                # just for the cache may be a performance killer, so
                # nothing is cached on this path.
            else:
                r = multiunion(setlist)
                if cachekey is not None:
                    cache[cachekey] = r
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)

            # 'r' is not invariant of resultset. Thus, we
            # have to remember 'setlist' itself
            if cachekey is not None:
                cache[cachekey] = setlist

            r = resultset
            for s in setlist:
                r = intersection(r, s)
                # An empty intersection cannot grow again
                if not r:
                    break

    if isinstance(r, int):
        r = IISet((r, ))

    if r is None:
        return IISet()

    if not_parm:
        exclude = self._apply_not(not_parm, resultset)
        r = difference(r, exclude)
    return r
def multiunion(self, *args):
    """Forward all positional arguments to ``BTrees.IIBTree.multiunion``.

    The import is done at call time, inside the method.
    """
    from BTrees.IIBTree import multiunion as _ii_multiunion
    merged = _ii_multiunion(*args)
    return merged
def _apply_index(self, request, resultset=None):
    """Find the uids of events overlapping the queried start/end period.

    'request' is a mapping-like object.  When it has no entry for
    ``self._id`` every indexed uid is returned together with an empty
    tuple of used fields.

    Otherwise the 'start' and 'end' positions are read through
    ``self._get_position``.  Events ending before 'start' and events
    starting after 'end' are filtered out, and the outcome is handed to
    ``self._finalize_index`` whose return value is returned unchanged
    (presumably the usual ``(result set, used field names)`` pair --
    confirm against that helper).

    'resultset' is accepted for API compatibility but not consulted here.
    """
    # 'in' doesn't work with this request object, hence has_key().
    if not request.has_key(self._id):
        return IITreeSet(self._uid2end.keys()), ()

    start = self._get_position(request, 'start')
    end = self._get_position(request, 'end')
    used_fields = ()

    # Keep the events whose end >= the queried start.  A None end key
    # means the event recurs forever.
    try:
        maxkey = self._end2uid.maxKey()
    except ValueError:
        # The index holds no events at all.
        return IITreeSet(), used_fields

    if start is not None and maxkey is not None:
        used_fields += (self.start_attr,)
        start = start.utctimetuple()
        try:
            minkey = self._end2uid.minKey(start)
            # An event ending exactly when the search period starts
            # must not be included.
            start_uids = multiunion(self._end2uid.values(
                minkey, maxkey, excludemin=(minkey == start)))
        except ValueError:
            # Nothing ends at or after the queried start.
            start_uids = IITreeSet()

        # Open ended events always qualify.
        if self._end2uid.has_key(None):
            start_uids = union(start_uids, self._end2uid[None])
    else:
        # No start given, or (amazingly) every event recurs forever:
        # all uids are candidates.
        start_uids = IITreeSet(self._uid2end.keys())

    # XXX Intersecting with the resultset here (or after the end filter
    # below) might avoid computing recurrences for ids that will not be
    # returned anyway; performance tests will tell.

    # Drop the events that start after the queried end (start <= end).
    if end is None:
        # No end specified, take everything that passed the start filter.
        result = start_uids
    else:
        end = end.utctimetuple()
        try:
            minkey = self._start2uid.minKey()
            end_uids = multiunion(self._start2uid.values(minkey, end))
            used_fields += (self.end_attr,)
        except ValueError:
            # No events
            return IITreeSet(), used_fields
        result = intersection(start_uids, end_uids)

    return self._finalize_index(result, start, end, used_fields)
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in the request arg.

    The request argument should be a mapping object; it is parsed with
    ``parseIndexRequest`` using the index id and ``self.query_options``.

    If the parsed record has no keys (``record.keys is None``), None is
    returned.  According to the historical documentation of this method
    that covers a request without a key matching the index id as well as
    a blank string value (web forms cannot tell a blank string from an
    empty field).

    Recognised record options:

      - 'operator': "or" (union) or "and" (intersection) of the per-key
        sets; defaults to ``self.useOperator``.

      - 'range' / 'usage': a value containing "min" and/or "max" turns
        the query into a range search bounded by the smallest and/or
        largest query key.

      - 'not': keys whose documents are removed from the result.  When
        only 'not' is given, every other indexed key is queried.

    Otherwise a two-tuple is returned.  The first item is a set
    containing the record numbers of the matching records.  The second
    item is a tuple containing the names of all data fields used.

    'resultset' is the result already calculated by the caller; it bounds
    intersections and is forwarded to ``self._apply_not``.

    Raises RuntimeError for an operator not listed in ``self.operators``
    and TypeError when a query key is None.

    FAQ answer:  to search a Field Index for documents that
    have a blank string as their value, wrap the request value
    up in a tuple ala: request = {'id':('',)}
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    r = None
    opr = None

    # not / exclude parameter
    not_parm = record.get('not', None)
    if not record.keys and not_parm:
        # Convert into indexed format.  A real list is required because
        # it is used for membership tests and truthiness checks below.
        not_parm = [self._convert(k) for k in not_parm]
        # we have only a 'not' query
        record.keys = [k for k in index.keys() if k not in not_parm]
    else:
        # convert query arguments into indexed format
        record.keys = [self._convert(k) for k in record.keys]

    # experimental code for specifying the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % escape(operator))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = hi = None
        if 'min' in opr_args:
            lo = min(record.keys)
        if 'max' in opr_args:
            hi = max(record.keys)

        # Compare against None explicitly: a falsy upper bound such as
        # 0 or '' is still a bound and must not widen the range.
        if hi is not None:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)

        # Index values may be plain ints (single document); wrap them.
        setlist = [IISet((s,)) if isinstance(s, int) else s
                   for s in setlist]

        # If we only use one key, return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id,)

        if operator == 'or':
            r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            # the result is bound by the resultset
            r = resultset
            for s in setlist:
                r = intersection(r, s)

    else:  # not a range search
        setlist = []
        for k in record.keys:
            if k is None:
                raise TypeError('None cannot be in an index.')
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # A missing key adds nothing to a union
                    continue
                # A missing key empties an intersection
                return IISet(), (self.id,)
            elif isinstance(s, int):
                s = IISet((s,))
            setlist.append(s)

        # If we only use one key return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id,)

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is
            # faster than creating a multiunion first.
            if resultset is not None and len(resultset) < 200:
                r = multiunion(
                    [intersection(resultset, s) for s in setlist])
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = resultset
            for s in setlist:
                r = intersection(r, s)

    if isinstance(r, int):
        r = IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    if not_parm:
        exclude = self._apply_not(not_parm, resultset)
        r = difference(r, exclude)
    return r, (self.id,)
def search(self, path, default_level=0, depth=-1, navtree=0,
           navtree_start=0):
    """Return the set of record ids matching ``path``.

    path is either a string representing a relative URL or a part of a
    relative URL or a tuple (path,level).

    Positive ``depth`` values are rejected with ValueError.  An empty
    path is treated as 'dmd'.  Otherwise an optional leading component
    equal to the configured virtual root (``getCSEConf()``) and an
    optional 'zport' component are stripped, and the remaining path must
    start with 'dmd' or ValueError is raised.

    NOTE(review): ``startlevel`` is always reset to ``len(comps)`` below,
    so a level passed through the tuple form is effectively ignored and
    the final ``else`` branch (negative start level) looks unreachable --
    confirm before relying on either.
    """
    if isinstance(path, basestring):
        startlevel = default_level
    else:
        startlevel = int(path[1])
        path = path[0]

    absolute_path = isinstance(path, basestring) and path.startswith('/')
    comps = filter(None, path.split('/'))
    # Components with a leading '' so that '/'.join() yields absolute paths
    orig_comps = [''] + comps[:]

    if depth > 0:
        raise ValueError("Can't do depth searches anymore")
    if not comps:
        comps = ['dmd']
        startlevel = 1
    else:
        # NOTE(review): if stripping the virtual root or 'zport' leaves
        # comps empty, the next comps[0] raises IndexError -- confirm
        # whether such paths can reach this method.
        if comps[0] == getCSEConf().get('virtualroot', '').replace('/', ''):
            comps = comps[1:]
        if comps[0] == 'zport':
            comps = comps[1:]
        if comps[0] != 'dmd':
            raise ValueError("Depth searches must start with 'dmd'")
    startlevel = len(comps)

    if len(comps) == 0:
        if depth == -1 and not navtree:
            return IISet(self._unindex.keys())

    # Make sure that we get depth = 1 if in navtree mode
    # unless specified otherwise
    orig_depth = depth
    if depth == -1:
        depth = 0 or navtree

    # Optimized navtree starting with absolute path
    if absolute_path and navtree and depth == 1 and default_level == 0:
        set_list = []
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            # An empty join result stands for the root folder
            parent_path = parent_path and parent_path or '/'
            try:
                set_list.append(self._index_parents[parent_path])
            except KeyError:
                # Parent path not indexed: nothing to add
                pass
        return multiunion(set_list)
    # Optimized breadcrumbs
    elif absolute_path and navtree and depth == 0 and default_level == 0:
        item_list = IISet()
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            # An empty join result stands for the root folder
            parent_path = parent_path and parent_path or '/'
            try:
                item_list.insert(self._index_items[parent_path])
            except KeyError:
                # Path not indexed: nothing to add
                pass
        return item_list
    # Specific object search
    elif absolute_path and orig_depth == 0 and default_level == 0:
        try:
            return IISet([self._index_items[path]])
        except KeyError:
            return IISet()
    # Single depth search
    elif absolute_path and orig_depth == 1 and default_level == 0:
        # only get objects contained in requested folder
        try:
            return self._index_parents[path]
        except KeyError:
            return IISet()
    # Sitemaps, relative paths, and depth queries
    elif startlevel >= 0:
        pathset = None  # Same as pathindex
        navset = None  # For collecting siblings along the way
        depthset = None  # For limiting depth

        if navtree and depth and \
               self._index.has_key(None) and \
               self._index[None].has_key(startlevel):
            navset = self._index[None][startlevel]
        for level in range(startlevel, startlevel + len(comps)):
            if level <= len(comps):
                # The index is keyed by the joined path prefix here,
                # not by a single path component.
                comp = "/".join(comps[:level])
                if (not self._index.has_key(comp)
                        or not self._index[comp].has_key(level)):
                    # Navtree is inverse, keep going even for
                    # nonexisting paths
                    if navtree:
                        pathset = IISet()
                    else:
                        return IISet()
                else:
                    # Exact prefix/level hit: return it straight away
                    return self._index[comp][level]
                if navtree and depth and \
                       self._index.has_key(None) and \
                       self._index[None].has_key(level+depth):
                    navset = union(
                        navset,
                        intersection(pathset,
                                     self._index[None][level + depth]))
            if level - startlevel >= len(comps) or navtree:
                if (self._index.has_key(None)
                        and self._index[None].has_key(level)):
                    depthset = union(
                        depthset,
                        intersection(pathset, self._index[None][level]))

        if navtree:
            return union(depthset, navset) or IISet()
        elif depth:
            return depthset or IISet()
        else:
            return pathset or IISet()

    else:
        # Try the components at every level and union the matches
        results = IISet()
        for level in range(0, self._depth + 1):
            ids = None
            error = 0
            for cn in range(0, len(comps)):
                comp = comps[cn]
                try:
                    ids = intersection(ids, self._index[comp][level + cn])
                except KeyError:
                    # Component not indexed at this level: skip the level
                    error = 1
            if error == 0:
                results = union(results, ids)
        return results
def unindex_apply_index(self, request, cid='', type=type, res=None):
    """Apply the index to the query parameters given in 'request'.

    'request' is parsed with ``parseIndexRequest`` using the index id and
    ``self.query_options``.  None is returned when the parsed record has
    no keys.

    The record's 'operator' option ("or" / "and", default
    ``self.useOperator``) selects union or intersection of the per-key
    sets.  A 'range' or 'usage' option containing "min" and/or "max"
    turns the query into a range search bounded by the smallest and/or
    largest query key.

    'res' is the result set already calculated by the caller; it bounds
    intersections and is used to shortcut unions when it is small.
    'cid' and 'type' are accepted for signature compatibility and are
    not used.

    Returns a two-tuple: the set of matching record numbers and a tuple
    with the name of the index.  Raises RuntimeError for an operator not
    listed in ``self.operators``.
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    r = None
    opr = None

    # experimental code for specifying the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % escape(operator))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = hi = None
        if 'min' in opr_args:
            lo = min(record.keys)
        if 'max' in opr_args:
            hi = max(record.keys)

        # Compare against None explicitly: a falsy upper bound such as
        # 0 or '' is still a bound and must not widen the range.
        if hi is not None:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)

        # Index values may be plain ints (single document); wrap them.
        setlist = [IISet((s,)) if isinstance(s, int) else s
                   for s in setlist]

        # If we only use 1 key (default setting), return immediately
        if len(setlist) == 1:
            return setlist[0], (self.id,)

        if operator == 'or':
            r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = res
            for s in setlist:
                r = intersection(r, s)

    else:  # not a range search
        # Filter duplicates
        setlist = []
        for k in set(record.keys):
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # A missing key adds nothing to a union
                    continue
                # A missing key empties an intersection
                return IISet(), (self.id,)
            elif isinstance(s, int):
                s = IISet((s,))
            setlist.append(s)

        # If we only use 1 key (default setting), return immediately
        if len(setlist) == 1:
            return setlist[0], (self.id,)

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is
            # faster than creating a multiunion first.
            if res is not None and len(res) < 200:
                r = multiunion([intersection(res, s) for s in setlist])
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = res
            for s in setlist:
                r = intersection(r, s)

    if isinstance(r, int):
        r = IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    return r, (self.id,)
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in the request arg.

    The request argument should be a mapping object; it is parsed with
    ``parseIndexRequest`` using the index id and ``self.query_options``.

    If the parsed record has no keys (``record.keys is None``), None is
    returned.  According to the historical documentation of this method
    that covers a request without a key matching the index id as well as
    a blank string value (web forms cannot tell a blank string from an
    empty field).

    Recognised record options:

      - 'operator': "or" (union) or "and" (intersection) of the per-key
        sets; defaults to ``self.useOperator``.

      - 'range' / 'usage': a value containing "min" and/or "max" turns
        the query into a range search bounded by the smallest and/or
        largest query key.

      - 'not': keys whose documents are removed from the result.  When
        only 'not' is given, every other indexed key is queried.

    Otherwise a two-tuple is returned.  The first item is a set
    containing the record numbers of the matching records.  The second
    item is a tuple containing the names of all data fields used.

    'resultset' is the result already calculated by the caller; it bounds
    intersections and is forwarded to ``self._apply_not``.

    Raises RuntimeError for an operator not listed in ``self.operators``.

    FAQ answer:  to search a Field Index for documents that
    have a blank string as their value, wrap the request value
    up in a tuple ala: request = {'id':('',)}
    """
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    r = None
    opr = None

    # not / exclude parameter
    not_parm = record.get('not', None)
    if not record.keys and not_parm:
        # Convert into indexed format.  A real list is required because
        # it is used for membership tests and truthiness checks below.
        not_parm = [self._convert(k) for k in not_parm]
        # we have only a 'not' query
        record.keys = [k for k in index.keys() if k not in not_parm]
    else:
        # convert query arguments into indexed format
        record.keys = [self._convert(k) for k in record.keys]

    # experimental code for specifying the operator
    operator = record.get('operator', self.useOperator)
    if operator not in self.operators:
        raise RuntimeError("operator not valid: %s" % escape(operator))

    # Range parameter
    range_parm = record.get('range', None)
    if range_parm:
        opr = "range"
        opr_args = []
        if range_parm.find("min") > -1:
            opr_args.append("min")
        if range_parm.find("max") > -1:
            opr_args.append("max")

    if record.get('usage', None):
        # see if any usage params are sent to field
        opr = record.usage.lower().split(':')
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":  # range search
        lo = hi = None
        if 'min' in opr_args:
            lo = min(record.keys)
        if 'max' in opr_args:
            hi = max(record.keys)

        # Compare against None explicitly: a falsy upper bound such as
        # 0 or '' is still a bound and must not widen the range.
        if hi is not None:
            setlist = index.values(lo, hi)
        else:
            setlist = index.values(lo)

        # Index values may be plain ints (single document); wrap them.
        setlist = [IISet((s,)) if isinstance(s, int) else s
                   for s in setlist]

        # If we only use one key, return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id,)

        if operator == 'or':
            r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            # the result is bound by the resultset
            r = resultset
            for s in setlist:
                r = intersection(r, s)

    else:  # not a range search
        setlist = []
        for k in record.keys:
            s = index.get(k, None)
            # If None, try to bail early
            if s is None:
                if operator == 'or':
                    # A missing key adds nothing to a union
                    continue
                # A missing key empties an intersection
                return IISet(), (self.id,)
            elif isinstance(s, int):
                s = IISet((s,))
            setlist.append(s)

        # If we only use one key return immediately
        if len(setlist) == 1:
            result = setlist[0]
            if not_parm:
                exclude = self._apply_not(not_parm, resultset)
                result = difference(result, exclude)
            return result, (self.id,)

        if operator == 'or':
            # If we already get a small result set passed in, intersecting
            # the various indexes with it and doing the union later is
            # faster than creating a multiunion first.
            if resultset is not None and len(resultset) < 200:
                r = multiunion(
                    [intersection(resultset, s) for s in setlist])
            else:
                r = multiunion(setlist)
        else:
            # For intersection, sort with smallest data set first
            if len(setlist) > 2:
                setlist = sorted(setlist, key=len)
            r = resultset
            for s in setlist:
                r = intersection(r, s)

    if isinstance(r, int):
        r = IISet((r,))
    if r is None:
        return IISet(), (self.id,)
    if not_parm:
        exclude = self._apply_not(not_parm, resultset)
        r = difference(r, exclude)
    return r, (self.id,)
def _apply_index(self, request, resultset=None):
    """Return the records whose date range contains the queried term.

    'request' should be a mapping object; it is parsed with
    ``parseIndexRequest``.  None is returned when the parsed record has
    no keys.

    Otherwise a two-tuple is returned: the set of matching record
    numbers, and a tuple with the names of the since and until fields.

    Without a 'resultset' the matching set is computed directly.  With a
    'resultset' the records that do NOT match are computed instead and
    subtracted from it.

    When a REQUEST and an enclosing catalog can be acquired, the computed
    set (direct or inverse) is stored in a per-request cache under a key
    built from the index id and the term divided by ten, and reused on
    later calls.
    """
    iid = self.id
    record = parseIndexRequest(request, iid, self.query_options)
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])
    inverse = resultset is not None

    REQUEST = aq_get(self, 'REQUEST', None)
    catalog = None
    if REQUEST is not None:
        catalog = aq_parent(aq_parent(aq_inner(self)))
    use_cache = REQUEST is not None and catalog is not None

    fields = (self._since_field, self._until_field)

    if use_cache:
        key = self._cache_key(catalog)
        cache = REQUEST.get(key, None)

        tid = 'None'
        if isinstance(term, int):
            tid = term / 10 or 'None'

        if inverse:
            cachekey = '_daterangeindex_inverse_%s_%s' % (iid, tid)
        else:
            cachekey = '_daterangeindex_%s_%s' % (iid, tid)

        if cache is None:
            cache = REQUEST[key] = RequestCache()
        else:
            cached = cache.get(cachekey, None)
            if cached is not None:
                if inverse:
                    cached = difference(resultset, cached)
                return (cached, fields)

    if inverse:
        # Collect everything outside the term and subtract it from
        # the incoming result set.
        excluded = multiunion([
            multiunion(self._until_only.values(None, term - 1)),
            multiunion(self._since_only.values(term + 1)),
            multiunion(self._until.values(None, term - 1)),
            multiunion(self._since.values(term + 1)),
        ])
        if use_cache:
            cache[cachekey] = excluded
        return (difference(resultset, excluded), fields)

    # Aggregate sets for each bucket separately, to avoid
    # large-small union penalties.
    until_only = multiunion(self._until_only.values(term))
    since_only = multiunion(self._since_only.values(None, term))
    until = multiunion(self._until.values(term))

    # Total result is bound by resultset
    if REQUEST is None:
        until = intersection(resultset, until)

    since = multiunion(self._since.values(None, term))
    bounded = intersection(until, since)

    # Merge from smallest to largest.
    matched = multiunion([bounded, until_only, since_only, self._always])
    if use_cache:
        cache[cachekey] = matched
    return (matched, fields)
def extendedpathindex_search(self, path, default_level=0, depth=-1, navtree=0,
                             navtree_start=0, tmpres=None):
    """Return the set of record ids matching a path query.

    path is either a string representing a relative URL or a part of a
    relative URL or a tuple (path,level).

    default_level specifies the level to use when no more specific
    level has been passed in with the path.

    level >= 0  starts searching at the given level
    level <  0  finds matches at *any* level

    depth lets you limit the results to items at most depth levels deeper
    than the matched path. depth == 0 means no subitems are included at
    all, with depth == 1 only direct children are included, etc.
    depth == -1, the default, returns all children at any depth.

    navtree is treated as a boolean; if it evaluates to True, not only
    the query match is returned, but also each container in the path. If
    depth is greater than 0, also all siblings of those containers, as
    well as the siblings of the match are included as well, plus *all*
    documents at the starting level.

    navtree_start limits what containers are included in a navtree
    search. If greater than 0, only containers (and possibly their
    siblings) at that level and up will be included in the resultset.

    tmpres is accepted but not used by this implementation.
    """
    if isinstance(path, basestring):
        level = default_level
    else:
        level = int(path[1])
        path = path[0]

    if level < 0:
        # Search at every level, return the union of all results
        return multiunion(
            [self.search(path, level, depth, navtree, navtree_start)
             for level in xrange(self._depth + 1)])

    comps = filter(None, path.split('/'))

    if navtree and depth == -1:  # Navtrees don't do recursive
        depth = 1

    #
    # Optimisations
    #

    pathlength = level + len(comps) - 1
    if navtree and navtree_start > min(pathlength + depth, self._depth):
        # This navtree_start excludes all items that match the depth
        return IISet()
    if pathlength > self._depth:
        # Our search is for a path longer than anything in the index
        return IISet()

    if level == 0 and depth in (0, 1):
        # We have easy indexes for absolute paths where
        # we are looking for depth 0 or 1 result sets
        if navtree:
            # Optimized absolute path navtree and breadcrumbs cases
            result = []
            # Append only the lookups that actually found something
            add = lambda x: x is not None and result.append(x)
            if depth == 1:
                # Navtree case, all sibling elements along the path
                convert = multiunion
                index = self._index_parents
            else:
                # Breadcrumbs case, all direct elements along the path
                convert = IISet
                index = self._index_items
            # Collect all results along the path
            for i in range(len(comps), navtree_start - 1, -1):
                parent_path = '/' + '/'.join(comps[:i])
                add(index.get(parent_path))
            return convert(result)

        if not path.startswith('/'):
            path = '/' + path
        if depth == 0:
            # Specific object search
            res = self._index_items.get(path)
            # NOTE(review): a falsy stored value (e.g. record id 0) also
            # yields an empty set here -- confirm ids are never 0.
            return res and IISet([res]) or IISet()
        else:
            # Single depth search
            return self._index_parents.get(path, IISet())

    # Avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about all indexed values having the
    # same common base path
    if level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        minlength = min(len(indexpath), len(comps))
        # Truncate path to first different element
        for i in xrange(minlength):
            if indexpath[i] != comps[i]:
                break
            level += 1
        # Drop the leading components shared with the index's own path
        comps = comps[level:]

    if not comps and depth == -1:
        # Recursive search for everything
        return IISet(self._unindex)

    #
    # Core application of the indexes
    #

    pathset = None
    depthset = None  # For limiting depth

    if navtree and depth > 0:
        # Include the elements up to the matching path
        depthset = multiunion([
            self._index.get(None, {}).get(i, IISet())
            for i in range(min(navtree_start, level),
                           max(navtree_start, level) + 1)])

    indexedcomps = enumerate(comps)
    if not navtree:
        # Optimize relative-path searches by starting with the
        # presumed smaller sets at the end of the path first
        # We can't do this for the navtree case because it needs
        # the bigger rootset to include siblings along the way.
        indexedcomps = list(indexedcomps)
        indexedcomps.reverse()

    for i, comp in indexedcomps:
        # Find all paths that have comp at the given level
        res = self._index.get(comp, {}).get(i + level)
        if res is None:
            # Non-existing path; navtree is inverse, keep going
            pathset = IISet()
            if not navtree:
                return pathset
        pathset = intersection(pathset, res)

        if navtree and i + level >= navtree_start:
            depthset = union(depthset, intersection(
                pathset, self._index.get(None, {}).get(i + level)))

    if depth >= 0:
        # Limit results to those that terminate within depth levels
        start = len(comps) - 1
        if navtree:
            start = max(start, (navtree_start - level))
        # filter(None, ...) drops the None and empty sets before the union
        depthset = multiunion(filter(None, [depthset] + [
            intersection(pathset, self._index.get(None, {}).get(i + level))
            for i in xrange(start, start + depth + 1)]))

    if navtree or depth >= 0:
        return depthset
    return pathset
def search(self, path, default_level=0, depth=-1, navtree=0,
           navtree_start=0):
    """Return the set of record ids matching ``path``.

    path is either a string representing a relative URL or a part of a
    relative URL or a tuple (path,level).

    Positive ``depth`` values are rejected with ValueError.  An empty
    path is treated as 'dmd'; a leading 'zport' component is stripped;
    any other path must start with 'dmd' or ValueError is raised.

    NOTE(review): ``startlevel`` is always reset to ``len(comps)`` below,
    so a level passed through the tuple form is effectively ignored and
    the final ``else`` branch (negative start level) looks unreachable --
    confirm before relying on either.
    """
    if isinstance(path, basestring):
        startlevel = default_level
    else:
        startlevel = int(path[1])
        path = path[0]

    absolute_path = isinstance(path, basestring) and path.startswith('/')
    comps = filter(None, path.split('/'))
    # Components with a leading '' so that '/'.join() yields absolute paths
    orig_comps = [''] + comps[:]

    # The call form of raise is used because the "raise E, msg" statement
    # form is only valid on Python 2.
    if depth > 0:
        raise ValueError("Can't do depth searches anymore")

    if not comps:
        comps = ['dmd']
        startlevel = 1
    elif comps[0] == 'zport':
        comps = comps[1:]
    elif comps[0] != 'dmd':
        raise ValueError("Depth searches must start with 'dmd'")
    startlevel = len(comps)

    if len(comps) == 0:
        if depth == -1 and not navtree:
            return IISet(self._unindex.keys())

    # Make sure that we get depth = 1 if in navtree mode
    # unless specified otherwise
    orig_depth = depth
    if depth == -1:
        depth = 0 or navtree

    # Optimized navtree starting with absolute path
    if absolute_path and navtree and depth == 1 and default_level == 0:
        set_list = []
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            # An empty join result stands for the root folder
            parent_path = parent_path and parent_path or '/'
            try:
                set_list.append(self._index_parents[parent_path])
            except KeyError:
                # Parent path not indexed: nothing to add
                pass
        return multiunion(set_list)
    # Optimized breadcrumbs
    elif absolute_path and navtree and depth == 0 and default_level == 0:
        item_list = IISet()
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            # An empty join result stands for the root folder
            parent_path = parent_path and parent_path or '/'
            try:
                item_list.insert(self._index_items[parent_path])
            except KeyError:
                # Path not indexed: nothing to add
                pass
        return item_list
    # Specific object search
    elif absolute_path and orig_depth == 0 and default_level == 0:
        try:
            return IISet([self._index_items[path]])
        except KeyError:
            return IISet()
    # Single depth search
    elif absolute_path and orig_depth == 1 and default_level == 0:
        # only get objects contained in requested folder
        try:
            return self._index_parents[path]
        except KeyError:
            return IISet()
    # Sitemaps, relative paths, and depth queries
    elif startlevel >= 0:
        pathset = None  # Same as pathindex
        navset = None  # For collecting siblings along the way
        depthset = None  # For limiting depth

        if navtree and depth and \
               self._index.has_key(None) and \
               self._index[None].has_key(startlevel):
            navset = self._index[None][startlevel]
        for level in range(startlevel, startlevel + len(comps)):
            if level <= len(comps):
                # The index is keyed by the joined path prefix here,
                # not by a single path component.
                comp = "/".join(comps[:level])
                if (not self._index.has_key(comp)
                        or not self._index[comp].has_key(level)):
                    # Navtree is inverse, keep going even for
                    # nonexisting paths
                    if navtree:
                        pathset = IISet()
                    else:
                        return IISet()
                else:
                    # Exact prefix/level hit: return it straight away
                    return self._index[comp][level]
                if navtree and depth and \
                       self._index.has_key(None) and \
                       self._index[None].has_key(level+depth):
                    navset = union(
                        navset,
                        intersection(pathset,
                                     self._index[None][level + depth]))
            if level - startlevel >= len(comps) or navtree:
                if (self._index.has_key(None)
                        and self._index[None].has_key(level)):
                    depthset = union(
                        depthset,
                        intersection(pathset, self._index[None][level]))

        if navtree:
            return union(depthset, navset) or IISet()
        elif depth:
            return depthset or IISet()
        else:
            return pathset or IISet()

    else:
        # Try the components at every level and union the matches
        results = IISet()
        for level in range(0, self._depth + 1):
            ids = None
            error = 0
            for cn in range(0, len(comps)):
                comp = comps[cn]
                try:
                    ids = intersection(ids, self._index[comp][level + cn])
                except KeyError:
                    # Component not indexed at this level: skip the level
                    error = 1
            if error == 0:
                results = union(results, ids)
        return results