def _reindex_doc(self, docid, text):
    """Re-index ``docid`` against ``text``, rewriting only what changed.

    The word ids currently stored for ``docid`` are compared with the word
    ids the lexicon produces for ``text``.  Words that disappeared are
    removed, words that appeared are added, and words present in both are
    rewritten only when their weight differs.

    Returns the number of word ids produced for ``text``.
    """
    # Touch as few docid->w(docid, score) maps in ._wordinfo as possible.
    previous_wids = self.get_words(docid)
    # The previous document weight is not needed; only per-word weights are.
    previous_weights, _previous_docw = self._get_frequencies(previous_wids)

    current_wids = self._lexicon.sourceToWordIds(text)
    current_weights, current_docw = self._get_frequencies(current_wids)

    previous_set = IFTreeSet(previous_weights.keys())
    current_set = IFTreeSet(current_weights.keys())

    shared = intersection(previous_set, current_set)
    dropped = difference(previous_set, shared)
    added = difference(current_set, shared)
    del previous_set, current_set

    for wid in dropped.keys():
        self._del_wordinfo(wid, docid)

    for wid in added.keys():
        self._add_wordinfo(wid, current_weights[wid], docid)

    for wid in shared.keys():
        # For the Okapi indexer, a rewrite happens only for words whose
        # counts have changed.  For the cosine indexer, it may happen for
        # every wid, since W(d) probably changed and W(d) is divided into
        # every score.
        score = current_weights[wid]
        if previous_weights[wid] == score:
            continue
        self._add_wordinfo(wid, score, docid)

    self._docweight[docid] = current_docw
    self._docwords[docid] = widcode.encode(current_wids)
    return len(current_wids)
def getFromIndex(self, klass, property, operator, value):
    """Answer ``property <operator> value`` from a catalog field index.

    Every registered ``ICatalog`` utility is scanned for a ``FieldIndex``
    whose ``field_name`` equals ``property`` and whose interface is named
    ``klass``.  The first matching index answers the query and the int ids
    it yields are resolved to objects through the ``IIntIds`` utility.

    Field index range queries are ``(minimum, maximum)`` tuples in which
    ``None`` leaves that side open, so ``'<='`` bounds the maximum and
    ``'>='`` bounds the minimum.  The strict operators and ``'!='`` take
    the corresponding range and subtract the exact matches.

    ``operator`` must be one of ``'=='``, ``'!='``, ``'<='``, ``'<'``,
    ``'>='`` or ``'>'``.

    Returns a list of objects.  When no index matches, falls back to
    ``self.getAll(klass)``.

    Raises ``ValueError`` when an index matches but ``operator`` is not
    supported.
    """
    # operator -> (range bounds, whether exact matches must be subtracted)
    queries = {
        '==': ((value, value), False),
        '!=': ((None, None), True),
        '<=': ((None, value), False),
        '<': ((None, value), True),
        '>=': ((value, None), False),
        '>': ((value, None), True),
    }
    catalogs = getUtilitiesFor(ICatalog)
    intids = getUtility(IIntIds)
    for name, catalog in catalogs:
        for iname, index in catalog.items():
            if not (isinstance(index, FieldIndex)
                    and index.field_name == property
                    and index.interface.__name__ == klass):
                continue
            query = queries.get(operator)
            if query is None:
                raise ValueError("unsupported operator: %r" % (operator,))
            bounds, exclude_equal = query
            results = catalog.apply({iname: bounds})
            if exclude_equal:
                equal = catalog.apply({iname: (value, value)})
                results = difference(results, equal)
            return [intids.getObject(result) for result in results]
    # I could check whether property has an index by hasPropertyIndex.
    # But by this approach this always returns IF objects
    return self.getAll(klass)
def getFromIndex(self, klass, property, operator, value):
    """Return the objects satisfying ``property <operator> value`` as a list

    Every registered ``ICatalog`` utility is scanned for a ``FieldIndex``
    whose ``field_name`` equals ``property`` and whose interface is named
    ``klass``.  The first matching index answers the query and the int ids
    it yields are resolved to objects through the ``IIntIds`` utility.

    ``operator`` must be one of ``'=='``, ``'!='``, ``'<='``, ``'<'``,
    ``'>='`` or ``'>'``; a ``ValueError`` is raised for anything else once
    an index has matched.  ``ReanalyzeRequired`` is raised when no index
    matches.

    >>> from ocql.testing import utils_opt
    >>> utils_opt.setupInterfaces(None)
    >>> utils_opt.setupCatalog(None)

    >>> db = IDB(None)
    >>> result = db.getFromIndex('IOptimizedClass', 'value', '<=', 5)
    >>> type(result)
    <type 'list'>
    >>> sorted(result, key=lambda x:x.name)
    [Opt: 0, Opt: 1, Opt: 2, Opt: 3, Opt: 4, Opt: 5]
    """
    # operator -> (range bounds, whether exact matches must be subtracted).
    # Bounds are (minimum, maximum); None leaves that side open.
    queries = {
        '==': ((value, value), False),
        '!=': ((None, None), True),
        '>=': ((value, None), False),
        '>': ((value, None), True),
        '<=': ((None, value), False),
        '<': ((None, value), True),
    }
    catalogs = getUtilitiesFor(ICatalog)
    intids = getUtility(IIntIds)
    for name, catalog in catalogs:
        for iname, index in catalog.items():
            if not (isinstance(index, FieldIndex)
                    and index.field_name == property
                    and index.interface.__name__ == klass):
                continue
            query = queries.get(operator)
            if query is None:
                raise ValueError("unsupported operator: %r" % (operator,))
            bounds, exclude_equal = query
            results = catalog.apply({iname: bounds})
            if exclude_equal:
                equal = catalog.apply({iname: (value, value)})
                results = difference(results, equal)
            return [intids.getObject(result) for result in results]

    # No usable index for this class/property pair.
    raise ReanalyzeRequired()
def apply(self, cache, context=None):
    """Subtract the results of all later terms from the first term's result.

    Every term in ``self.terms`` is evaluated through ``cached_apply``
    before any subtraction happens.  An empty result from the first term,
    or an empty ``self.terms``, yields an empty ``IFSet``.  Empty results
    from later terms are skipped.  Subtraction stops as soon as the running
    result becomes empty.
    """
    results = []
    for index, term in enumerate(self.terms):
        result = term.cached_apply(cache, context)
        # If we do not have any results for the first index, just
        # return an empty set and stop here.
        if not result:
            if not index:
                return IFSet()
            continue  # pragma: no cover (peephole optimizer interferes)
        results.append(result)

    if not results:
        # No terms at all: there is nothing to subtract from.
        return IFSet()

    result = results[0]
    for other in results[1:]:
        result = difference(result, other)
        if not result:
            # Empty results
            return result
    return result
def executeQuery(self, index):
    """Intersect the positive subnode results and remove the NOT results.

    Each subnode from ``self.getValue()`` is executed against ``index``.
    For a ``"NOT"`` subnode the node it wraps is executed instead, and its
    result is collected for removal.  Results that are ``None`` are left
    out of both collections.
    """
    required = []
    excluded = []
    for child in self.getValue():
        if child.nodeType() == "NOT":
            # If None, technically it matches every doc, but we treat
            # it as if it matched none (we want
            #     real_word AND NOT stop_word
            # to act like plain real_word).
            matches = child.getValue().executeQuery(index)
            bucket = excluded
        else:
            # If None, technically it matches every doc, so needn't be
            # included.
            matches = child.executeQuery(index)
            bucket = required
        if matches is not None:
            bucket.append((matches, 1))

    combined = mass_weightedIntersection(required)
    if not excluded:
        return combined
    return difference(combined, mass_weightedUnion(excluded))
def apply(self, cache, context=None):
    """Return the index's unbounded result minus entries at ``self.value``.

    The index from ``self.getIndex(context)`` is queried twice: once with
    the open ``(None, None)`` range and once with the exact
    ``(self.value, self.value)`` range.  ``cache`` is accepted but not used
    here.
    """
    field_index = self.getIndex(context)
    everything = field_index.apply((None, None))
    exact_range = (self.value, self.value)
    return difference(everything, field_index.apply(exact_range))
def apply(self, cache, context=None):
    """Return ``self._all()`` minus the wrapped term's cached result."""
    universe = self._all()
    excluded = self.term.cached_apply(cache, context)
    return difference(universe, excluded)
def apply(self, context=None):
    """Return the index's unbounded result minus entries at ``self.not_value``.

    The index from ``self.getIndex(context)`` is queried with the open
    ``(None, None)`` range, then with the exact
    ``(self.not_value, self.not_value)`` range.  The second result is
    subtracted from the first.
    """
    field_index = self.getIndex(context)
    everything = field_index.apply((None, None))
    rejected_range = (self.not_value, self.not_value)
    rejected = field_index.apply(rejected_range)
    return difference(everything, rejected)
def apply(self, context=None):
    """Return ``self._all()`` minus the wrapped term's result."""
    universe = self._all()
    excluded = self.term.apply(context)
    return difference(universe, excluded)
def difference(self, *args):
    """Forward ``args`` to ``BTrees.IFBTree.difference`` and return its result."""
    # Imported at call time rather than at module level, as before.
    from BTrees import IFBTree
    return IFBTree.difference(*args)