Пример #1
0
    def _reindex_doc(self, docid, text):
        """Re-index ``docid`` against ``text`` and return the new word-id count.

        The word ids stored for the document are compared with the word ids
        the lexicon produces for ``text``.  Only words that disappeared,
        appeared, or changed score are written, so that as few
        docid->w(docid, score) maps in ._wordinfo as possible are touched.
        """
        previous_wids = self.get_words(docid)
        previous_scores, _previous_docw = self._get_frequencies(previous_wids)

        current_wids = self._lexicon.sourceToWordIds(text)
        current_scores, current_docw = self._get_frequencies(current_wids)

        previous_set = IFTreeSet(previous_scores.keys())
        current_set = IFTreeSet(current_scores.keys())
        shared = intersection(previous_set, current_set)
        dropped = difference(previous_set, shared)
        added = difference(current_set, shared)
        # The full sets are not needed past this point.
        del previous_set, current_set

        # Words that occur only in the old version of the document.
        for wid in dropped.keys():
            self._del_wordinfo(wid, docid)

        # Words that occur only in the new version of the document.
        for wid in added.keys():
            self._add_wordinfo(wid, current_scores[wid], docid)

        # Words present in both versions: rewrite only on a score change.
        # For the Okapi indexer that happens only for words whose counts
        # have changed.  For the cosine indexer it may happen for every
        # wid, since W(d) probably changed and W(d) is divided into every
        # score.
        for wid in shared.keys():
            score = current_scores[wid]
            if previous_scores[wid] != score:
                self._add_wordinfo(wid, score, docid)

        self._docweight[docid] = current_docw
        self._docwords[docid] = widcode.encode(current_wids)
        return len(current_wids)
Пример #2
0
    def getFromIndex(self, klass, property, operator, value):
        """Return the objects selected by ``property operator value``.

        Every registered ``ICatalog`` utility is searched for a
        ``FieldIndex`` whose ``field_name`` equals ``property`` and whose
        interface is named ``klass``.  The first such index answers the
        query, and the ids it yields are resolved to objects through the
        ``IIntIds`` utility.

        Index queries are ``(minimum, maximum)`` pairs in which ``None``
        leaves that side open, so ``>=`` is ``(value, None)`` and ``<=`` is
        ``(None, value)``.  The strict comparisons subtract the exact
        matches from the corresponding inclusive range.

        :param klass: ``__name__`` of the indexed interface.
        :param property: name of the indexed field.
        :param operator: one of ``==``, ``!=``, ``<``, ``<=``, ``>``, ``>=``.
        :param value: value the indexed field is compared against.
        :returns: list of matching objects, or the result of
            ``self.getAll(klass)`` when no suitable index exists.
        :raises ValueError: if a suitable index exists but ``operator`` is
            not one of the supported comparison operators.
        """
        catalogs = getUtilitiesFor(ICatalog)
        intids = getUtility(IIntIds)
        for _name, catalog in catalogs:
            for iname, index in catalog.items():
                if isinstance(index, FieldIndex) and \
                index.field_name == property and \
                index.interface.__name__ == klass:
                    if operator == '==':
                        results = catalog.apply({iname: (value, value)})
                    elif operator == '!=':
                        everything = catalog.apply({iname: (None, None)})
                        equal = catalog.apply({iname: (value, value)})
                        results = difference(everything, equal)
                    elif operator == '>=':
                        # Lower bound fixed, upper bound open.
                        results = catalog.apply({iname: (value, None)})
                    elif operator == '>':
                        gt_eq = catalog.apply({iname: (value, None)})
                        equal = catalog.apply({iname: (value, value)})
                        results = difference(gt_eq, equal)
                    elif operator == '<=':
                        # Lower bound open, upper bound fixed.
                        results = catalog.apply({iname: (None, value)})
                    elif operator == '<':
                        lt_eq = catalog.apply({iname: (None, value)})
                        equal = catalog.apply({iname: (value, value)})
                        results = difference(lt_eq, equal)
                    else:
                        # Previously this fell through and failed with an
                        # UnboundLocalError on ``results``.
                        raise ValueError(
                            "unsupported operator: %r" % (operator,))

                    obj_list = [intids.getObject(result) for result in results]
                    return obj_list
        # hasPropertyIndex could be used to check up front whether the
        # property is indexed.
        # NOTE(review): the original author remarks that with this approach
        # "this always returns IF objects" -- confirm what getAll() yields.
        return self.getAll(klass)
Пример #3
0
    def getFromIndex(self, klass, property, operator, value):
        """Look up objects through a catalog field index; return a list.

        The first ``FieldIndex`` found in any registered ``ICatalog``
        utility whose ``field_name`` is ``property`` and whose interface is
        named ``klass`` is queried for ``operator value``.  The ids it
        yields are turned into objects via the ``IIntIds`` utility.
        ``ReanalyzeRequired`` is raised when no such index exists.

            >>> from ocql.testing import utils_opt
            >>> utils_opt.setupInterfaces(None)
            >>> utils_opt.setupCatalog(None)

            >>> db = IDB(None)
            >>> result = db.getFromIndex('IOptimizedClass', 'value', '<=', 5)
            >>> type(result)
            <type 'list'>
            >>> sorted(result, key=lambda x:x.name)
            [Opt: 0, Opt: 1, Opt: 2, Opt: 3, Opt: 4, Opt: 5]
        """
        catalogs = getUtilitiesFor(ICatalog)
        intids = getUtility(IIntIds)
        for _unused, catalog in catalogs:
            for index_name, index in catalog.items():
                usable = (isinstance(index, FieldIndex)
                          and index.field_name == property
                          and index.interface.__name__ == klass)
                if not usable:
                    continue

                def between(lower, upper):
                    # Range query on the matched index; None leaves that
                    # side of the range open.
                    return catalog.apply({index_name: (lower, upper)})

                if operator == '==':
                    found = between(value, value)
                elif operator == '!=':
                    found = difference(between(None, None),
                                       between(value, value))
                elif operator == '>=':
                    found = between(value, None)
                elif operator == '>':
                    # Inclusive range minus the exact matches.
                    found = difference(between(value, None),
                                       between(value, value))
                elif operator == '<=':
                    found = between(None, value)
                elif operator == '<':
                    # Inclusive range minus the exact matches.
                    found = difference(between(None, value),
                                       between(value, value))

                return [intids.getObject(uid) for uid in found]

        raise ReanalyzeRequired()
Пример #4
0
    def apply(self, cache, context=None):
        """Subtract the results of the later terms from the first term's.

        Each term in ``self.terms`` is evaluated with ``cached_apply``.  An
        empty result for the first term short-circuits to an empty
        ``IFSet``; empty results of later terms are ignored because they
        would not remove anything.
        """
        operands = []
        for position, term in enumerate(self.terms):
            found = term.cached_apply(cache, context)
            if found:
                operands.append(found)
            elif position == 0:
                # Nothing to subtract from: stop right away.
                return IFSet()

        remaining = operands.pop(0)
        for subtrahend in operands:
            remaining = difference(remaining, subtrahend)
            if not remaining:
                # Already empty; further subtraction cannot change it.
                break
        return remaining
Пример #5
0
 def executeQuery(self, index):
     """Intersect the results of the child nodes, then remove NOT matches.

     Children whose ``nodeType()`` is ``"NOT"`` contribute the result of
     their wrapped node to the exclusion list; every other child
     contributes to the intersection.  Each result is paired with a
     weight of 1.
     """
     positives = []
     negatives = []
     for child in self.getValue():
         if child.nodeType() == "NOT":
             # A None result technically matches every doc, but it is
             # treated as if it matched none, so that
             #     real_word AND NOT stop_word
             # acts like plain real_word.
             bucket = negatives
             outcome = child.getValue().executeQuery(index)
         else:
             # A None result technically matches every doc, so it needn't
             # take part in the intersection.
             bucket = positives
             outcome = child.executeQuery(index)
         if outcome is not None:
             bucket.append((outcome, 1))

     combined = mass_weightedIntersection(positives)
     if not negatives:
         return combined
     excluded = mass_weightedUnion(negatives)
     return difference(combined, excluded)
Пример #6
0
 def apply(self, cache, context=None):
     """Return the unbounded index query minus the matches for ``self.value``.

     ``cache`` is accepted for interface compatibility and is not used
     here.
     """
     idx = self.getIndex(context)
     return difference(
         idx.apply((None, None)),
         idx.apply((self.value, self.value)),
     )
Пример #7
0
 def apply(self, cache, context=None):
     """Return ``self._all()`` minus the cached result of the wrapped term."""
     universe = self._all()
     excluded = self.term.cached_apply(cache, context)
     return difference(universe, excluded)
Пример #8
0
 def apply(self, context=None):
     """Return the unbounded index query minus matches for ``self.not_value``."""
     idx = self.getIndex(context)
     # (None, None) leaves both ends of the range open.
     universe = idx.apply((None, None))
     excluded = idx.apply((self.not_value, self.not_value))
     return difference(universe, excluded)
Пример #9
0
 def apply(self, context=None):
     """Return ``self._all()`` minus the result of the wrapped term."""
     universe = self._all()
     excluded = self.term.apply(context)
     return difference(universe, excluded)
Пример #10
0
 def difference(self, *args):
     """Forward ``args`` unchanged to ``BTrees.IFBTree.difference``."""
     # The import is kept local to the method; aliased so it does not read
     # like a recursive call.
     from BTrees.IFBTree import difference as _if_difference
     return _if_difference(*args)