示例#1
0
    def assignWordId(self, word):
        """Assign a new word id to ``word`` and return it.

        If ``word`` is already a key of ``self._lexicon`` its stored id is
        returned and nothing is modified.  Otherwise ids are drawn from
        ``randid()`` until ``self._inverseLex.insert`` accepts one; the
        id -> word entry lives in ``self._inverseLex`` and the word -> id
        entry in ``self._lexicon``.
        """
        # First make sure it's not already in there.
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        try:
            inverse = self._inverseLex
        except AttributeError:
            # Old lexicon without an inverse (wid -> word) mapping: build
            # one from the forward mapping.  The loop variables are kept
            # distinct from ``word``/``wid`` on purpose: reusing those names
            # would overwrite the argument, and the code below would then
            # register the last existing word instead of the requested one.
            inverse = self._inverseLex = IOBTree()
            for known_word, known_wid in self._lexicon.items():
                inverse[known_wid] = known_word

        # Keep drawing ids for as long as insert() reports failure
        # (presumably because the id is already in use).
        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        if isinstance(word, StringType):
            # Plain strings are interned before being used as lexicon keys.
            self._lexicon[intern(word)] = wid
        else:
            self._lexicon[word] = wid

        return wid
示例#2
0
    def assignWordId(self, word):
        """Assign a new word id to ``word`` and return it.

        A word that is already present in ``self._lexicon`` keeps its id,
        which is returned immediately.  For a new word, candidate ids come
        from ``randid()`` and are retried until ``self._inverseLex.insert``
        succeeds; the word is then stored under that id in
        ``self._lexicon``.
        """
        # Already known?  Then there is nothing to assign.
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        try:
            inverse = self._inverseLex
        except AttributeError:
            # Old lexicon without wids -> words: rebuild the inverse mapping
            # from the forward one.  Do NOT name the loop variables
            # ``word``/``wid`` -- that would clobber the argument and make
            # the insert below register an already-known word.
            inverse = self._inverseLex = IOBTree()
            for existing_word, existing_wid in self._lexicon.items():
                inverse[existing_wid] = existing_word

        # Retry with a fresh random id while insert() returns a false value
        # (presumably signalling that the id is taken).
        wid = randid()
        while not inverse.insert(wid, word):
            wid = randid()

        if isinstance(word, StringType):
            # Intern plain strings before using them as keys.
            self._lexicon[intern(word)] = wid
        else:
            self._lexicon[word] = wid

        return wid
    def assignWordId(self, word):
        """Assign a new word id to ``word``, index its digrams, return the id.

        If ``word`` is already in ``self._lexicon`` the existing id is
        returned and nothing else happens.  Otherwise an id is allocated
        (sequentially for an "old" inverse mapping without ``insert``,
        randomly via ``randid()`` for a "new" one), both mappings are
        updated, and the id is added to the ``self._digrams`` entry of every
        digram produced by ``self.createDigrams(word)``.
        """
        # Double check it's not in the lexicon already, and if it is, just
        # return it.
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        # Get word id. BBB Backward compat pain.
        inverse = self._inverseLex
        try:
            insert = inverse.insert
        except AttributeError:
            # We have an "old" BTree object: ids are allocated sequentially.
            if inverse:
                wid = inverse.keys()[-1] + 1
            else:
                # Replace the empty old object with an IOBTree.  ``inverse``
                # must be rebound too, otherwise the entry below would be
                # written to the object that was just discarded.
                inverse = self._inverseLex = IOBTree()
                wid = 1
            inverse[wid] = word
        else:
            # We have a "new" IOBTree object: retry random ids while
            # insert() reports failure (presumably an id collision).
            wid = randid()
            while not insert(wid, word):
                wid = randid()

        self._lexicon[word] = wid

        # Now take all the digrams and insert them into the digram map.
        for digram in self.createDigrams(word):
            wids = self._digrams.get(digram, None)
            if wids is None:
                self._digrams[digram] = wids = IISet()
            wids.insert(wid)

        return wid
示例#4
0
    def assignWordId(self, word):
        """Assign a new word id to ``word``, index its digrams, return the id.

        A word already present in ``self._lexicon`` simply gets its stored
        id back.  A new word receives an id -- the highest existing key plus
        one when ``self._inverseLex`` is an "old" object without ``insert``,
        or a ``randid()`` value accepted by ``insert`` otherwise -- and that
        id is recorded in ``self._inverseLex``, ``self._lexicon`` and, for
        each digram from ``self.createDigrams(word)``, in ``self._digrams``.
        """
        # Double check it's not in the lexicon already, and if it is, just
        # return it.
        if self._lexicon.has_key(word):
            return self._lexicon[word]

        # Get word id. BBB Backward compat pain.
        inverse = self._inverseLex
        try:
            insert = inverse.insert
        except AttributeError:
            # "Old" BTree object: hand out the next sequential id.
            if inverse:
                wid = inverse.keys()[-1] + 1
            else:
                # Swap the empty old object for an IOBTree and rebind the
                # local as well; assigning only the attribute would leave
                # the store below writing into the discarded object.
                inverse = self._inverseLex = IOBTree()
                wid = 1
            inverse[wid] = word
        else:
            # "New" IOBTree object: draw random ids until insert() stops
            # reporting failure (presumably a key collision).
            wid = randid()
            while not insert(wid, word):
                wid = randid()

        self._lexicon[word] = wid

        # Now take all the digrams and insert them into the digram map.
        for digram in self.createDigrams(word):
            wid_set = self._digrams.get(digram, None)
            if wid_set is None:
                self._digrams[digram] = wid_set = IISet()
            wid_set.insert(wid)

        return wid
示例#5
0
    def catalogObject(self, object, uid, threshold=None, idxs=None):
        """
        Adds an object to the Catalog by iteratively applying it
        to all indexes.

        'object' is the object to be cataloged

        'uid' is the unique Catalog identifier for this object

        'threshold' is passed through unchanged to each index's
        'index_object' method

        'idxs' is an optional sequence of index names to apply; when it
        is None or an empty list, every index in 'self.indexes' is used

        Returns the sum of the values returned by the 'index_object'
        calls.
        """

        data = self.data

        # meta_data is stored as a tuple for efficiency
        newDataRecord = self.recordify(object)

        index = self.uids.get(uid, None)
        if index is not None:
            # old data: update the meta-data, if necessary
            if data.get(index, 0) != newDataRecord:
                data[index] = newDataRecord

        else:
            # new data

            if type(data) is IOBTree:
                # New style, get random id

                index = getattr(self, '_v_nextid', 0)
                if index % 4000 == 0:
                    index = randid()
                while not data.insert(index, newDataRecord):
                    index = randid()

                # We want ids to be somewhat random, but there are
                # advantages for having some ids generated
                # sequentially when many catalog updates are done at
                # once, such as when reindexing or bulk indexing.
                # We allocate ids sequentially using a volatile base,
                # so different threads get different bases. This
                # further reduces conflict and reduces churn in
                # here and it result sets when bulk indexing.
                self._v_nextid = index + 1
            else:
                if data:
                    # find the next available unique id
                    index = data.keys()[-1] + 1
                else:
                    index = 0
                data[index] = newDataRecord

            try:
                self.__len__.change(1)
            except AttributeError:
                pass  # No managed length (old-style)

            self.uids[uid] = index
            self.paths[index] = uid

        # None (the default) and the historical default [] both mean
        # "apply every index".  Using None as the default avoids a mutable
        # default argument shared between calls.
        if idxs is None or idxs == []:
            use_indexes = self.indexes.keys()
        else:
            use_indexes = idxs

        total = 0
        for name in use_indexes:
            x = self.getIndex(name)
            if hasattr(x, 'index_object'):
                total = total + x.index_object(index, object, threshold)
            else:
                LOG('Catalog', ERROR, ('catalogObject was passed '
                                       'bad index object %s.' % str(x)))

        return total