示例#1
0
    def removeDocument(self, docid):
        """Remove everything stored for ``docid``.

        A docid that is not present in ``self._doc2wid`` is ignored
        silently.  Otherwise ``docid`` is removed from the entry of every
        word id recorded for the document in ``self._wid2doc``; entries
        that end up empty are deleted.  Afterwards the document's word id
        list and weight are dropped and the length counter is decreased
        by one.
        """
        wrapped = self._doc2wid.get(docid)
        if wrapped is None:
            return  # unknown document: nothing to do

        encoded = wrapped.get()  # unwrap _PS instance

        wid2doc = self._wid2doc
        known = wid2doc.has_key
        for wid in decode(encoded):
            # A word id may already be gone, e.g. its entry was deleted
            # earlier in this very loop.
            if not known(wid):
                continue

            docids = wid2doc[wid]
            try:
                docids.remove(docid)
            except KeyError:
                pass  # docid was not registered under this word id

            if not docids:
                del wid2doc[wid]

        del self._doc2wid[docid]
        del self._docweight[docid]
        self._length.change(-1)
示例#2
0
    def removeDocument(self, docid):
        """Remove everything stored for ``docid``.

        A docid that is not present in ``self._doc2wid`` is ignored
        silently.  Otherwise ``docid`` is removed from the entry of every
        word id recorded for the document in ``self._wid2doc``; entries
        that end up empty are deleted.  Afterwards the document's word id
        list and weight are dropped and the length counter is decreased
        by one.
        """
        wrapped = self._doc2wid.get(docid)
        if wrapped is None:
            return  # unknown document: nothing to do

        encoded = wrapped.get()  # unwrap _PS instance

        wid2doc = self._wid2doc
        known = wid2doc.has_key
        for wid in decode(encoded):
            # A word id may already be gone, e.g. its entry was deleted
            # earlier in this very loop.
            if not known(wid):
                continue

            docids = wid2doc[wid]
            try:
                docids.remove(docid)
            except KeyError:
                pass  # docid was not registered under this word id

            if not docids:
                del wid2doc[wid]

        del self._doc2wid[docid]
        del self._docweight[docid]
        self._length.change(-1)
示例#3
0
    def getPositions(self, docid, wordid):
        """Return the positions at which ``wordid`` occurs in a document.

        The word id sequence stored for ``docid`` is decoded and scanned
        in order; positions are the zero-based indexes produced by
        ``enumerate``.

        :param docid: key of the document in ``self._doc2wid``.
        :param wordid: word id to look for.
        :return: an ``IITreeSet`` holding every matching position (empty
                 when the word does not occur).

        A failed lookup of ``docid`` in ``self._doc2wid`` is not handled
        here and propagates to the caller.
        """
        # NOTE: an earlier revision also computed ``encode((wordid, ))``
        # here; the result was never used, so the call has been dropped.
        encoded_document = self._doc2wid[docid].get()  # unwrap _PS instance

        positions = IITreeSet()
        for pos, wid in enumerate(decode(encoded_document)):
            if wid == wordid:
                positions.insert(pos)
        return positions
示例#4
0
    def getPositions(self, docid, wordid):
        """Return the positions at which ``wordid`` occurs in a document.

        The word id sequence stored for ``docid`` is decoded and scanned
        in order; positions are the zero-based indexes produced by
        ``enumerate``.

        :param docid: key of the document in ``self._doc2wid``.
        :param wordid: word id to look for.
        :return: an ``IITreeSet`` holding every matching position (empty
                 when the word does not occur).

        A failed lookup of ``docid`` in ``self._doc2wid`` is not handled
        here and propagates to the caller.
        """
        # NOTE: an earlier revision also computed ``encode((wordid,))``
        # here; the result was never used, so the call has been dropped.
        encoded_document = self._doc2wid[docid].get()  # unwrap _PS instance

        positions = IITreeSet()
        for pos, wid in enumerate(decode(encoded_document)):
            if wid == wordid:
                positions.insert(pos)
        return positions
示例#5
0
    def insertDocument(self, docid, widlist):
        """Index, or re-index, ``docid`` with the word ids in ``widlist``.

        * The length counter is increased by one when ``docid`` is not yet
          present in ``self._doc2wid``.
        * The encoded word id list is stored (wrapped in ``_PS``) only when
          it differs from the one already stored.
        * ``docid`` is registered under every word id of ``widlist`` in
          ``self._wid2doc``.
        * Word ids that were part of the previous list but are absent from
          ``widlist`` get ``docid`` removed again; entries left empty by
          that removal are deleted, mirroring ``removeDocument``.
        * ``self._docweight[docid]`` is set to the number of items in
          ``widlist`` (duplicates included).

        :param docid: document id.
        :param widlist: word ids of the document; it is iterated more than
                        once, so it must be re-iterable.
        """
        if not self._doc2wid.has_key(docid):
            self._length.change(1)

        enc_widlist = encode(widlist)
        old_enc_widlist = self._doc2wid.get(docid)
        if old_enc_widlist is not None:
            old_enc_widlist = old_enc_widlist.get()  # unwrap _PS instance

        removed_wordids = []
        if old_enc_widlist != enc_widlist:
            self._doc2wid[docid] = _PS(enc_widlist)
            if old_enc_widlist is not None:
                old_widlist = IISet(decode(old_enc_widlist))
                removed_wordids = difference(old_widlist, IISet(widlist))

        tree = self._wid2doc
        tree_has = tree.has_key
        count = 0
        for wid in widlist:
            count += 1
            if not tree_has(wid):
                tree[wid] = DocidList([docid])
            elif docid not in tree[wid]:
                tree[wid].insert(docid)

        for wid in removed_wordids:
            if not tree_has(wid):
                continue
            docids = tree[wid]
            try:
                docids.remove(docid)
            except KeyError:
                pass  # docid was not registered under this word id
            # Do not leave empty entries behind; removeDocument() prunes
            # them as well.
            if not docids:
                del tree[wid]

        self._docweight[docid] = count
示例#6
0
    def insertDocument(self, docid, widlist):
        """Index, or re-index, ``docid`` with the word ids in ``widlist``.

        * The length counter is increased by one when ``docid`` is not yet
          present in ``self._doc2wid``.
        * The encoded word id list is stored (wrapped in ``_PS``) only when
          it differs from the one already stored.
        * ``docid`` is registered under every word id of ``widlist`` in
          ``self._wid2doc``.
        * Word ids that were part of the previous list but are absent from
          ``widlist`` get ``docid`` removed again; entries left empty by
          that removal are deleted, mirroring ``removeDocument``.
        * ``self._docweight[docid]`` is set to the number of items in
          ``widlist`` (duplicates included).

        :param docid: document id.
        :param widlist: word ids of the document; it is iterated more than
                        once, so it must be re-iterable.
        """
        if not self._doc2wid.has_key(docid):
            self._length.change(1)

        enc_widlist = encode(widlist)
        old_enc_widlist = self._doc2wid.get(docid)
        if old_enc_widlist is not None:
            old_enc_widlist = old_enc_widlist.get()  # unwrap _PS instance

        removed_wordids = []
        if old_enc_widlist != enc_widlist:
            self._doc2wid[docid] = _PS(enc_widlist)
            if old_enc_widlist is not None:
                old_widlist = IISet(decode(old_enc_widlist))
                removed_wordids = difference(old_widlist, IISet(widlist))

        tree = self._wid2doc
        tree_has = tree.has_key
        count = 0
        for wid in widlist:
            count += 1
            if not tree_has(wid):
                tree[wid] = DocidList([docid])
            elif docid not in tree[wid]:
                tree[wid].insert(docid)

        for wid in removed_wordids:
            if not tree_has(wid):
                continue
            docids = tree[wid]
            try:
                docids.remove(docid)
            except KeyError:
                pass  # docid was not registered under this word id
            # Do not leave empty entries behind; removeDocument() prunes
            # them as well.
            if not docids:
                del tree[wid]

        self._docweight[docid] = count
示例#7
0
 def getWordIdsForDocId(self, docid):
     """Return the decoded word ids stored for ``docid``.

     :param docid: document id; formatted with ``%d`` in the error
                   message.
     :return: the result of ``decode`` applied to the unwrapped entry of
              ``self._doc2wid``.
     :raises StorageException: when ``docid`` is not a key of
                               ``self._doc2wid``.
     """
     try:
         ps_wrapper = self._doc2wid[docid]
     except KeyError:
         raise StorageException('No such docid: %d' % docid)
     # Unwrap and decode outside the ``try`` so that a KeyError raised
     # there is not misreported as a missing docid.
     return decode(ps_wrapper.get())
示例#8
0
 def getWordIdsForDocId(self, docid):
     """Return the decoded word ids stored for ``docid``.

     :param docid: document id; formatted with ``%d`` in the error
                   message.
     :return: the result of ``decode`` applied to the unwrapped entry of
              ``self._doc2wid``.
     :raises StorageException: when ``docid`` is not a key of
                               ``self._doc2wid``.
     """
     try:
         ps_wrapper = self._doc2wid[docid]
     except KeyError:
         raise StorageException('No such docid: %d' % docid)
     # Unwrap and decode outside the ``try`` so that a KeyError raised
     # there is not misreported as a missing docid.
     return decode(ps_wrapper.get())