Пример #1
0
    def removeWordIdsForDocId(self, docid, wids):
        """ remove the word ids 'wids' from the document 'docid'

            The document is dropped from the forward index entry of every
            given word id. Afterwards the reverse index entry and the
            frequencies of the document are rebuilt from the word ids
            that remain.

            Word ids without a forward index entry are ignored. The lookup
            of 'docid' in the reverse index is not guarded, so an unknown
            'docid' propagates the lookup error (after the forward index
            has already been updated).
        """

        for wid in wids:

            try:
                posting = self._forward_idx[wid]
            except KeyError:
                continue

            if isinstance(posting, int):
                # A bare int is presumably the id of the only document
                # containing the word. Only drop the entry when it really
                # is this document, otherwise a word id that does not
                # belong to 'docid' would wipe another document's posting.
                if posting == docid:
                    del self._forward_idx[wid]
                continue

            try:
                posting.remove(docid)
            except KeyError:
                # document was not registered for this word id
                continue

            if len(posting) == 0:
                del self._forward_idx[wid]

        # Build the lookup set once instead of scanning 'wids' for every
        # word id stored for the document.
        removed = set(wids)
        old_wids = decode(self._reverse_idx[docid])
        new_wids = [w for w in old_wids if w not in removed]
        self._reverse_idx[docid] = encode(new_wids)
        self._frequencies[docid] = self._get_frequencies(new_wids)
Пример #2
0
    def removeDocument(self, docid):
        """ remove a document and all its words from the storage

            Returns silently if 'docid' has no entry in the reverse index.
            Otherwise the reverse index entry and the frequencies of the
            document are deleted, the document is dropped from the forward
            index entry of each of its word ids, and the document counter
            is decremented by one.
        """

        try:
            wids = decode(self._reverse_idx[docid])
        except KeyError:
            return

        del self._reverse_idx[docid]
        del self._frequencies[docid]

        for wid in wids:
            try:
                posting = self._forward_idx[wid]
            except KeyError:
                # no entry (any more) for this word id, e.g. because the
                # word id occurs more than once in 'wids'
                continue

            if isinstance(posting, int):
                # A bare int is presumably the id of the only document
                # containing the word. Only drop it when it is this
                # document so an inconsistent index cannot lose another
                # document's posting.
                if posting == docid:
                    del self._forward_idx[wid]
                continue

            try:
                posting.remove(docid)
            except KeyError:
                # document was not registered for this word id
                pass

            # drop postings that became (or already were) empty
            if len(posting) == 0:
                del self._forward_idx[wid]

        self._length.change(-1)
Пример #3
0
 def getWordIdsForDocId(self, docid):
     """ return the decoded word ids stored in the reverse index for
         the document with ID 'docid'

         The reverse index lookup is not guarded, so an unknown 'docid'
         propagates the lookup error to the caller.
     """
     encoded_wids = self._reverse_idx[docid]
     return decode(encoded_wids)