def removeDocument(self, docid):
    """ Remove a document from the storage.

    Every word id recorded for the document has ``docid`` taken out of
    its entry in ``_wid2doc``; entries left empty are deleted.  The
    document is then dropped from ``_doc2wid`` and ``_docweight`` and the
    length counter is decremented by one.

    A docid with no entry in ``_doc2wid`` is ignored silently.
    """
    try:
        wrapped = self._doc2wid[docid]
    except KeyError:
        return  # unknown document: nothing to clean up

    postings = self._wid2doc
    # .get() unwraps the stored _PS instance into the encoded word id list
    for wid in decode(wrapped.get()):
        if not postings.has_key(wid):
            # repeated word ids land here once their entry has been deleted
            continue
        docids = postings[wid]
        try:
            docids.remove(docid)
        except KeyError:
            pass
        if not docids:
            del postings[wid]

    del self._doc2wid[docid]
    del self._docweight[docid]
    self._length.change(-1)
def getPositions(self, docid, wordid):
    """ Return the positions at which ``wordid`` occurs in a document.

    Positions are zero-based indexes into the decoded word id list
    stored for ``docid``.

    docid  -- id of the document to inspect
    wordid -- word id to look for

    Returns an ``IITreeSet`` with every matching position; it is empty
    when the word does not occur in the document.

    Raises ``KeyError`` when nothing is stored for ``docid``.
    """
    # The stored value is a wrapper object; .get() yields the encoded
    # word id list.  (The former ``encode((wordid,))`` call was removed:
    # its result was never used.)
    encoded_document = self._doc2wid[docid].get()
    positions = IITreeSet()
    for pos, wid in enumerate(decode(encoded_document)):
        if wid == wordid:
            positions.insert(pos)
    return positions
def getPositions(self, docid, wordid):
    """ Return the positions at which ``wordid`` occurs in a document.

    Positions are zero-based indexes into the decoded word id list
    stored for ``docid``.

    docid  -- id of the document to inspect
    wordid -- word id to look for

    Returns an ``IITreeSet`` with every matching position; it is empty
    when the word does not occur in the document.

    Raises ``KeyError`` when nothing is stored for ``docid``.
    """
    # The stored value is a wrapper object; .get() yields the encoded
    # word id list.  (The former ``encode((wordid,))`` call was removed:
    # its result was never used.)
    encoded_document = self._doc2wid[docid].get()
    positions = IITreeSet()
    for pos, wid in enumerate(decode(encoded_document)):
        if wid == wordid:
            positions.insert(pos)
    return positions
def insertDocument(self, docid, widlist):
    """ Index (or re-index) a document under the given word ids.

    docid   -- id of the document
    widlist -- word ids of the document, in order; duplicates allowed

    Effects:
      - the length counter is incremented when ``docid`` is new;
      - the encoded ``widlist`` is stored (wrapped in ``_PS``) in
        ``_doc2wid`` when it differs from what is already stored;
      - ``docid`` is added to the ``_wid2doc`` entry of every word id in
        ``widlist``;
      - ``docid`` is removed from the entries of word ids that were in
        the previously stored list but are no longer in ``widlist``, and
        entries left empty are deleted;
      - ``_docweight[docid]`` is set to the number of items in
        ``widlist`` (duplicates counted).
    """
    if not self._doc2wid.has_key(docid):
        self._length.change(1)

    enc_widlist = encode(widlist)
    old_enc_widlist = self._doc2wid.get(docid)
    if old_enc_widlist is not None:
        old_enc_widlist = old_enc_widlist.get()  # unwrap _PS instance

    removed_wordids = []
    if old_enc_widlist != enc_widlist:
        self._doc2wid[docid] = _PS(enc_widlist)
        if old_enc_widlist is not None:
            # word ids the document used to contain but no longer does
            old_widlist = IISet(decode(old_enc_widlist))
            removed_wordids = difference(old_widlist, IISet(widlist))

    tree = self._wid2doc
    tree_has = tree.has_key
    count = 0
    for wid in widlist:
        count += 1
        if not tree_has(wid):
            tree[wid] = DocidList([docid])
        elif docid not in tree[wid]:
            tree[wid].insert(docid)

    for wid in removed_wordids:
        if tree_has(wid):
            try:
                tree[wid].remove(docid)
            except KeyError:
                pass
            # Drop entries that became empty, as removeDocument does;
            # otherwise re-indexing leaves empty lists behind forever.
            if not tree[wid]:
                del tree[wid]

    self._docweight[docid] = count
def insertDocument(self, docid, widlist):
    """ Index (or re-index) a document under the given word ids.

    docid   -- id of the document
    widlist -- word ids of the document, in order; duplicates allowed

    Effects:
      - the length counter is incremented when ``docid`` is new;
      - the encoded ``widlist`` is stored (wrapped in ``_PS``) in
        ``_doc2wid`` when it differs from what is already stored;
      - ``docid`` is added to the ``_wid2doc`` entry of every word id in
        ``widlist``;
      - ``docid`` is removed from the entries of word ids that were in
        the previously stored list but are no longer in ``widlist``, and
        entries left empty are deleted;
      - ``_docweight[docid]`` is set to the number of items in
        ``widlist`` (duplicates counted).
    """
    if not self._doc2wid.has_key(docid):
        self._length.change(1)

    enc_widlist = encode(widlist)
    old_enc_widlist = self._doc2wid.get(docid)
    if old_enc_widlist is not None:
        old_enc_widlist = old_enc_widlist.get()  # unwrap _PS instance

    removed_wordids = []
    if old_enc_widlist != enc_widlist:
        self._doc2wid[docid] = _PS(enc_widlist)
        if old_enc_widlist is not None:
            # word ids the document used to contain but no longer does
            old_widlist = IISet(decode(old_enc_widlist))
            removed_wordids = difference(old_widlist, IISet(widlist))

    tree = self._wid2doc
    tree_has = tree.has_key
    count = 0
    for wid in widlist:
        count += 1
        if not tree_has(wid):
            tree[wid] = DocidList([docid])
        elif docid not in tree[wid]:
            tree[wid].insert(docid)

    for wid in removed_wordids:
        if tree_has(wid):
            try:
                tree[wid].remove(docid)
            except KeyError:
                pass
            # Drop entries that became empty, as removeDocument does;
            # otherwise re-indexing leaves empty lists behind forever.
            if not tree[wid]:
                del tree[wid]

    self._docweight[docid] = count
def getWordIdsForDocId(self, docid):
    """ Return the decoded word ids stored for ``docid``.

    Raises ``StorageException`` when a ``KeyError`` occurs while looking
    up and decoding the document's entry.
    """
    try:
        # look up the wrapper, unwrap it with .get() and decode in one go
        return decode(self._doc2wid[docid].get())
    except KeyError:
        raise StorageException('No such docid: %d' % docid)