def _modify_index(self, index, schema, wikiname, revids, mode='add', procs=1, limitmb=256): """ modify index contents - add, update, delete the indexed documents for all given revids Note: mode == 'add' is faster but you need to make sure to not create duplicate documents in the index. """ if procs == 1: # MultiSegmentWriter sometimes has issues and is pointless for procs == 1, # so use the simple writer when --procs 1 is given: writer = index.writer() else: writer = MultiSegmentWriter(index, procs, limitmb) with writer as writer: for revid in revids: if mode in ['add', 'update', ]: meta, data = self.backend.retrieve(revid) content = convert_to_indexable(meta, data) doc = backend_to_index(meta, content, schema, wikiname) if mode == 'update': writer.update_document(**doc) elif mode == 'add': writer.add_document(**doc) elif mode == 'delete': writer.delete_by_term(REVID, revid) else: raise ValueError("mode must be 'update', 'add' or 'delete', not '%s'" % mode)
class Writer(Indexer):
    """
    Indexer that writes documents through an index writer.

    With the default writer, documents are committed in batches: after
    ``commit_count`` calls to ``save()`` the writer is committed and a
    fresh one is opened. After ``set_multiSegmentWriter()`` this automatic
    batching is switched off and ``commit()`` has to be called explicitly.
    """

    def __init__(self, schema, name, index_type, commit_count=5):
        """
        :param schema: forwarded to ``Indexer.__init__``
        :param name: forwarded to ``Indexer.__init__``
        :param index_type: forwarded to ``Indexer.__init__``
        :param commit_count: number of saved documents after which the
                             default writer is committed automatically
        """
        super(Writer, self).__init__(schema, name, index_type)
        self.__commit_count = commit_count
        self.writer = self.get_writer()
        self.__count = 0  # documents saved since the last automatic commit
        self.__isMultiSegment = False

    def get_writer(self):
        """Return a new default writer for ``self.index``."""
        return self.index.writer()

    def set_multiSegmentWriter(self, limitmb=128, procs=4):
        """
        Replace the current writer with a ``MultiSegmentWriter``.

        :param limitmb: memory limit handed to the writer
        :param procs: number of processes handed to the writer
        """
        self.__isMultiSegment = True
        # Pass both values by keyword: MultiSegmentWriter expects the process
        # count right after the index, so the former positional call
        # (index, limitmb, procs) used the memory limit as process count.
        # NOTE(review): the writer opened earlier is replaced here without
        # being committed or cancelled - confirm whether it should be
        # released first.
        self.writer = MultiSegmentWriter(self.index, procs=procs, limitmb=limitmb)

    def save(self, item):
        """
        Add or update one document.

        :param item: mapping of field names to values, expanded into
                     keyword arguments for ``update_document``
        """
        self.writer.update_document(**item)
        self.__count += 1
        if not self.__isMultiSegment and self.__count == self.__commit_count:
            # batch is full: commit it and continue with a fresh writer
            self.commit()
            self.writer = self.get_writer()
            self.__count = 0

    def commit(self):
        """Commit the current writer."""
        self.writer.commit()