def doc_op(self, doc, action='insert'):
    """Apply ``action`` to ``doc`` in both the backing file and the index.

    For an ``'insert'`` a fresh time-based UUID is stored under
    ``doc['_id']`` (the caller's dict is mutated in place) before anything
    else happens.  The document is then passed through ``self.integrity``,
    its storage path is derived with ``self.gen_path``, the file handler
    registered for that path's extension performs the file operation, and
    finally the index is updated inside a writer context.

    :param doc: mapping describing the document.
    :param action: operation name, forwarded unchanged to
        ``self.integrity``, the file handler's ``operate`` and
        ``self.index_doc``.  Defaults to ``'insert'``.
    """
    if action == 'insert':
        new_id = str(uuid.uuid1())
        # On Python 2 ``str`` is the byte type and the id is decoded to
        # unicode, as before.  On Python 3 ``str`` is already text and has
        # no ``.decode()``, so calling it unconditionally would raise
        # AttributeError.
        if isinstance(new_id, bytes):
            new_id = new_id.decode('ascii')
        doc['_id'] = new_id

    doc = self.integrity(doc, action)
    path = self.gen_path(doc)
    ext = ExtHandler.get_ext(path)

    # file operation
    self.fhdict[ext].operate(path, doc, action)

    # index operation
    with self.ix.writer() as writer:
        self.index_doc(doc, writer, action)
def init(self, clean=False):
    """Build the index from the files found under ``self.dir``.

    Every file whose extension is listed in ``self.ext`` is handed to
    ``self.index_file`` using one shared writer.  Progress is logged after
    each 1000 indexed files.

    :param clean: accepted for interface compatibility.
        NOTE(review): this flag is not read anywhere in the body; the
        merge type is set to ``writing.CLEAR`` unconditionally.
    """
    with self.ix.writer() as writer:
        indexed = 0
        for folder, _subdirs, names in os.walk(self.dir):
            for name in names:
                # Only files with a registered extension are indexed.
                if ExtHandler.get_ext(name) in self.ext:
                    self.index_file(os.path.join(folder, name), writer)
                    indexed += 1
                    if not indexed % 1000:
                        utils.logger.info(
                            'create index for %d files' % indexed)
        # re-index without disturbing current users
        writer.mergetype = writing.CLEAR
def index_file(self, path, writer):
    """Index every document yielded from the file at ``path``.

    The handler is looked up in ``self.fhdict`` by the path's extension;
    each document it iterates is passed to ``self.index_doc`` with the
    ``'insert'`` action.

    :param path: path of the file to read documents from.
    :param writer: index writer forwarded to ``self.index_doc``.
    """
    suffix = ExtHandler.get_ext(path)
    handler = self.fhdict[suffix]
    for record in handler.iter(path):
        self.index_doc(record, writer, 'insert')