class IndexerThread(threading.Thread):
    """Worker thread that applies queued index / un-index jobs to a search index.

    Every job taken from the queue is a ``(path, add_or_delete)`` pair, where
    ``add_or_delete`` is True to index ``path`` and False to remove it from
    the index.
    """

    def __init__(self, queue, index_directory, log_rate):
        """Open the index and prepare the job queue and logging counters.

        params:
            - queue: queue of (path, add_or_delete) jobs consumed by run()
            - index_directory: directory handed to Engine to open the index
            - log_rate: a successful job is logged whenever the running job
              count is a multiple of this value; it is used as a modulus,
              so it must not be 0
        """
        threading.Thread.__init__(self)  # needed for thread to be instantiated
        self.search_engine = Engine(index_directory)
        # NOTE(review): the meaning of the three writer arguments is not
        # visible from here - presumably buffering limits; confirm against
        # Engine.writer.
        self.buffered_writer = self.search_engine.writer(120, 5000, 512)
        self.queue = queue
        self.log_count = 0  # number of jobs handled so far
        self.log_rate = log_rate

    def run(self):
        """Consume jobs from the queue forever; this method never returns.

        Unsuccessful jobs are always reported. Successful jobs are reported
        only when the running job count is a multiple of ``log_rate``. The
        job count is incremented after every job, successful or not.
        """
        while True:
            # Blocking get(): the thread sleeps until a job arrives instead of
            # spinning on empty(), and there is no window between an empty()
            # check and get_nowait() in which another consumer could take the
            # job and make get_nowait() raise.
            path, add_or_delete = self.queue.get()
            time_taken, successful = self.add_or_remove(path, add_or_delete)
            if not successful:
                self._emit_log_line(
                    "index/ un-index of path was unsuccessful: ",
                    [path, str(datetime.now())])
            elif self.log_count % self.log_rate == 0:
                if add_or_delete:
                    label = "time taken to index path: "
                else:
                    label = "time taken to un-index path: "
                self._emit_log_line(
                    label, [time_taken, path, str(datetime.now())])
            self.log_count += 1

    def _emit_log_line(self, label, details):
        """Print 'label', a space, then 'details' on one line."""
        # A single parenthesised argument produces the same text as the
        # statement form `print label, details` under Python 2 and is also
        # valid under Python 3.
        print("%s %s" % (label, details))

    def add_or_remove(self, path, add_or_delete):
        """Index 'path' or remove it from the index, timing the operation.

        params:
            - path: the path to index/ remove from index
            - add_or_delete: True for add/ False for delete
        return:
            a two-item list [time_taken, successful]
            - time_taken: seconds spent in the engine call
              (0.0 if unsuccessful; i.e. directory, not file)
            - successful: False when 'path' is a directory (directories are
              skipped without touching the index), True otherwise
        """
        if os.path.isdir(path):
            return [0.0, False]
        started = time.time()
        if add_or_delete:
            self.search_engine.add_document(path, self.buffered_writer)
        else:
            self.search_engine.remove_document(path, self.buffered_writer)
        return [time.time() - started, True]
- Using monitored_directory and index_directory2 """ parser = argparse.ArgumentParser(description="the index directory") parser.add_argument("index_directory", help="the index_directory to refresh") parser.add_argument("monitored_directory", help="the monitored_directory to load into queue") args = parser.parse_args() index_directory = args.index_directory monitored_directory = args.monitored_directory search_engine = Engine(index_directory) buffered_writer = search_engine.writer(120, 1000, 512) queue = Queue.Queue() path = monitored_directory def recursive_put(path, queue): if os.path.isfile(path): queue.put(path) elif os.path.isdir(path): ls = os.listdir(path) for l in ls: recursive_put(os.path.join(path, l), queue) else: pass