def multi_process_train(
    args,
    error_queue: mp_queues.SimpleQueue,
    output_queue: Optional[mp_queues.Queue],
    init_fn: Optional[Callable[[], None]] = None,
):
    """Entry point of a single training worker process.

    Runs the optional ``init_fn``, selects the CUDA device named by
    ``args.device_id``, initialises distributed training when
    ``args.distributed_world_size > 1``, then sets up and runs training.

    A ``KeyboardInterrupt`` is swallowed silently.  Any other exception is
    reported to the parent as ``(args.distributed_rank, formatted_traceback)``
    on ``error_queue`` instead of being re-raised.
    """
    try:
        if init_fn:
            init_fn()

        torch.cuda.set_device(args.device_id)

        is_distributed = args.distributed_world_size > 1
        if is_distributed:
            args.distributed_rank = distributed_utils.distributed_init(args)

        extra_state, trainer, task, epoch_itr = setup_training(args)
        train(
            args=args,
            extra_state=extra_state,
            trainer=trainer,
            task=task,
            epoch_itr=epoch_itr,
            output_queue=output_queue,
        )
    except KeyboardInterrupt:
        # Killed by the parent process: nothing to report.
        return
    except Exception:
        # Hand the failure to the parent, keeping the original traceback text.
        import traceback

        report = (args.distributed_rank, traceback.format_exc())
        error_queue.put(report)
def _handle_workers(cls, ctx, processes: int, Proc, task_queue: SimpleQueue, in_queue: SimpleQueue,
                    out_queue: SimpleQueue, init_args, worker: Worker, wrap_exception,
                    change_notifier: SimpleQueue, pool: List[Process]):
    """
    Supervise every process of the pool; meant to run inside a thread.

    :param ctx: process context
    :param processes: requested number of processes
    :param Proc: factory used to create processes (obtained via get_context())
    :param task_queue: receives ``EndSignal.END`` once supervision stops
    :param in_queue: carries tasks to the worker processes
    :param out_queue: carries results back from the worker processes
    :param init_args: initialisation data
    :param worker: the Worker to run
    :param wrap_exception: whether task execution exceptions should be wrapped
    :param change_notifier: queue waited on between two maintenance passes
    :param pool: list of the pool's processes
    :return:
    """
    supervisor = threading.current_thread()
    while True:
        if supervisor._state != State.RUN:
            break
        cls._maintain_pool(ctx, processes, Proc, in_queue, out_queue,
                           init_args, worker, wrap_exception, pool)
        cls._wait_for_updates(change_notifier)

    # The thread is leaving: tell the task queue consumer to stop too.
    logging.debug("send exit signal to task queue")
    task_queue.put(EndSignal.END)
def __call__(self, in_queue: SimpleQueue, out_queue: SimpleQueue, init_args, wrap_exception, *args, **kwargs):
    """Worker loop: pull tasks from ``in_queue`` and push results to ``out_queue``.

    ``self.initializer`` is called first when ``init_args`` is truthy.  The
    loop ends when the queue read raises ``EOFError``/``OSError`` or when a
    ``None`` sentinel is received.  ``task.args()`` may return either a
    ``(args, plugins)`` tuple or a single argument object for ``self._main``.
    """
    if init_args:
        self.initializer(init_args)

    while True:
        try:
            logging.debug("waiting recv task")
            task = in_queue.get()
            logging.debug("task received")
        except (EOFError, OSError):
            logging.debug('worker got EOFError or OSError -- exiting')
            return

        if task is None:
            logging.debug('worker got sentinel -- exiting')
            return

        payload = task.args()
        if not isinstance(payload, Tuple):
            exit_code = self._main(payload)
        else:
            args_l, plugins = payload
            exit_code = self._main(args_l, plugins)

        try:
            out_queue.put(exit_code)
        except Exception as e:
            # The result could not be sent; send the failure itself instead.
            out_queue.put(e)
def export_table(host, port, auth_key, db, table, directory, fields, format, error_queue, progress_info,
                 stream_semaphore, exit_event):
    """Export one table: count it, write its metadata, then stream its rows to a writer.

    Failures are never raised to the caller; they are reported on
    ``error_queue`` as ``(exception_type, exception_instance, extracted_traceback)``.

    :param host, port, auth_key: connection parameters passed to ``r.connect``
    :param db, table: database and table to export
    :param directory: output directory handed to the metadata and writer helpers
    :param fields, format: forwarded to ``launch_writer``
    :param error_queue: queue receiving error triples
    :param progress_info: forwarded to the count and scan helpers
    :param stream_semaphore: held while the writer is launched and the table is scanned
    :param exit_event: forwarded to the table scan
    """
    writer = None

    try:
        # This will open at least one connection for each rdb_call_wrapper, which is
        # a little wasteful, but shouldn't be a big performance hit
        conn_fn = lambda: r.connect(host, port, auth_key=auth_key)
        rdb_call_wrapper(conn_fn, "count", get_table_size, db, table, progress_info)
        table_info = rdb_call_wrapper(conn_fn, "info", write_table_metadata, db, table, directory)

        with stream_semaphore:
            task_queue = SimpleQueue()
            writer = launch_writer(format, directory, db, table, fields, task_queue, error_queue)
            writer.start()

            rdb_call_wrapper(conn_fn, "table scan", read_table_into_queue, db, table,
                             table_info["primary_key"], task_queue, progress_info, exit_event)
    except (r.RqlError, r.RqlDriverError) as ex:
        error_queue.put((RuntimeError, RuntimeError(ex.message), traceback.extract_tb(sys.exc_info()[2])))
    except:
        # Deliberately broad: every failure must be reported through error_queue.
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb)))
    finally:
        if writer is None:
            # The writer was never launched, so the failure that prevented it
            # has already been reported above; do not add a misleading
            # "writer unexpectedly stopped" error on top of it.
            pass
        elif writer.is_alive():
            task_queue.put(("exit", "event"))  # Exit is triggered by sending a message with two objects
            writer.join()
        else:
            error_queue.put((RuntimeError, RuntimeError("writer unexpectedly stopped"),
                             traceback.extract_tb(sys.exc_info()[2])))
def _wrapped_function(self, process_idx: int, function: Callable, args: Any, kwargs: Any,
                      return_queue: SimpleQueue) -> None:
    """Set up the worker, run ``function`` and publish its result from local rank 0.

    Only the process whose ``self.local_rank`` is 0 puts anything on
    ``return_queue``; the result is passed through
    ``move_data_to_device(..., "cpu")`` first.
    """
    self._worker_setup(process_idx)
    outcome = function(*args, **kwargs)

    if self.local_rank != 0:
        return

    cpu_outcome = move_data_to_device(outcome, "cpu")
    return_queue.put(cpu_outcome)
def _wrapping_function(
    self,
    process_idx: int,
    trainer: Optional["pl.Trainer"],
    function: Callable,
    args: Any,
    kwargs: Any,
    return_queue: SimpleQueue,
) -> None:
    """Run ``function`` in a spawned worker and hand the result back from local rank 0.

    When a ``trainer`` is given, the raw result is first reduced with
    ``self._collect_rank_zero_results``.  All processes then meet at the
    ``"end-process"`` barrier, and local rank 0 lingers two more seconds.
    """
    strategy_setup = self._strategy._worker_setup
    strategy_setup(process_idx)

    results = function(*args, **kwargs)
    if trainer is not None:
        results = self._collect_rank_zero_results(trainer, results)

    if self._strategy.local_rank == 0:
        on_cpu = move_data_to_device(results, "cpu")
        return_queue.put(on_cpu)

    # https://github.com/pytorch/xla/issues/1801#issuecomment-602799542
    self._strategy.barrier("end-process")

    # Ensure that the rank 0 process is the one exiting last
    # https://github.com/pytorch/xla/issues/2190#issuecomment-641665358
    if self._strategy.local_rank != 0:
        return
    time.sleep(2)
class Logger(object):
    """Queue-backed logger whose messages are indented and prefixed by a tag stack.

    Messages are buffered in ``self.logq`` and only reach ``logfilepath``
    when ``write_to_file`` is called.
    """

    def __init__(self, logfilepath):
        """Start with an empty tag stack; any existing file at ``logfilepath`` is removed."""
        try:
            os.remove(logfilepath)
        except OSError:
            # No previous log file (or it cannot be removed): start anyway.
            pass

        self.logfilepath = logfilepath
        self.logq = SimpleQueue()

        self.tags = ''
        self.num_tags = 0

    def add_tag(self, tag):
        """Push ``tag`` onto the dot-separated tag stack."""
        self.num_tags += 1

        if self.tags != '':
            self.tags = self.tags + '.' + tag
        else:
            self.tags = tag

    def remove_tag(self):
        """Pop the most recent tag; a no-op when no tag is active."""
        if self.num_tags <= 0:
            # Popping from an empty stack used to push num_tags below zero,
            # which corrupted the indentation of every later message.
            return
        tags = self.tags.split('.')
        self.tags = ".".join(tags[:-1])
        self.num_tags -= 1

    def get_tag_part(self):
        """Return ``"<tags>: "`` when tags are active, else an empty string."""
        if self.tags != '':
            return self.tags + ": "
        return ''

    def log(self, message, start_group=None, end_group=None):
        """Queue ``message``, indented four spaces per active tag.

        ``start_group`` and ``end_group`` are accepted but not used.
        """
        assert(type(message) == str)
        self.logq.put(" " * self.num_tags * 4 + self.get_tag_part() + message + '\n')

    def getlog(self):
        """Return the oldest queued message."""
        return self.logq.get()

    def getlogs(self, n=None):
        """Return queued messages, oldest first: all of them, or at most ``n``."""
        logs = []
        if n is None:
            while not self.logq.empty():
                logs.append(self.getlog())
        else:
            assert(type(n) == int)
            while not (self.logq.empty() or len(logs) == n):
                logs.append(self.getlog())

        return logs

    def write_to_file(self):
        """Append every queued message to the log file, emptying the queue."""
        # mode 'a' for append
        with open(self.logfilepath, 'a') as f:
            f.writelines(self.getlogs())
class MyPoolwithPipe(BasePool):
    """
    Process pool with two extra queues shared with every worker, allowing
    data to be exchanged in both directions while tasks are running.
    """

    def __init__(self, processes=None):
        """
        Build the pool.
        :param processes: maximum number of processes
        """
        BasePool.__init__(self, processes)

    def _setup_queues(self):
        """
        Create the base pool queues plus the two data-exchange queues.
        :return:
        """
        BasePool._setup_queues(self)
        self._get_data_queue = SimpleQueue()
        self._require_data_queue = SimpleQueue()

    def _repopulate_pool(self):
        """Start as many workers as are missing to reach the configured
        process count; used after exited workers have been reaped.
        """
        missing = self._processes - len(self._pool)
        for _ in range(missing):
            worker_args = (self._inqueue, self._outqueue,
                           self._initializer, self._initargs,
                           self._maxtasksperchild,
                           self._require_data_queue, self._get_data_queue)
            proc = self.Process(target=myworker, args=worker_args)
            self._pool.append(proc)
            proc.name = proc.name.replace('Process', 'PoolWorker')
            proc.daemon = True
            proc.start()
            debug('added worker')

    def send_data(self, data):
        """
        Send data to the workers.
        :param data: dict with the 'head' and 'data' entries used to build the exchange object
        :return:
        """
        exchange = DataExchange(data['head'], data['data'])
        self._get_data_queue.put(exchange())

    def get_data(self):
        """
        Fetch the next data request issued by a pool process.
        :return: the requested item
        """
        request = self._require_data_queue.get()
        return request

    def set_stop(self):
        """
        Ask the data-serving side to stop by queuing -1.
        :return:
        """
        self._require_data_queue.put(-1)
def test_can_pickle_via_queue(self):
    """
    A URL sent through a SimpleQueue must compare equal to the original.

    https://github.com/andresriancho/w3af/issues/8748
    """
    original = URL('http://www.w3af.com/')

    channel = SimpleQueue()
    channel.put(original)
    round_tripped = channel.get()

    self.assertEqual(original, round_tripped)
def test_can_pickle_via_queue(self):
    """
    Round-trip a URL through a SimpleQueue and check equality is preserved.

    https://github.com/andresriancho/w3af/issues/8748
    """
    sent = URL('http://www.w3af.com/')

    pipe = SimpleQueue()
    pipe.put(sent)
    received = pipe.get()

    self.assertEqual(sent, received)
def main():
    """Count the records of ``settings.BIG_FILE`` matching ``settings.TARGET_USERNAME``.

    The file is split into ``settings.BIGFILE_MP_CHUNKS`` chunks; one daemon
    process per chunk pushes its matches onto a shared queue, and the parent
    counts them.
    """
    import threading  # local import: only needed for the joiner thread below

    sfile = settings.BIG_FILE
    fsize = os.path.getsize(sfile)
    with open(sfile, "r") as fh:
        # Materialise inside the ``with`` so a lazy size_chunks cannot read
        # from an already-closed file.
        chunks = list(size_chunks(fh, fsize, num_chunks=settings.BIGFILE_MP_CHUNKS))

    q = Queue()
    pattern = re.compile(settings.TARGET_USERNAME)

    # producers
    producers = []
    file_handles = []
    for chunk in chunks:
        # NOTE(review): these handles are opened and later closed but never
        # passed to the pipeline -- confirm whether they are still needed.
        fh = open(sfile, "r")
        file_handles.append(fh)
        o = opener(cat(chunk, grep(pattern, writer(q))))
        t = multiprocessing.Process(target=sender, args=(o,))
        t.daemon = True
        producers.append(t)

    for p in producers:
        p.start()

    def _signal_when_done():
        """Join every producer, then enqueue the end-of-data sentinel."""
        for p in producers:
            p.join()
        q.put(None)  # sentinel

    # Joining the producers BEFORE draining the queue can deadlock: a child
    # that has queued data may not exit until that data has been consumed.
    # Join them from a helper thread while this thread drains the queue.
    joiner = threading.Thread(target=_signal_when_done)
    joiner.start()

    recsmatch = 0
    print("Before queue comp")
    while True:
        x = q.get()
        if x is None:
            break
        recsmatch += 1
    print("After queue comp")

    joiner.join()
    for f in file_handles:
        f.close()

    print("recsmatch={r} chunks={c}".format(r=recsmatch, c=settings.BIGFILE_MP_CHUNKS))
def main():
    """Count the records of ``settings.BIG_FILE`` matching ``settings.TARGET_USERNAME``.

    The file is split into ``settings.BIGFILE_MP_CHUNKS`` chunks; one daemon
    process per chunk pushes its matches onto a shared queue, and the parent
    counts them.
    """
    import threading  # local import: only needed for the joiner thread below

    sfile = settings.BIG_FILE
    fsize = os.path.getsize(sfile)
    with open(sfile, "r") as fh:
        # Materialise inside the ``with`` so a lazy size_chunks cannot read
        # from an already-closed file.
        chunks = list(size_chunks(fh, fsize, num_chunks=settings.BIGFILE_MP_CHUNKS))

    q = Queue()
    pattern = re.compile(settings.TARGET_USERNAME)

    # producers
    producers = []
    file_handles = []
    for chunk in chunks:
        # NOTE(review): these handles are opened and later closed but never
        # passed to the pipeline -- confirm whether they are still needed.
        fh = open(sfile, "r")
        file_handles.append(fh)
        o = opener(cat(chunk, grep(pattern, writer(q))))
        t = multiprocessing.Process(target=sender, args=(o,))
        t.daemon = True
        producers.append(t)

    for p in producers:
        p.start()

    def _signal_when_done():
        """Join every producer, then enqueue the end-of-data sentinel."""
        for p in producers:
            p.join()
        q.put(None)  # sentinel

    # Joining the producers BEFORE draining the queue can deadlock: a child
    # that has queued data may not exit until that data has been consumed.
    # Join them from a helper thread while this thread drains the queue.
    joiner = threading.Thread(target=_signal_when_done)
    joiner.start()

    recsmatch = 0
    print("Before queue comp")
    while True:
        x = q.get()
        if x is None:
            break
        recsmatch += 1
    print("After queue comp")

    joiner.join()
    for f in file_handles:
        f.close()

    print("recsmatch={r} chunks={c}".format(r=recsmatch, c=settings.BIGFILE_MP_CHUNKS))
def _terminate_pool(_task_queue: SimpleQueue, _in_queue: SimpleQueue, out_queue: SimpleQueue, pool: List[Process],
                    change_notifier: SimpleQueue, worker_handler_th: Thread, handle_task_th: Thread,
                    handle_result_th: Thread):
    """
    Shut the process pool down.

    :param _task_queue: currently unused
    :param _in_queue: currently unused
    :param out_queue: receives the end signal
    :param pool: the pool's processes
    :param change_notifier: receives the end signal so the state change is noticed
    :param worker_handler_th: thread supervising the workers
    :param handle_task_th: thread dispatching tasks
    :param handle_result_th: thread collecting results
    :return:
    """
    caller = threading.current_thread()

    def _join_unless_caller(thread):
        # A thread cannot join itself.
        if caller != thread:
            thread.join()

    worker_handler_th._state = State.TERMINATE
    handle_task_th._state = State.TERMINATE
    assert handle_result_th.is_alive(), "result handler not alive"
    handle_result_th._state = State.TERMINATE

    # Broadcast the end signal.
    change_notifier.put(EndSignal.END)
    out_queue.put(EndSignal.END)

    # Wait for the supervising thread first.
    _join_unless_caller(worker_handler_th)

    # Terminate every process that has not exited yet.
    for proc in (pool or ()):
        if proc.exitcode is None:
            proc.terminate()

    # Then wait for the task and result threads.
    _join_unless_caller(handle_task_th)
    _join_unless_caller(handle_result_th)

    # Finally wait for any process that is still alive.
    for proc in (pool or ()):
        if proc.is_alive():
            proc.join()
def QuiverPlotter(num):
    """Generator-based sink feeding a ``quiverPlotter`` process.

    Each value sent into the generator is forwarded on the queue only when
    the queue is currently empty; otherwise it is dropped.  Closing the
    generator joins the plotting process.
    """
    data_q = SimpleQueue()
    plot = Process(target=quiverPlotter, args=(data_q, num))
    plot.start()
    try:
        while True:
            data = (yield)
            # Forward only when the previous item has been consumed.
            if data_q.empty():
                data_q.put(data)
    except GeneratorExit:
        plot.join()
def Plotter3D(plots, scale):
    """Generator-based sink feeding a ``plotter3D`` process.

    Every value sent in is merged into one accumulated dict; that dict is
    put on the queue only when the queue is currently empty.  Closing the
    generator is silently accepted.
    """
    data_q = SimpleQueue()
    plot = Process(target=plotter3D, args=(data_q, plots, scale))
    plot.start()
    data = {}
    try:
        while True:
            update = (yield)
            data.update(update)
            # Forward only when the previous item has been consumed.
            if data_q.empty():
                data_q.put(data)
    except GeneratorExit:
        pass
def _wrapped_function(self, process_idx: int, function: Callable, args: Any, kwargs: Any,
                      return_queue: SimpleQueue) -> None:
    """Set up the worker, run ``function`` and publish its result from local rank 0.

    After the optional publish, every process waits on the ``"end-process"``
    barrier; local rank 0 then sleeps two seconds before returning.
    """
    self._worker_setup(process_idx)
    outcome = function(*args, **kwargs)

    if self.local_rank == 0:
        cpu_outcome = move_data_to_device(outcome, "cpu")
        return_queue.put(cpu_outcome)

    # https://github.com/pytorch/xla/issues/1801#issuecomment-602799542
    self.barrier("end-process")

    # Ensure that the rank 0 process is the one exiting last
    # https://github.com/pytorch/xla/issues/2190#issuecomment-641665358
    if self.local_rank != 0:
        return
    time.sleep(2)
class StatusTracker(object):
    """Collects status strings in a queue and keeps the latest 100 as history."""

    def __init__(self):
        self.history = []
        self.logq = SimpleQueue()

    def put(self, msg):
        """Queue one status message; only plain ``str`` is accepted."""
        assert(type(msg) is str)
        self.logq.put(msg)

    def flushq(self):
        """Move every queued message into the history, then trim the history."""
        queue_ = self.logq
        while True:
            if queue_.empty():
                break
            self.history.append(queue_.get())
        self.prune_history()

    def prune_history(self):
        """Drop everything but the 100 most recent history entries."""
        keep = 100
        self.history = self.history[-keep:]
def __call__(self, task_queue: SimpleQueue, pool: List[Process], in_queue: SimpleQueue, out_queue: SimpleQueue,
             cache):
    """Task-dispatch loop: move tasks from ``task_queue`` to ``in_queue``.

    The loop stops when the current thread's ``_state`` is no longer
    ``State.RUN`` or when ``EndSignal.END`` is read from ``task_queue``.
    A failure while forwarding a task is logged, not raised.
    ``pool``, ``out_queue`` and ``cache`` are accepted but not used here.
    """
    dispatcher = threading.current_thread()
    while dispatcher._state == State.RUN:
        task = task_queue.get()
        if task is EndSignal.END:
            logging.debug("got exit signal")
            break

        assert isinstance(task, Task), "task must implement Task class"
        try:
            in_queue.put(task)
        except Exception as e:
            logging.error(e)
    else:
        # Reached only when the state check failed, never after a break.
        logging.debug('task handler found thread._state != RUN')
def _wrapping_function(
    self,
    process_idx: int,
    trainer: Optional["pl.Trainer"],
    function: Callable,
    args: Any,
    kwargs: Any,
    return_queue: SimpleQueue,
) -> None:
    """Run ``function`` in a spawned worker and hand the result back from local rank 0.

    When a ``trainer`` is given, the raw result is first reduced with
    ``self._collect_rank_zero_results``.  Only local rank 0 puts the result
    (moved with ``move_data_to_device(..., "cpu")``) on ``return_queue``.
    """
    strategy_setup = self._strategy._worker_setup
    strategy_setup(process_idx)

    results = function(*args, **kwargs)
    if trainer is not None:
        results = self._collect_rank_zero_results(trainer, results)

    if self._strategy.local_rank != 0:
        return

    on_cpu = move_data_to_device(results, "cpu")
    return_queue.put(on_cpu)
def DensityPlotter(num, size):
    """Generator-based sink feeding an ``imagedraw`` process with density maps.

    Each array sent in is binned into a ``num`` x ``num`` 2-D histogram over
    ``[-size, size]`` on both axes (columns 0 and 1 of the array), expressed
    as relative deviation from the mean bin count, and queued as float32.
    Input arriving while the queue is not empty is dropped.
    """
    # num = size/scale
    bounds = [[-size, size], [-size, size]]
    data_q = SimpleQueue()

    plot = Process(target=imagedraw, args=(data_q, num))
    plot.start()
    while True:
        x = (yield)

        if not data_q.empty():
            continue

        counts = np.histogram2d(x[:, 0], x[:, 1], bins=num, range=bounds)[0]
        mean_count = np.average(counts)
        relative = (counts - mean_count) / mean_count
        data_q.put(relative.astype(np.float32))
class ErrorMonitor:
    """Relays an exception raised in a slave to a watcher thread in the master.

    Messages on ``self.pipe`` are either ``'Q'`` (quit) or a one-byte ``E``
    marker followed by a pickled ``(type, formatted_traceback)`` pair.
    """

    def __init__(self):
        self.pipe = SimpleQueue()
        self.message = None

    def main(self):
        """Watcher body: wait for exactly one message and act on it.

        An error message is stored (without its marker byte) and
        ``LongJump.longjump()`` is invoked; a quit message drops the pipe.
        """
        message = self.pipe.get()
        if message != 'Q':
            self.message = message[1:]
            LongJump.longjump()
        else:
            self.pipe = None

    def haserror(self):
        """ master only """
        return self.message is not None

    def start(self):
        """ master only """
        self.thread = Thread(target=self.main)
        self.thread.daemon = True
        self.thread.start()

    def join(self):
        """ master only """
        try:
            self.pipe.put('Q')
            self.thread.join()
        except Exception:
            # Best effort: the pipe or thread may already be gone.  Unlike a
            # bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
            pass
        finally:
            self.thread = None

    def slaveraise(self, type, error, traceback):
        """ slave only """
        # pickle.dumps returns bytes, so the marker must be bytes as well;
        # the previous str marker raised TypeError on Python 3.
        # (b'E' is an ordinary str on Python 2, so the message is unchanged there.)
        message = b'E' * 1 + pickle.dumps(
            (type, ''.join(tb.format_exception(type, error, traceback))))
        if self.pipe is not None:
            self.pipe.put(message)
class LinePlotter:
    """Front end that forwards plot data to a ``LinePlotterProcess`` over a queue."""

    def __init__(self, *args, **kwargs):
        """Create the queue and plotting process and register a first plot."""
        self.data_q = SimpleQueue()
        self.data = {}
        self.plot = LinePlotterProcess(self.data_q)
        self.plot.add_plot(*args, **kwargs)

    def show(self):
        """Start the plotting process."""
        self.plot.start()

    def add_plot(self, *args, **kwargs):
        """Register another plot with the plotting process."""
        self.plot.add_plot(*args, **kwargs)

    def send(self, data):
        """Forward ``data`` to the plotting process, or shut down.

        Passing ``GeneratorExit`` joins the plotting process and returns.
        Any other value is merged into ``self.data`` and, when the queue is
        currently empty, put on the queue.
        """
        if data == GeneratorExit:
            self.plot.join()
            # Stop here: falling through would call dict.update() with the
            # GeneratorExit class and raise TypeError.
            return
        self.data.update(data)
        if self.data_q.empty():
            # NOTE(review): only the latest ``data`` is queued, not the
            # accumulated ``self.data`` -- confirm this is intended.
            self.data_q.put(data)
def from_twitter_api(target, endpoint, config):
    """Consume tweets from a Streaming API endpoint.

    A consumer is started first, then a producer reading from the endpoint's
    URL; the two communicate through a queue.  When the producer finishes or
    fails, ``StopIteration`` is queued and the consumer is joined.

    :param target: handed to the ``StreamConsumer``
    :param endpoint: ``'twitter://sample'`` or ``'twitter://filter'``
    :param config: provides the ``'twitter'`` credential items and, for the
        filter endpoint, ``global_filter.predicates``
    :raises KeyError: for an endpoint that is not listed above
    """
    endpoint_to_url = {
        'twitter://sample': 'https://stream.twitter.com/1.1/statuses/sample.json',
        'twitter://filter': 'https://stream.twitter.com/1.1/statuses/filter.json',
    }
    # Resolve the URL before anything is started, so an unknown endpoint
    # cannot leave a running consumer behind.
    url = endpoint_to_url[endpoint]

    if endpoint == 'twitter://filter':
        filter_predicates = config.global_filter.predicates
        kwargs = {
            'follow': filter_predicates['follow'],
            'track': filter_predicates['track'],
            'locations': filter_predicates['locations'],
        }
    else:
        kwargs = {}

    # The communication point of the consumer and producer processes.
    queue = SimpleQueue()

    # Start the consumer first
    consumer = StreamConsumer(queue, target)
    consumer.start()

    try:
        # then the producer.  Built inside the ``try`` so a failure while
        # creating or starting it still stops the consumer.
        producer = StreamProducer(twitter_credentials=dict(config.items('twitter')),
                                  target=consumers.to_simple_queue(queue),
                                  url=url,
                                  **kwargs)
        producer.start()
        producer.join()
    finally:
        queue.put(StopIteration)
        consumer.join()
class ErrorMonitor:
    """Relays an exception raised in a slave to a watcher thread in the master.

    Messages on ``self.pipe`` are either ``'Q'`` (quit) or a one-byte ``E``
    marker followed by a pickled ``(type, formatted_traceback)`` pair.
    """

    def __init__(self):
        self.pipe = SimpleQueue()
        self.message = None

    def main(self):
        """Watcher body: wait for exactly one message and act on it.

        An error message is stored (without its marker byte) and
        ``LongJump.longjump()`` is invoked; a quit message drops the pipe.
        """
        message = self.pipe.get()
        if message != 'Q':
            self.message = message[1:]
            LongJump.longjump()
        else:
            self.pipe = None

    def haserror(self):
        """ master only """
        return self.message is not None

    def start(self):
        """ master only """
        self.thread = Thread(target=self.main)
        self.thread.daemon = True
        self.thread.start()

    def join(self):
        """ master only """
        try:
            self.pipe.put('Q')
            self.thread.join()
        except Exception:
            # Best effort: the pipe or thread may already be gone.  Unlike a
            # bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
            pass
        finally:
            self.thread = None

    def slaveraise(self, type, error, traceback):
        """ slave only """
        # pickle.dumps returns bytes, so the marker must be bytes as well;
        # the previous str marker raised TypeError on Python 3.
        # (b'E' is an ordinary str on Python 2, so the message is unchanged there.)
        message = b'E' * 1 + pickle.dumps(
            (type, ''.join(tb.format_exception(type, error, traceback))))
        if self.pipe is not None:
            self.pipe.put(message)
class BaseMultiprocessingRunner(BaseRunner):
    """Runner that enumerates inputs in one process and maps them in several others.

    Uses one mapper per CPU minus one (at least one).  Inputs and mapped
    outputs travel through two queues; ``self.STOP_MSG`` marks the end of
    each stream.
    """

    def __init__(self):
        super(BaseMultiprocessingRunner, self).__init__()
        spare_cpus = multiprocessing.cpu_count() - 1
        self.numprocs = max(spare_cpus, 1)
        self.map_input_queue = SimpleQueue()
        self.map_output_queue = SimpleQueue()

    def run_map(self):
        """Mapper body: map inputs until STOP_MSG is read, then emit STOP_MSG."""
        inputs = iter(self.map_input_queue.get, self.STOP_MSG)
        for record in inputs:
            self.job.map(record, self.map_output_queue.put)
        self.map_output_queue.put(self.STOP_MSG)
        if self.debug:
            debug_print("Output : STOP sent")

    def run_enumerate(self):
        """Enumerator body: queue every input, then one STOP_MSG per mapper."""
        for record in self.job.enumerate():
            self.map_input_queue.put(record)
        remaining = self.numprocs
        while remaining > 0:
            self.map_input_queue.put(self.STOP_MSG)
            remaining -= 1
        if self.debug:
            debug_print("Input: STOP sent")

    def run(self, job):
        """Run ``job`` and return the value of ``self.run_reduce()``."""
        self.job = job

        # Process that reads the input file
        self.enumeration_process = multiprocessing.Process(target=self.run_enumerate, args=())

        self.mappers = []
        for _ in range(self.numprocs):
            self.mappers.append(multiprocessing.Process(target=self.run_map, args=()))

        self.enumeration_process.start()
        for proc in self.mappers:
            proc.start()

        reduced = self.run_reduce()

        self.enumeration_process.join()
        for proc in self.mappers:
            proc.join()
        return reduced
def main():
    """Start the listener and worker threads, then forward console input forever.

    The send port and receive port come from ``sys.argv[1]`` and
    ``sys.argv[2]``.  Each entered line is queued for the worker as a list
    of messages; an entry ending in ``.txt`` is expanded with ``readFile``.
    """
    global TCP_SEND_PORT, TCP_SEND_IP, TCP_RECEIVE_IP, TCP_RECEIVE_PORT
    global key_store, eventual_requests
    global eventual_write_lock, eventual_read_lock

    key_store = {}
    eventual_requests = {}
    eventual_write_lock = threading.Lock()
    eventual_read_lock = threading.Lock()

    signal.signal(signal.SIGINT, signal_handler)

    TCP_RECEIVE_IP = TCP_SEND_IP = socket.gethostbyname(socket.gethostname())
    TCP_SEND_PORT = int(sys.argv[1])
    TCP_RECEIVE_PORT = int(sys.argv[2])
    BUFFER_SIZE = 1024

    listener = threading.Thread(target=listening_thread, args=[BUFFER_SIZE])
    listener.daemon = True
    listener.start()

    message_queue = SimpleQueue()
    worker = threading.Thread(target=worker_thread, args=[message_queue])
    worker.daemon = True
    worker.start()

    prompt = bcolors.HEADER + bcolors.UNDERLINE + "Enter Message:\n" + bcolors.ENDC
    while True:
        command = str(raw_input(prompt))
        if command.endswith('.txt'):
            messages = readFile(command)
        else:
            messages = [command]
        message_queue.put(messages)
        timestamp = str(datetime.datetime.now().strftime("%H:%M:%S:%f"))
        print(bcolors.OKBLUE + 'System time is ' + timestamp + bcolors.ENDC)
def export_table(host, port, auth_key, db, table, directory, fields, format, error_queue, progress_info,
                 stream_semaphore, exit_event):
    """Export one table: count it, write its metadata, then stream its rows to a writer.

    Failures are never raised to the caller; they are reported on
    ``error_queue`` as ``(exception_type, exception_instance, extracted_traceback)``.

    :param host, port, auth_key: connection parameters passed to ``r.connect``
    :param db, table: database and table to export
    :param directory: output directory handed to the metadata and writer helpers
    :param fields, format: forwarded to ``launch_writer``
    :param error_queue: queue receiving error triples
    :param progress_info: forwarded to the count and scan helpers
    :param stream_semaphore: held while the writer is launched and the table is scanned
    :param exit_event: forwarded to the table scan
    """
    writer = None

    try:
        # This will open at least one connection for each rdb_call_wrapper, which is
        # a little wasteful, but shouldn't be a big performance hit
        conn_fn = lambda: r.connect(host, port, auth_key=auth_key)
        rdb_call_wrapper(conn_fn, "count", get_table_size, db, table, progress_info)
        table_info = rdb_call_wrapper(conn_fn, "info", write_table_metadata, db, table, directory)

        with stream_semaphore:
            task_queue = SimpleQueue()
            writer = launch_writer(format, directory, db, table, fields, task_queue, error_queue)
            writer.start()

            rdb_call_wrapper(conn_fn, "table scan", read_table_into_queue, db, table,
                             table_info["primary_key"], task_queue, progress_info, exit_event)
    except (r.RqlError, r.RqlDriverError) as ex:
        error_queue.put((RuntimeError, RuntimeError(ex.message), traceback.extract_tb(sys.exc_info()[2])))
    except:
        # Deliberately broad: every failure must be reported through error_queue.
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb)))
    finally:
        if writer is None:
            # The writer was never launched, so the failure that prevented it
            # has already been reported above; do not add a misleading
            # "writer unexpectedly stopped" error on top of it.
            pass
        elif writer.is_alive():
            task_queue.put(("exit", "event"))  # Exit is triggered by sending a message with two objects
            writer.join()
        else:
            error_queue.put((RuntimeError, RuntimeError("writer unexpectedly stopped"),
                             traceback.extract_tb(sys.exc_info()[2])))
def main():
    """Start the listener and worker threads, then forward console input forever.

    The send port and receive port come from ``sys.argv[1]`` and
    ``sys.argv[2]``.  Each entered line is queued for the worker as a list
    of messages; an entry ending in ``.txt`` is expanded with ``readFile``.
    """
    global TCP_SEND_PORT, TCP_SEND_IP, TCP_RECEIVE_IP, TCP_RECEIVE_PORT
    global key_store, eventual_requests
    global eventual_write_lock, eventual_read_lock

    key_store = {}
    eventual_requests = {}
    eventual_write_lock = threading.Lock()
    eventual_read_lock = threading.Lock()

    signal.signal(signal.SIGINT, signal_handler)

    TCP_RECEIVE_IP = TCP_SEND_IP = socket.gethostbyname(socket.gethostname())
    TCP_SEND_PORT = int(sys.argv[1])
    TCP_RECEIVE_PORT = int(sys.argv[2])
    BUFFER_SIZE = 1024

    listener = threading.Thread(target=listening_thread, args=[BUFFER_SIZE])
    listener.daemon = True
    listener.start()

    message_queue = SimpleQueue()
    worker = threading.Thread(target=worker_thread, args=[message_queue])
    worker.daemon = True
    worker.start()

    prompt = bcolors.HEADER + bcolors.UNDERLINE + "Enter Message:\n" + bcolors.ENDC
    while True:
        command = str(raw_input(prompt))
        if command.endswith('.txt'):
            messages = readFile(command)
        else:
            messages = [command]
        message_queue.put(messages)
        timestamp = str(datetime.datetime.now().strftime("%H:%M:%S:%f"))
        print(bcolors.OKBLUE + 'System time is ' + timestamp + bcolors.ENDC)
class BaseMultiprocessingRunner(BaseRunner):
    """Runner that enumerates inputs in one process and maps them in several others.

    Uses one mapper per CPU minus one (at least one).  Inputs and mapped
    outputs travel through two queues; ``self.STOP_MSG`` marks the end of
    each stream.
    """

    def __init__(self):
        super(BaseMultiprocessingRunner, self).__init__()
        spare_cpus = multiprocessing.cpu_count() - 1
        self.numprocs = max(spare_cpus, 1)
        self.map_input_queue = SimpleQueue()
        self.map_output_queue = SimpleQueue()

    def run_map(self):
        """Mapper body: map inputs until STOP_MSG is read, then emit STOP_MSG."""
        inputs = iter(self.map_input_queue.get, self.STOP_MSG)
        for record in inputs:
            self.job.map(record, self.map_output_queue.put)
        self.map_output_queue.put(self.STOP_MSG)
        if self.debug:
            debug_print("Output : STOP sent")

    def run_enumerate(self):
        """Enumerator body: queue every input, then one STOP_MSG per mapper."""
        for record in self.job.enumerate():
            self.map_input_queue.put(record)
        remaining = self.numprocs
        while remaining > 0:
            self.map_input_queue.put(self.STOP_MSG)
            remaining -= 1
        if self.debug:
            debug_print("Input: STOP sent")

    def run(self, job):
        """Run ``job`` and return the value of ``self.run_reduce()``."""
        self.job = job

        # Process that reads the input file
        self.enumeration_process = multiprocessing.Process(target=self.run_enumerate, args=())

        self.mappers = []
        for _ in range(self.numprocs):
            self.mappers.append(multiprocessing.Process(target=self.run_map, args=()))

        self.enumeration_process.start()
        for proc in self.mappers:
            proc.start()

        reduced = self.run_reduce()

        self.enumeration_process.join()
        for proc in self.mappers:
            proc.join()
        return reduced
def main():
    """Fan the lines of the file named by ``sys.argv[1]`` out to query workers.

    Ten ``query`` processes read lines from ``read_queue`` and share
    ``write_queue`` with one ``write_file`` process.  After the input is
    exhausted (or reading fails) each query worker receives one
    ``"End of File"`` marker, the workers are joined, and the writer is
    stopped with ``("End of File", False)``.
    """
    FORMAT = '%(levelname)s %(asctime)-15s %(threadName)s %(message)s'
    logging.basicConfig(format=FORMAT, level=logging.DEBUG)

    # Single source of truth for the worker count: one end marker is sent per worker.
    worker_count = 10

    with open(sys.argv[1]) as fp:
        read_queue = Queue()
        write_queue = Queue()
        procs = [Process(target=query, args=(read_queue, write_queue))
                 for _ in range(worker_count)]
        write_proc = Process(target=write_file, args=(write_queue, ))

        # Explicit loops instead of map(): map() is lazy on Python 3, so the
        # processes would never be started or joined.
        for proc in procs:
            proc.start()
        write_proc.start()

        try:
            for x in fp:
                read_queue.put(x.rstrip('\n'))
        except Exception as e:
            print(e)
        finally:
            for _ in range(worker_count):
                read_queue.put("End of File")
            for proc in procs:
                proc.join()
            write_queue.put(("End of File", False))
            write_proc.join()
#open tree file if args.treeFile: treeFile = gzip.open(args.treeFile, "r") if args.treeFile.endswith(".gz") else open( args.treeFile, "r") else: treeFile = sys.stdin line = treeFile.readline() ########################################################################################################################################## while len(line) >= 1: lineQueue.put((linesQueued, line.rstrip())) linesQueued += 1 line = treeFile.readline() ############################################################################################################################################ ### wait for queues to empty print >> sys.stderr, "\nWriting final results...\n" while resultsHandled < linesQueued: sleep(1) sleep(5) treeFile.close() weightsFile.close()
def magic_memit(self, line=''):
    """Measure memory usage of a Python statement

    Usage, in line mode:
      %memit [-ir<R>t<T>] statement

    Options:
    -r<R>: repeat the loop iteration <R> times and take the best result.
    Default: 1

    -i: run the code in the current environment, without forking a new process.
    This is required on some MacOS versions of Accelerate if your line contains
    a call to `np.dot`.

    -t<T>: timeout after <T> seconds. Unused if `-i` is active. Default: None

    Examples
    --------
    ::

      In [1]: import numpy as np

      In [2]: %memit np.zeros(1e7)
      maximum of 1: 76.402344 MB per loop

      In [3]: %memit np.ones(1e6)
      maximum of 1: 7.820312 MB per loop

      In [4]: %memit -r 10 np.empty(1e8)
      maximum of 10: 0.101562 MB per loop

      In [5]: memit -t 3 while True: pass;
      Subprocess timed out.
      Subprocess timed out.
      Subprocess timed out.
      ERROR: all subprocesses exited unsuccessfully. Try again with the `-i`
      option.
      maximum of 1: -inf MB per loop

    """
    opts, stmt = self.parse_options(line, 'r:t:i', posix=False, strict=False)
    repeat = int(getattr(opts, 'r', 1))
    if repeat < 1:
        # Was ``repeat == 1``: a comparison with no effect, which left
        # ``repeat`` at 0 or below and made max() fail on an empty list.
        repeat = 1
    timeout = int(getattr(opts, 't', 0))
    if timeout <= 0:
        timeout = None
    run_in_place = hasattr(opts, 'i')

    # Don't depend on multiprocessing:
    try:
        import multiprocessing as pr
        from multiprocessing.queues import SimpleQueue
        q = SimpleQueue()
    except ImportError:
        class ListWithPut(list):
            "A list offering the `put`/`get` subset of the queue interface."
            def put(self, x):
                self.append(x)

            def get(self):
                # The results are read back with q.get() below.
                return self.pop(0)
        q = ListWithPut()
        print('WARNING: cannot import module `multiprocessing`. Forcing the '
              '`-i` option.')
        run_in_place = True

    ns = self.shell.user_ns

    if run_in_place:
        for _ in xrange(repeat):
            _get_usage(q, stmt, ns=ns)
    else:
        # run in consecutive subprocesses
        at_least_one_worked = False
        for _ in xrange(repeat):
            p = pr.Process(target=_get_usage, args=(q, stmt, 'pass', ns))
            p.start()
            p.join(timeout=timeout)
            if p.exitcode == 0:
                at_least_one_worked = True
            else:
                p.terminate()
                if p.exitcode is None:
                    print('Subprocess timed out.')
                else:
                    print('Subprocess exited with code %d.' % p.exitcode)
                # Keep one result per repetition so the reads below never block.
                q.put(float('-inf'))

        if not at_least_one_worked:
            print('ERROR: all subprocesses exited unsuccessfully. Try again '
                  'with the `-i` option.')

    usages = [q.get() for _ in xrange(repeat)]
    usage = max(usages)
    print('maximum of %d: %f MB per loop' % (repeat, usage))
def magic_memit(ns, line='', repeat=1, timeout=None, run_in_place=True):
    """Measure memory usage of a Python statement

    Parameters:
    ns: namespace (dict) in which the statement is executed.

    line: the statement to measure.

    repeat: repeat the measurement this many times and take the best result.
    Values below 1 are treated as 1. Default: 1

    run_in_place: run the code in the current environment, without forking a
    new process. This is required on some MacOS versions of Accelerate if your
    line contains a call to `np.dot`. Default: True

    timeout: timeout after this many seconds. Unused if `run_in_place` is
    active. Default: None

    Returns the maximum measured usage difference, in the unit reported by
    `memory_profiler.memory_usage`. Raises RuntimeError when every subprocess
    failed.

    Examples of the equivalent `%memit` magic
    -----------------------------------------
    ::

      In [1]: import numpy as np

      In [2]: %memit np.zeros(1e7)
      maximum of 3: 76.402344 MB per loop

      In [3]: %memit np.ones(1e6)
      maximum of 3: 7.820312 MB per loop

      In [4]: %memit -r 10 np.empty(1e8)
      maximum of 10: 0.101562 MB per loop

    """
    if repeat < 1:
        # Was ``repeat == 1``: a comparison with no effect.
        repeat = 1
    # ``timeout`` defaults to None, which cannot be ordered against 0 on Python 3.
    if timeout is not None and timeout <= 0:
        timeout = None

    # Don't depend on multiprocessing:
    try:
        import multiprocessing as pr
        from multiprocessing.queues import SimpleQueue
        q = SimpleQueue()
    except ImportError:
        class ListWithPut(list):
            "A list offering the `put`/`get` subset of the queue interface."
            def put(self, x):
                self.append(x)

            def get(self):
                # The results are read back with q.get() below.
                return self.pop(0)
        q = ListWithPut()
        print('WARNING: cannot import module `multiprocessing`. Forcing the '
              '`-i` option.')
        run_in_place = True

    def _get_usage(q, stmt, setup='pass', ns=None):
        """Put on ``q`` the memory growth caused by running ``stmt`` in ``ns``."""
        from memory_profiler import memory_usage as _mu
        if ns is None:
            ns = {}
        try:
            # The call form of exec behaves the same on Python 2 and Python 3.
            exec(setup, ns)
            _mu0 = _mu()[0]
            exec(stmt, ns)
            _mu1 = _mu()[0]
            q.put(_mu1 - _mu0)
        except Exception:
            q.put(float('-inf'))
            raise  # bare raise keeps the original traceback

    if run_in_place:
        for _ in xrange(repeat):
            _get_usage(q, line, ns=ns)
    else:
        # run in consecutive subprocesses
        at_least_one_worked = False
        for _ in xrange(repeat):
            p = pr.Process(target=_get_usage, args=(q, line, 'pass', ns))
            p.start()
            p.join(timeout=timeout)
            if p.exitcode == 0:
                at_least_one_worked = True
            else:
                p.terminate()
                if p.exitcode is None:
                    print('Subprocess timed out.')
                else:
                    print('Subprocess exited with code %d.' % p.exitcode)
                # Keep one result per repetition so the reads below never block.
                q.put(float('-inf'))

        if not at_least_one_worked:
            raise RuntimeError('ERROR: all subprocesses exited unsuccessfully.'
                               ' Try again with the `-i` option.')

    usages = [q.get() for _ in xrange(repeat)]
    usage = max(usages)
    return usage
def memit(self, line='', setup='pass'):
    """Measure memory usage of a Python statement

    Usage, in line mode:
      %memit [-ir<R>t<T>] statement

    Options:
    -r<R>: repeat the loop iteration <R> times and take the best result.
    Default: 3

    -i: run the code in the current environment, without forking a new process.
    This is required on some MacOS versions of Accelerate if your line contains
    a call to `np.dot`.

    -t<T>: timeout after <T> seconds. Unused if `-i` is active. Default: None

    Examples
    --------
    ::

      In [1]: import numpy as np

      In [2]: %memit np.zeros(1e7)
      maximum of 3: 76.402344 MB per loop

      In [3]: %memit np.ones(1e6)
      maximum of 3: 7.820312 MB per loop

      In [4]: %memit -r 10 np.empty(1e8)
      maximum of 10: 0.101562 MB per loop

      In [5]: memit -t 3 while True: pass;
      Subprocess timed out.
      Subprocess timed out.
      Subprocess timed out.
      ERROR: all subprocesses exited unsuccessfully. Try again with the `-i`
      option.
      maximum of 3: -inf MB per loop

    """
    opts, stmt = self.parse_options(line, 'r:t:i', posix=False, strict=False)
    repeat = int(getattr(opts, 'r', 3))
    if repeat < 1:
        # Was ``repeat == 1``: a comparison with no effect, which left
        # ``repeat`` at 0 or below and made max() fail on an empty list.
        repeat = 1
    timeout = int(getattr(opts, 't', 0))
    if timeout <= 0:
        timeout = None
    run_in_place = hasattr(opts, 'i')

    # Don't depend on multiprocessing:
    try:
        import multiprocessing as pr
        from multiprocessing.queues import SimpleQueue
        q = SimpleQueue()
    except ImportError:
        class ListWithPut(list):
            "A list offering the `put`/`get` subset of the queue interface."
            def put(self, x):
                self.append(x)

            def get(self):
                # The results are read back with q.get() below.
                return self.pop(0)
        q = ListWithPut()
        print('WARNING: cannot import module `multiprocessing`. Forcing '
              'the `-i` option.')
        run_in_place = True

    ns = self.shell.user_ns

    def _get_usage(q, stmt, setup='pass', ns=None):
        """Put on ``q`` the memory growth caused by running ``stmt`` in ``ns``."""
        if ns is None:
            ns = {}
        try:
            # ``exec(code) in ns`` only targets ``ns`` on Python 2; on
            # Python 3 it runs the code in the local scope.  The two-argument
            # call form targets ``ns`` on both.
            exec(setup, ns)
            _mu0 = _mu()[0]
            exec(stmt, ns)
            _mu1 = _mu()[0]
            q.put(_mu1 - _mu0)
        except Exception:
            q.put(float('-inf'))
            raise  # bare raise keeps the original traceback

    if run_in_place:
        for _ in xrange(repeat):
            _get_usage(q, stmt, ns=ns)
    else:
        # run in consecutive subprocesses
        at_least_one_worked = False
        for _ in xrange(repeat):
            p = pr.Process(target=_get_usage, args=(q, stmt, 'pass', ns))
            p.start()
            p.join(timeout=timeout)
            if p.exitcode == 0:
                at_least_one_worked = True
            else:
                p.terminate()
                if p.exitcode is None:
                    print('Subprocess timed out.')
                else:
                    print('Subprocess exited with code %d.' % p.exitcode)
                # Keep one result per repetition so the reads below never block.
                q.put(float('-inf'))

        if not at_least_one_worked:
            print('ERROR: all subprocesses exited unsuccessfully. Try '
                  'again with the `-i` option.')

    usages = [q.get() for _ in xrange(repeat)]
    usage = max(usages)
    print("maximum of %d: %f MB per loop" % (repeat, usage))
class TaskQueueDispatcher:
    """Encapsulate the data structures needed to dispatch the workers that
    share one task queue.
    """

    def __init__(self, key, task_group, randomize):
        self.key = key
        self.gen_worker = task_group['gen_worker']
        self.task_ids = task_group['task_ids']
        self.is_parallel = task_group['is_parallel']

        # Shuffling is only honoured for parallel task groups.
        self.randomize = randomize if self.is_parallel else False
        if self.randomize:
            random.shuffle(self.task_ids)

        self.result_queue = SimpleQueue()
        self.task_queue = SimpleQueue()
        for queued_id in self.task_ids:
            self.task_queue.put(queued_id)

        self.worker_ids = set()
        self.done = False
        self.done_task_ids = set()

    def _run_worker(self, worker_id, tcp_port_range):
        """Entry function for worker processes."""
        port_start, port_end = tcp_port_range[0], tcp_port_range[1]
        os.environ['TEST_RUN_WORKER_ID'] = str(worker_id)
        os.environ['TEST_RUN_TCP_PORT_START'] = str(port_start)
        os.environ['TEST_RUN_TCP_PORT_END'] = str(port_end)
        color_stdout.queue = self.result_queue
        self.gen_worker(worker_id).run_all(self.task_queue, self.result_queue)

    def add_worker(self, worker_id, tcp_port_range):
        """Queue a stop marker, register the worker and start its process."""
        # Note: each of our workers should consume only one None, but for the
        # case of abnormal circumstances we listen for processes termination
        # (method 'check_for_dead_processes') and for time w/o output from
        # workers (class 'HangWatcher').
        self.task_queue.put(None)  # 'stop worker' marker

        target = functools.partial(self._run_worker, worker_id, tcp_port_range)

        self.worker_ids.add(worker_id)
        proc = multiprocessing.Process(target=target)
        proc.start()
        return proc

    def del_worker(self, worker_id):
        """Forget the worker and mark the whole task queue as done."""
        self.worker_ids.remove(worker_id)
        # mark task queue as done when the first worker done to prevent cycling
        # with add-del workers
        self.done = True

    def mark_task_done(self, task_id):
        """Record that ``task_id`` has been completed."""
        self.done_task_ids.add(task_id)

    def undone_tasks(self):
        """Return the ids of tasks not marked done, in their original order."""
        return [tid for tid in self.task_ids if tid not in self.done_task_ids]
class Table(Process):
    """Process that serves table operations received over a queue.

    Requests are ``(operation, items)`` tuples read from ``input_interface``;
    ``None`` stops the loop. Replies go to the queue registered in
    ``output_interfaces`` (keyed by instruction id, or ``''`` for
    ``query_entry``).

    Supported operations: ``add_entry``, ``del_entry``, ``query_entry``,
    ``write``, ``read`` and ``lookup``. Any other operation raises
    ``RuntimeError``.
    """

    def __init__(self, patterns):
        super(Table, self).__init__()
        self.patterns = patterns
        self.input_interface = Queue()
        self.output_interfaces = {'': Queue()}

    def stop(self):
        """Send the stop marker and wait for the process to finish."""
        self.input_interface.put(None)
        self.join()

    def run(self):
        """Serve requests until ``None`` is received or on KeyboardInterrupt."""
        while True:
            try:
                data = self.input_interface.get()
                if data is None:
                    return
                operation, items = data
                if operation == 'add_entry':
                    index, entry = items
                    self.patterns.add_entry(index, entry)
                elif operation == 'del_entry':
                    index = items
                    self.patterns.del_entry(index)
                elif operation == 'query_entry':
                    index = items
                    entry = self.patterns.query_entry(index)
                    self.output_interfaces[''].put(entry)
                elif operation == 'write':
                    index, pattern = items
                    self.patterns[index] = pattern
                elif operation == 'read':
                    index, instruction_id = items
                    pattern = self.patterns[index]
                    self.output_interfaces[instruction_id].put(pattern)
                elif operation == 'lookup':
                    values, instruction_id = items
                    # -1 is reported when no row matches.
                    value = -1
                    if isinstance(self.patterns, MatchPatterns):
                        for i in range(0, len(self.patterns)):
                            pattern_list = self.patterns[i].values()
                            if len(pattern_list) != len(values):
                                raise RuntimeError()
                            # Each pattern is a (value, mask) pair. It is
                            # indexed rather than unpacked in the lambda
                            # signature, because tuple parameters are a
                            # SyntaxError on Python 3 (PEP 3113).
                            if all(map((lambda pair, source:
                                        pair[0].value == (source.value & pair[1])),
                                       pattern_list, values)):
                                value = i
                                break
                    elif isinstance(self.patterns, SimplePatterns):
                        for i in range(0, len(self.patterns)):
                            pattern_list = self.patterns[i].values()
                            if all(map((lambda expected, source:
                                        expected.value == source.value),
                                       pattern_list, values)):
                                value = i
                                break
                    else:
                        raise RuntimeError()
                    self.output_interfaces[instruction_id].put(value)
                else:
                    raise RuntimeError()
            except KeyboardInterrupt:
                break
class DataLoaderIter(object):
    """Iterates once over the DataLoader's dataset, as specified by the sampler.

    With ``num_workers == 0`` every batch is built in the calling process.
    Otherwise batches of indices are sent to worker processes through
    ``index_queue`` and results come back through ``data_queue`` tagged with
    their send index; results that arrive ahead of their turn are parked in
    ``reorder_dict`` until ``rcvd_idx`` reaches them.
    """

    def __init__(self, loader):
        self.dataset = loader.dataset
        self.collate_fn = loader.collate_fn
        self.batch_sampler = loader.batch_sampler
        self.num_workers = loader.num_workers
        self.pin_memory = loader.pin_memory
        self.done_event = threading.Event()

        self.sample_iter = iter(self.batch_sampler)

        if self.num_workers > 0:
            self.index_queue = SimpleQueue()
            self.data_queue = SimpleQueue()
            # Number of index batches sent whose result has not been read yet.
            self.batches_outstanding = 0
            self.shutdown = False
            # Next index to tag an outgoing request with / next index to yield.
            self.send_idx = 0
            self.rcvd_idx = 0
            self.reorder_dict = {}

            self.workers = [
                multiprocessing.Process(
                    target=_worker_loop,
                    args=(self.dataset, self.index_queue, self.data_queue, self.collate_fn))
                for _ in range(self.num_workers)
            ]

            for w in self.workers:
                w.daemon = True  # ensure that the worker exits on process exit
                w.start()

            if self.pin_memory:
                # Insert a thread between the workers' queue and the consumer:
                # it reads from the multiprocessing queue and feeds a
                # thread-level queue that __next__ reads from.
                in_data = self.data_queue
                self.data_queue = queue.Queue()
                self.pin_thread = threading.Thread(
                    target=_pin_memory_loop,
                    args=(in_data, self.data_queue, self.done_event))
                self.pin_thread.daemon = True
                self.pin_thread.start()

            # prime the prefetch loop
            for _ in range(2 * self.num_workers):
                self._put_indices()
        else:
            # NOTE(review): indentation was lost in this copy of the file; this
            # `else` is assumed to pair with `if self.num_workers > 0` -- confirm.
            if hasattr(self.dataset, 'build'):
                # Run the build method for the dataset
                self.dataset.build()

    def __len__(self):
        return len(self.batch_sampler)

    def __next__(self):
        """Return the next batch in sampler order.

        Raises StopIteration when the sampler is exhausted (same-process mode)
        or when no requested batch is outstanding (worker mode).
        """
        if self.num_workers == 0:  # same-process loading
            indices = next(self.sample_iter)  # may raise StopIteration
            batch = self.collate_fn([self.dataset[i] for i in indices])
            if self.pin_memory:
                batch = pin_memory_batch(batch)
            return batch

        # check if the next sample has already been generated
        if self.rcvd_idx in self.reorder_dict:
            batch = self.reorder_dict.pop(self.rcvd_idx)
            return self._process_next_batch(batch)

        if self.batches_outstanding == 0:
            self._shutdown_workers()
            raise StopIteration

        while True:
            assert (not self.shutdown and self.batches_outstanding > 0)
            idx, batch = self.data_queue.get()
            self.batches_outstanding -= 1
            if idx != self.rcvd_idx:
                # store out-of-order samples
                self.reorder_dict[idx] = batch
                continue
            return self._process_next_batch(batch)

    next = __next__  # Python 2 compatibility

    def __iter__(self):
        return self

    def _put_indices(self):
        """Send the next batch of indices to the workers, if any remain."""
        assert self.batches_outstanding < 2 * self.num_workers
        indices = next(self.sample_iter, None)
        if indices is None:
            return
        self.index_queue.put((self.send_idx, indices))
        self.batches_outstanding += 1
        self.send_idx += 1

    def _process_next_batch(self, batch):
        """Advance the receive index, request more work and return ``batch``.

        If ``batch`` is an ExceptionWrapper, the wrapped exception type is
        raised with the wrapped message instead.
        """
        self.rcvd_idx += 1
        self._put_indices()
        if isinstance(batch, ExceptionWrapper):
            raise batch.exc_type(batch.exc_msg)
        return batch

    def __getstate__(self):
        # TODO: add limited pickling support for sharing an iterator
        # across multiple threads for HOGWILD.
        # Probably the best way to do this is by moving the sample pushing
        # to a separate thread and then just sharing the data queue
        # but signalling the end is tricky without a non-blocking API
        raise NotImplementedError("DataLoaderIterator cannot be pickled")

    def _shutdown_workers(self):
        """Set ``done_event`` and queue one ``None`` per worker, once only."""
        if not self.shutdown:
            self.shutdown = True
            self.done_event.set()
            for _ in self.workers:
                self.index_queue.put(None)

    def __del__(self):
        if self.num_workers > 0:
            self._shutdown_workers()
class ProcessPoolExecutor(_base.Executor):
    """Executor that runs submitted calls in a pool of worker processes."""

    def __init__(self, max_workers=None):
        """Initializes a new ProcessPoolExecutor instance.

        Args:
            max_workers: The maximum number of processes that can be used to
                execute the given calls. If None or not given then as many
                worker processes will be created as the machine has processors.

        Raises:
            ValueError: If max_workers is given and is not greater than 0.
        """
        _check_system_limits()

        if max_workers is None:
            self._max_workers = os.cpu_count() or 1
        else:
            # A non-positive pool size would never start a worker process, so
            # every submitted future would stay pending forever. Reject it
            # here instead of failing silently later.
            if max_workers <= 0:
                raise ValueError("max_workers must be greater than 0")
            self._max_workers = max_workers

        # Make the call queue slightly larger than the number of processes to
        # prevent the worker processes from idling. But don't make it too big
        # because futures in the call queue cannot be cancelled.
        self._call_queue = multiprocessing.Queue(self._max_workers +
                                                 EXTRA_QUEUED_CALLS)
        # Killed worker processes can produce spurious "broken pipe"
        # tracebacks in the queue's own worker thread. But we detect killed
        # processes anyway, so silence the tracebacks.
        self._call_queue._ignore_epipe = True
        self._result_queue = SimpleQueue()
        self._work_ids = queue.Queue()
        self._queue_management_thread = None
        # Map of pids to processes
        self._processes = {}

        # Shutdown is a two-step process.
        self._shutdown_thread = False
        self._shutdown_lock = threading.Lock()
        self._broken = False
        self._queue_count = 0
        self._pending_work_items = {}

    def _start_queue_management_thread(self):
        """Start the worker processes and the management thread, once."""
        # When the executor gets lost, the weakref callback will wake up
        # the queue management thread.
        def weakref_cb(_, q=self._result_queue):
            q.put(None)

        if self._queue_management_thread is None:
            # Start the processes so that their sentinels are known.
            self._adjust_process_count()
            self._queue_management_thread = threading.Thread(
                target=_queue_management_worker,
                args=(weakref.ref(self, weakref_cb),
                      self._processes,
                      self._pending_work_items,
                      self._work_ids,
                      self._call_queue,
                      self._result_queue))
            self._queue_management_thread.daemon = True
            self._queue_management_thread.start()
            _threads_queues[self._queue_management_thread] = self._result_queue

    def _adjust_process_count(self):
        """Start worker processes until ``_max_workers`` are registered."""
        for _ in range(len(self._processes), self._max_workers):
            p = multiprocessing.Process(
                target=_process_worker,
                args=(self._call_queue, self._result_queue))
            p.start()
            self._processes[p.pid] = p

    def submit(self, fn, *args, **kwargs):
        with self._shutdown_lock:
            if self._broken:
                raise BrokenProcessPool(
                    'A child process terminated '
                    'abruptly, the process pool is not usable anymore')
            if self._shutdown_thread:
                raise RuntimeError(
                    'cannot schedule new futures after shutdown')

            f = _base.Future()
            w = _WorkItem(f, fn, args, kwargs)

            self._pending_work_items[self._queue_count] = w
            self._work_ids.put(self._queue_count)
            self._queue_count += 1
            # Wake up queue management thread
            self._result_queue.put(None)

            self._start_queue_management_thread()
            return f
    submit.__doc__ = _base.Executor.submit.__doc__

    def shutdown(self, wait=True):
        with self._shutdown_lock:
            self._shutdown_thread = True
        if self._queue_management_thread:
            # Wake up queue management thread
            self._result_queue.put(None)
            if wait:
                self._queue_management_thread.join()
        # To reduce the risk of opening too many files, remove references to
        # objects that use file descriptors.
        self._queue_management_thread = None
        self._call_queue = None
        self._result_queue = None
        self._processes = None
    shutdown.__doc__ = _base.Executor.shutdown.__doc__
class TaskQueueDispatcher:
    """Bundle the state used to dispatch workers that share one task queue.

    The task ids of a task group are loaded into ``task_queue`` up front;
    ``result_queue`` is handed to the workers together with the task queue.
    """

    def __init__(self, key, task_group, randomize):
        self.key = key
        self.gen_worker = task_group['gen_worker']
        self.task_ids = task_group['task_ids']
        self.is_parallel = task_group['is_parallel']

        # Randomization is honoured for parallel task groups only.
        self.randomize = randomize if self.is_parallel else False
        if self.randomize:
            random.shuffle(self.task_ids)

        self.result_queue = SimpleQueue()
        self.task_queue = SimpleQueue()

        # Don't expose queues file descriptors over Popen to, say, tarantool
        # running tests.
        for shared_queue in (self.result_queue, self.task_queue):
            for pipe_end in (shared_queue._reader, shared_queue._writer):
                set_fd_cloexec(pipe_end.fileno())

        for queued_id in self.task_ids:
            self.task_queue.put(queued_id)

        self.worker_ids = set()
        self.done = False
        self.done_task_ids = set()

    def _run_worker(self, worker_id, tcp_port_range):
        """Entry function for worker processes."""
        port_start, port_end = tcp_port_range[0], tcp_port_range[1]
        os.environ['TEST_RUN_WORKER_ID'] = str(worker_id)
        os.environ['TEST_RUN_TCP_PORT_START'] = str(port_start)
        os.environ['TEST_RUN_TCP_PORT_END'] = str(port_end)
        color_stdout.queue = self.result_queue
        self.gen_worker(worker_id).run_all(self.task_queue, self.result_queue)

    def add_worker(self, worker_id, tcp_port_range):
        """Queue a stop marker, register the worker and start its process.

        Returns the started ``multiprocessing.Process``.
        """
        # Note: each of our workers should consume only one None, but for the
        # case of abnormal circumstances we listen for processes termination
        # (method 'check_for_dead_processes') and for time w/o output from
        # workers (class 'HangWatcher').
        self.task_queue.put(None)  # 'stop worker' marker

        target = functools.partial(self._run_worker, worker_id, tcp_port_range)
        self.worker_ids.add(worker_id)
        worker_process = multiprocessing.Process(target=target)
        worker_process.start()
        return worker_process

    def del_worker(self, worker_id):
        """Forget a worker and flag the dispatcher as done."""
        self.worker_ids.remove(worker_id)
        # mark task queue as done when the first worker done to prevent cycling
        # with add-del workers
        self.done = True

    def mark_task_done(self, task_id):
        """Record that ``task_id`` has been completed."""
        self.done_task_ids.add(task_id)

    def undone_tasks(self):
        """Return the task ids not yet marked done, in their original order."""
        return [tid for tid in self.task_ids if tid not in self.done_task_ids]
class ProcessPoolExecutor(_base.Executor):
    """Executor that runs submitted calls in a pool of worker processes."""

    __qualname__ = 'ProcessPoolExecutor'

    def __init__(self, max_workers=None):
        """Initialize the executor.

        Args:
            max_workers: Maximum number of worker processes. If None,
                ``multiprocessing.cpu_count()`` is used.
        """
        _check_system_limits()
        if max_workers is None:
            self._max_workers = multiprocessing.cpu_count()
        else:
            self._max_workers = max_workers
        # The call queue is sized slightly above the number of workers.
        self._call_queue = multiprocessing.Queue(self._max_workers + EXTRA_QUEUED_CALLS)
        self._call_queue._ignore_epipe = True
        self._result_queue = SimpleQueue()
        self._work_ids = queue.Queue()
        self._queue_management_thread = None
        # Map of pids to processes.
        self._processes = {}
        self._shutdown_thread = False
        self._shutdown_lock = threading.Lock()
        self._broken = False
        # Id given to the next submitted work item; also the key under which
        # it is stored in _pending_work_items.
        self._queue_count = 0
        self._pending_work_items = {}

    def _start_queue_management_thread(self):
        """Start the worker processes and the management thread, once."""
        # The weakref callback puts None on the result queue when the
        # executor object goes away.
        def weakref_cb(_, q=self._result_queue):
            q.put(None)

        if self._queue_management_thread is None:
            self._adjust_process_count()
            self._queue_management_thread = threading.Thread(
                target=_queue_management_worker,
                args=(weakref.ref(self, weakref_cb),
                      self._processes,
                      self._pending_work_items,
                      self._work_ids,
                      self._call_queue,
                      self._result_queue))
            self._queue_management_thread.daemon = True
            self._queue_management_thread.start()
            _threads_queues[self._queue_management_thread] = self._result_queue

    def _adjust_process_count(self):
        """Start worker processes until ``_max_workers`` are registered."""
        for _ in range(len(self._processes), self._max_workers):
            p = multiprocessing.Process(
                target=_process_worker,
                args=(self._call_queue, self._result_queue))
            p.start()
            self._processes[p.pid] = p

    def submit(self, fn, *args, **kwargs):
        with self._shutdown_lock:
            if self._broken:
                raise BrokenProcessPool('A child process terminated abruptly, the process pool is not usable anymore')
            if self._shutdown_thread:
                raise RuntimeError('cannot schedule new futures after shutdown')
            f = _base.Future()
            w = _WorkItem(f, fn, args, kwargs)
            self._pending_work_items[self._queue_count] = w
            self._work_ids.put(self._queue_count)
            # Each work item needs its own id. Without this increment every
            # submission reused id 0 and replaced the previous pending item.
            self._queue_count += 1
            # Wake up the queue management thread.
            self._result_queue.put(None)
            self._start_queue_management_thread()
            return f
    submit.__doc__ = _base.Executor.submit.__doc__

    def shutdown(self, wait=True):
        with self._shutdown_lock:
            self._shutdown_thread = True
        if self._queue_management_thread:
            # Wake up the queue management thread.
            self._result_queue.put(None)
            if wait:
                self._queue_management_thread.join()
        # Drop references to the thread, queues and processes.
        self._queue_management_thread = None
        self._call_queue = None
        self._result_queue = None
        self._processes = None
    shutdown.__doc__ = _base.Executor.shutdown.__doc__
class ProcessPoolExecutor(_base.Executor):
    """Executor that runs submitted calls in a pool of worker processes."""

    def __init__(self, max_workers=None):
        """Set up the queues and bookkeeping of a new executor.

        Args:
            max_workers: Upper bound on the number of worker processes. When
                None (the default) the machine's processor count is used.

        Raises:
            ValueError: If max_workers is given and is not greater than 0.
        """
        _check_system_limits()

        if max_workers is not None and max_workers <= 0:
            raise ValueError("max_workers must be greater than 0")
        if max_workers is None:
            max_workers = multiprocessing.cpu_count() or 1
        self._max_workers = max_workers

        # The call queue is a little larger than the pool so workers do not
        # idle, yet small because calls already queued cannot be cancelled.
        call_queue_size = self._max_workers + EXTRA_QUEUED_CALLS
        self._call_queue = multiprocessing.Queue(call_queue_size)
        # Killed workers can make the queue's feeder thread print spurious
        # "broken pipe" tracebacks; dead workers are detected anyway, so
        # those tracebacks are silenced.
        self._call_queue._ignore_epipe = True
        self._result_queue = SimpleQueue()
        self._work_ids = queue.Queue()
        self._queue_management_thread = None
        # pid -> process
        self._processes = {}

        # Shutdown happens in two steps.
        self._shutdown_thread = False
        self._shutdown_lock = threading.Lock()
        self._broken = False
        self._queue_count = 0
        self._pending_work_items = {}

    def _start_queue_management_thread(self):
        """Start the worker processes and the management thread, once."""
        # If the executor is garbage collected, this weakref callback wakes
        # the queue management thread.
        def weakref_cb(_, q=self._result_queue):
            q.put(None)

        if self._queue_management_thread is not None:
            return

        # Processes are started first so that their sentinels are known.
        self._adjust_process_count()
        worker_args = (weakref.ref(self, weakref_cb),
                       self._processes,
                       self._pending_work_items,
                       self._work_ids,
                       self._call_queue,
                       self._result_queue)
        self._queue_management_thread = threading.Thread(
            target=_queue_management_worker, args=worker_args)
        self._queue_management_thread.daemon = True
        self._queue_management_thread.start()
        _threads_queues[self._queue_management_thread] = self._result_queue

    def _adjust_process_count(self):
        """Start worker processes until ``_max_workers`` are registered."""
        for _ in range(len(self._processes), self._max_workers):
            proc = multiprocessing.Process(
                target=_process_worker,
                args=(self._call_queue, self._result_queue))
            proc.start()
            self._processes[proc.pid] = proc

    def submit(self, fn, *args, **kwargs):
        with self._shutdown_lock:
            if self._broken:
                raise BrokenProcessPool('A child process terminated '
                                        'abruptly, the process pool is not usable anymore')
            if self._shutdown_thread:
                raise RuntimeError('cannot schedule new futures after shutdown')

            future = _base.Future()
            work_id = self._queue_count
            self._pending_work_items[work_id] = _WorkItem(future, fn, args, kwargs)
            self._work_ids.put(work_id)
            self._queue_count += 1
            # Wake up queue management thread
            self._result_queue.put(None)

            self._start_queue_management_thread()
            return future
    submit.__doc__ = _base.Executor.submit.__doc__

    def map(self, fn, *iterables, timeout=None, chunksize=1):
        """Returns an iterator equivalent to map(fn, iter).

        Args:
            fn: A callable that will take as many arguments as there are
                passed iterables.
            timeout: The maximum number of seconds to wait. If None, then there
                is no limit on the wait time.
            chunksize: If greater than one, the iterables will be chopped into
                chunks of size chunksize and submitted to the process pool.
                If set to one, the items in the list will be sent one at a time.

        Returns:
            An iterator equivalent to: map(func, *iterables) but the calls may
            be evaluated out-of-order.

        Raises:
            TimeoutError: If the entire result iterator could not be generated
                before the given timeout.
            ValueError: If chunksize is less than 1.
            Exception: If fn(*args) raises for any values.
        """
        if chunksize < 1:
            raise ValueError("chunksize must be >= 1.")

        chunk_fn = partial(_process_chunk, fn)
        chunks = _get_chunks(*iterables, chunksize=chunksize)
        results = super(ProcessPoolExecutor, self).map(chunk_fn, chunks,
                                                       timeout=timeout)
        return itertools.chain.from_iterable(results)

    def shutdown(self, wait=True):
        with self._shutdown_lock:
            self._shutdown_thread = True
        manager = self._queue_management_thread
        if manager:
            # Wake up queue management thread
            self._result_queue.put(None)
            if wait:
                manager.join()
        # Drop references to objects that hold file descriptors, to lower
        # the risk of running out of them.
        self._queue_management_thread = None
        self._call_queue = None
        self._result_queue = None
        self._processes = None
    shutdown.__doc__ = _base.Executor.shutdown.__doc__
# NOTE(review): indentation was lost in this copy of the file. The nesting
# below (in particular `cnt += 1` and the progress report sitting inside the
# `startswith` branch) is the most plausible reading -- confirm against the
# original script.
queue_out = SimpleQueue()

# Start NUM_PROCS processes running `worker` on (queue_in, queue_out).
procs = []
for i in xrange(NUM_PROCS):
    p = Process(target=worker, args=(queue_in, queue_out))
    p.start()
    procs.append(p)

# One more process runs `collector` on queue_out.
collector_proc = Process(target=collector, args=(queue_out, ))
collector_proc.start()

say("\n")
say("Reading raw xml file: {}\n".format(sys.argv[1]))
cnt = 0
# Paths ending in ".gz" are opened through gzip, everything else with open().
fopen = lambda x: gzip.open(x) if x.endswith(".gz") else open(x)
with fopen(sys.argv[1]) as fin:
    for line in fin:
        line = line.strip()
        # Only lines that start with `<row Id="` are queued.
        if line.startswith("<row Id=\""):
            queue_in.put(line)
            cnt += 1
            if cnt % 1000 == 0:
                say("\r{} lines processed".format(cnt))
say("\nDone.\n")

# Queue one None per worker process (presumably the workers' stop marker --
# `worker` is defined elsewhere), then wait for everything to finish.
for i in xrange(NUM_PROCS):
    queue_in.put(None)

for p in procs:
    p.join()

collector_proc.join()
graph.close()
class ProcessPoolExecutor(_base.Executor):
    """Executor that runs submitted calls in a pool of worker processes."""

    def __init__(self, max_workers=None):
        """Set up the queues and bookkeeping of a new executor.

        Args:
            max_workers: Upper bound on the number of worker processes. When
                None (the default) the machine's processor count is used.

        Raises:
            ValueError: If max_workers is given and is not greater than 0.
        """
        _check_system_limits()

        if max_workers is not None and max_workers <= 0:
            raise ValueError("max_workers must be greater than 0")
        if max_workers is None:
            max_workers = multiprocessing.cpu_count() or 1
        self._max_workers = max_workers

        # The call queue is a little larger than the pool so workers do not
        # idle, yet small because calls already queued cannot be cancelled.
        call_queue_size = self._max_workers + EXTRA_QUEUED_CALLS
        self._call_queue = multiprocessing.Queue(call_queue_size)
        # Killed workers can make the queue's feeder thread print spurious
        # "broken pipe" tracebacks; dead workers are detected anyway, so
        # those tracebacks are silenced.
        self._call_queue._ignore_epipe = True
        self._result_queue = SimpleQueue()
        self._work_ids = queue.Queue()
        self._queue_management_thread = None
        # pid -> process
        self._processes = {}

        # Shutdown happens in two steps.
        self._shutdown_thread = False
        self._shutdown_lock = threading.Lock()
        self._broken = False
        self._queue_count = 0
        self._pending_work_items = {}

    def _start_queue_management_thread(self):
        """Start the worker processes and the management thread, once."""
        # If the executor is garbage collected, this weakref callback wakes
        # the queue management thread.
        def weakref_cb(_, q=self._result_queue):
            q.put(None)

        if self._queue_management_thread is not None:
            return

        # Processes are started first so that their sentinels are known.
        self._adjust_process_count()
        worker_args = (weakref.ref(self, weakref_cb),
                       self._processes,
                       self._pending_work_items,
                       self._work_ids,
                       self._call_queue,
                       self._result_queue)
        self._queue_management_thread = threading.Thread(
            target=_queue_management_worker, args=worker_args)
        self._queue_management_thread.daemon = True
        self._queue_management_thread.start()
        _threads_queues[self._queue_management_thread] = self._result_queue

    def _adjust_process_count(self):
        """Start worker processes until ``_max_workers`` are registered."""
        for _ in range(len(self._processes), self._max_workers):
            proc = multiprocessing.Process(
                target=_process_worker,
                args=(self._call_queue, self._result_queue))
            proc.start()
            self._processes[proc.pid] = proc

    def submit(self, fn, *args, **kwargs):
        with self._shutdown_lock:
            if self._broken:
                raise BrokenProcessPool(
                    'A child process terminated '
                    'abruptly, the process pool is not usable anymore')
            if self._shutdown_thread:
                raise RuntimeError(
                    'cannot schedule new futures after shutdown')

            future = _base.Future()
            work_id = self._queue_count
            self._pending_work_items[work_id] = _WorkItem(future, fn, args, kwargs)
            self._work_ids.put(work_id)
            self._queue_count += 1
            # Wake up queue management thread
            self._result_queue.put(None)

            self._start_queue_management_thread()
            return future
    submit.__doc__ = _base.Executor.submit.__doc__

    def map(self, fn, *iterables, timeout=None, chunksize=1):
        """Returns an iterator equivalent to map(fn, iter).

        Args:
            fn: A callable that will take as many arguments as there are
                passed iterables.
            timeout: The maximum number of seconds to wait. If None, then there
                is no limit on the wait time.
            chunksize: If greater than one, the iterables will be chopped into
                chunks of size chunksize and submitted to the process pool.
                If set to one, the items in the list will be sent one at a time.

        Returns:
            An iterator equivalent to: map(func, *iterables) but the calls may
            be evaluated out-of-order.

        Raises:
            TimeoutError: If the entire result iterator could not be generated
                before the given timeout.
            ValueError: If chunksize is less than 1.
            Exception: If fn(*args) raises for any values.
        """
        if chunksize < 1:
            raise ValueError("chunksize must be >= 1.")

        chunk_fn = partial(_process_chunk, fn)
        chunks = _get_chunks(*iterables, chunksize=chunksize)
        results = super(ProcessPoolExecutor, self).map(chunk_fn, chunks,
                                                       timeout=timeout)
        return itertools.chain.from_iterable(results)

    def shutdown(self, wait=True):
        with self._shutdown_lock:
            self._shutdown_thread = True
        manager = self._queue_management_thread
        if manager:
            # Wake up queue management thread
            self._result_queue.put(None)
            if wait:
                manager.join()
        # Drop references to objects that hold file descriptors, to lower
        # the risk of running out of them.
        self._queue_management_thread = None
        self._call_queue = None
        self._result_queue = None
        self._processes = None
    shutdown.__doc__ = _base.Executor.shutdown.__doc__
def spawn_import_clients(options, files_info):
    """Run the import: one reader process per file plus a pool of clients.

    Starts ``options["clients"]`` processes running ``client_process`` and
    one ``table_reader`` process per entry of ``files_info``, polls until
    they have exited, then prints a summary.

    Raises:
        RuntimeError: "Interrupted" if the interrupt event was set, or
            "Errors occurred during import" if the error queue is not empty
            at the end (including when tasks were left in the task queue).
    """
    # NOTE(review): indentation was lost in this copy of the file. The code
    # after `finally` is assumed to sit at function level, not inside the
    # `finally` block -- confirm against the original.
    # Spawn one reader process for each db.table, as well as many client processes
    task_queue = SimpleQueue()
    error_queue = SimpleQueue()
    exit_event = multiprocessing.Event()
    interrupt_event = multiprocessing.Event()
    errors = []
    reader_procs = []
    client_procs = []

    parent_pid = os.getpid()
    # SIGINT is routed to abort_import for the duration of the import; the
    # default handler is restored in the `finally` clause below.
    signal.signal(signal.SIGINT, lambda a, b: abort_import(a, b, parent_pid, exit_event, task_queue, client_procs, interrupt_event))

    try:
        progress_info = []
        rows_written = multiprocessing.Value(ctypes.c_longlong, 0)

        for i in xrange(options["clients"]):
            client_procs.append(multiprocessing.Process(target=client_process,
                                                        args=(options["host"],
                                                              options["port"],
                                                              options["auth_key"],
                                                              task_queue,
                                                              error_queue,
                                                              rows_written,
                                                              options["force"],
                                                              options["durability"])))
            client_procs[-1].start()

        for file_info in files_info:
            progress_info.append((multiprocessing.Value(ctypes.c_longlong, -1),  # Current lines/bytes processed
                                  multiprocessing.Value(ctypes.c_longlong, 0)))  # Total lines/bytes to process
            reader_procs.append(multiprocessing.Process(target=table_reader,
                                                        args=(options,
                                                              file_info,
                                                              task_queue,
                                                              error_queue,
                                                              progress_info[-1],
                                                              exit_event)))
            reader_procs[-1].start()

        # Wait for all reader processes to finish - hooray, polling
        while len(reader_procs) > 0:
            time.sleep(0.1)
            # If an error has occurred, exit out early
            if not error_queue.empty():
                exit_event.set()
            reader_procs = [proc for proc in reader_procs if proc.is_alive()]
            update_progress(progress_info)

        # Wait for all clients to finish
        # One "exit" task is queued per client that is still alive.
        alive_clients = sum([client.is_alive() for client in client_procs])
        for i in xrange(alive_clients):
            task_queue.put("exit")

        while len(client_procs) > 0:
            time.sleep(0.1)
            client_procs = [client for client in client_procs if client.is_alive()]

        # If we were successful, make sure 100% progress is reported
        if error_queue.empty() and not interrupt_event.is_set():
            print_progress(1.0)

        def plural(num, text):
            return "%d %s%s" % (num, text, "" if num == 1 else "s")

        # Continue past the progress output line
        print("")
        print("%s imported in %s" % (plural(rows_written.value, "row"),
                                     plural(len(files_info), "table")))
    finally:
        signal.signal(signal.SIGINT, signal.SIG_DFL)

    if interrupt_event.is_set():
        raise RuntimeError("Interrupted")

    # Leftover tasks are reported as an error entry of their own.
    if not task_queue.empty():
        error_queue.put((RuntimeError, RuntimeError("Error: Items remaining in the task queue"), None))

    if not error_queue.empty():
        # multiprocessing queues don't handle tracebacks, so they've already been stringified in the queue
        while not error_queue.empty():
            # Entries are indexed as error[0] (a class, printed by __name__),
            # error[1] (the message), error[2] (traceback text) and an
            # optional error[3] (file name).
            error = error_queue.get()
            print("%s" % error[1], file=sys.stderr)
            if options["debug"]:
                print("%s traceback: %s" % (error[0].__name__, error[2]), file=sys.stderr)
            if len(error) == 4:
                print("In file: %s" % error[3], file=sys.stderr)
        raise RuntimeError("Errors occurred during import")
############################################################################################################################################

# Pick the tree source: the named file (read through gzip when its name ends
# in ".gz") or standard input when no file was given.
if not args.treeFile:
    treeFile = sys.stdin
elif args.treeFile.endswith(".gz"):
    treeFile = gzip.open(args.treeFile, "r")
else:
    treeFile = open(args.treeFile, "r")

##########################################################################################################################################

# Queue every line together with its running index, until readline() returns
# an empty string.
line = treeFile.readline()
while line:
    lineQueue.put((linesQueued, line.rstrip()))
    linesQueued += 1
    line = treeFile.readline()

############################################################################################################################################

### wait for queues to empty

print >> sys.stderr, "\nFinished reading trees...\n"

# Poll once a second until as many results were handled as lines were queued.
while resultsHandled < linesQueued:
    sleep(1)

sleep(5)
class Arbiter(object):
    """Parent process that owns the listening sockets and the worker pool.

    Signals listed in ``SIG_NAMES`` are pushed onto a queue by the signal
    handler and handled one at a time in ``run`` by the matching
    ``handle_<name>`` method, if one exists.
    """

    # Signal number -> lower-case short name (e.g. SIGTTIN -> 'ttin').
    SIG_NAMES = dict(
        (getattr(signal, 'SIG%s' % name), name.lower())
        for name in 'TTIN TTOU TERM USR2'.split()
    )
    SIGNALS = SIG_NAMES.keys()

    def __init__(self, app, config):
        self.app = app
        self.config = config
        self.workers = []
        self.setup()

    def setup(self):
        """Read the config, install signal handlers and create the sockets."""
        self.pid = os.getpid()
        self.worker_nums = self.config['workers']
        self.worker_class = SyncWorker
        self.queue = SimpleQueue()
        self.setup_logger()
        self.setup_signals()

        addresses = self.config['binds']
        self.sockets = create_sockets(addresses, self.logger)

        addresses_str = ', '.join(map(format_addr_str, addresses))
        self.logger.info('Arbiter booted')
        self.logger.info('Listening on: %s (%s)', addresses_str, self.pid)
        self.logger.info('Using worker: %s', self.worker_class)

    def setup_logger(self):
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)-15s [%(process)d] [%(levelname)s] %(message)s',
                            handlers=[logging.StreamHandler()])
        self.logger = logging.getLogger(__name__)

    def setup_signals(self):
        """Route every signal in ``SIGNALS`` to ``handle_signal``."""
        # Plain loop: the calls are made for their side effect only.
        for sig in self.SIGNALS:
            signal.signal(sig, self.handle_signal)

    def handle_signal(self, signum, frame):
        """Signal handler: queue the signal number for ``run``."""
        self.queue.put(signum)

    def run(self):
        """Spawn the workers, then dispatch queued signals forever.

        KeyboardInterrupt triggers ``stop``, which exits the process.
        """
        self.spawn_workers()
        while True:
            try:
                signum = self.queue.get()  # blocking
                signame = self.SIG_NAMES.get(signum)
                handler = getattr(self, 'handle_%s' % signame, None)
                if not handler:
                    self.logger.error('No handler for signal: %s', signame)
                    continue
                self.logger.info('Handling signal: %s', signame)
                handler()
            except KeyboardInterrupt:
                self.stop()

    def stop(self):
        """Kill all workers, close the sockets and exit with status 0."""
        self.logger.info('Stopping')
        for worker in self.workers:
            self.kill_worker(worker)
        for sock in self.sockets:
            sock.close()
        sys.exit(0)

    def spawn_worker(self):
        """Return a new, not yet started, worker process."""
        args = (self.app, self.sockets, self.logger, self.config)
        return Process(target=self.worker_class.create, args=args)

    def spawn_workers(self):
        """Start workers until ``worker_nums`` of them are registered."""
        diff = self.worker_nums - len(self.workers)
        for _ in range(diff):
            worker = self.spawn_worker()
            self.workers.append(worker)
            worker.start()
            self.logger.info('Booting worker: %s', worker.pid)

    def kill_worker(self, worker):
        """Terminate ``worker`` and wait for it to exit."""
        # Arguments are passed to the logger instead of pre-formatting, like
        # every other logging call in this class.
        self.logger.info('Killing worker: %s', worker.pid)
        worker.terminate()
        worker.join()

    def handle_ttin(self):
        """SIGTTIN: grow the pool by one worker."""
        self.worker_nums += 1
        self.spawn_workers()

    def handle_ttou(self):
        """SIGTTOU: shrink the pool by one worker, never below one."""
        if self.worker_nums <= 1:
            return
        worker = self.workers.pop(0)
        self.kill_worker(worker)
        self.worker_nums -= 1
class GroupProcess():
    """Runs a grouped instruction (ATM, SEQ or CNC) as one or more processes.

    States arrive on ``input_interface`` and leave through the queue stored in
    ``output_interfaces`` under the state's label. Each code block of the
    instruction is executed by its own ``Pipeline``. ``None`` is the stop
    marker on every queue used here.
    """

    def __init__(self, instruction, tables):
        self._instruction = instruction
        self._tables = tables
        self.input_interface = Queue()
        self.output_interfaces = {}
        self._instruction_pipelines = []
        # Exactly one of these flags is set by _setup().
        self._is_atomic_enabled = False
        self._is_sequential_enabled = False
        self._is_concurrent_enabled = False
        self._setup()

    def _setup(self):
        """Create the pipelines and processes matching the instruction type.

        Raises RuntimeError for any instruction other than ATM, SEQ or CNC.
        """
        if isinstance(self._instruction, I.ATM):
            self._code = self._instruction.code
            self._instruction_pipelines.append(Pipeline(self._code.instructions, self._tables))
            self._atomic_process = Process(target=self.run_atomic)
            self._is_atomic_enabled = True
        elif isinstance(self._instruction, I.SEQ):
            self._code = self._instruction.code
            self._instruction_pipelines.append(Pipeline(self._code.instructions, self._tables))
            self._sequential_ingress_process = Process(target=self.run_sequential_ingress)
            self._sequential_egress_process = Process(target=self.run_sequential_egress)
            # Carries the saved headers from the ingress to the egress process.
            self._metadata_queue = Queue()
            self._is_sequential_enabled = True
        elif isinstance(self._instruction, I.CNC):
            # Note: CNC can't have PUSH/POP instructions in its code blocks. They violate the concurrency invariant.
            self._codes = self._instruction.codes
            # Per-code-block lists, indexed like _instruction_pipelines.
            self._modified_locations = []
            self._modified_reserved_fields = []
            self._modified_fields = []
            for code in self._codes:
                self._instruction_pipelines.append(Pipeline(code.instructions, self._tables))
                self._modified_locations.append(get_modified_locations(code.instructions))
                self._modified_reserved_fields.append(get_modified_reserved_fields(code.instructions))
                self._modified_fields.append(get_modified_fields(code.instructions, code.argument_fields))
            self._concurrent_ingress_process = Process(target=self.run_concurrent_ingress)
            self._concurrent_egress_process = Process(target=self.run_concurrent_egress)
            # Carries the original states from the ingress to the egress process.
            self._metadata_queue = Queue()
            self._is_concurrent_enabled = True
        else:
            raise RuntimeError()

    def start(self):
        """Start every pipeline, then the process(es) of the enabled mode."""
        for instruction_pipeline in self._instruction_pipelines:
            instruction_pipeline.start()

        if self._is_atomic_enabled:
            self._atomic_process.start()
        elif self._is_sequential_enabled:
            self._sequential_ingress_process.start()
            self._sequential_egress_process.start()
        elif self._is_concurrent_enabled:
            self._concurrent_ingress_process.start()
            self._concurrent_egress_process.start()
        else:
            raise RuntimeError()

    def stop(self):
        """Send the stop markers, stop the pipelines and join the processes."""
        self.input_interface.put(None)

        for instruction_pipeline in self._instruction_pipelines:
            instruction_pipeline.stop()

        if self._is_atomic_enabled:
            self._atomic_process.join()
        elif self._is_sequential_enabled:
            self._metadata_queue.put(None)
            self._sequential_ingress_process.join()
            self._sequential_egress_process.join()
        elif self._is_concurrent_enabled:
            self._metadata_queue.put(None)
            self._concurrent_ingress_process.join()
            self._concurrent_egress_process.join()
        else:
            raise RuntimeError()

    def run_atomic(self):
        """ATM loop: run one state at a time through the single pipeline.

        The pipeline only sees a fresh header holding the code's argument
        fields and the reserved fields; those fields are copied back into the
        original header afterwards.
        """
        instruction_pipeline = self._instruction_pipelines[0]

        while True:
            try:
                state = self.input_interface.get()
                # print 'atomic_group_process'
                if state is None:
                    return

                ''' Save the current header '''
                header = state.header
                state.header = Header()
                for field in self._code.argument_fields:
                    state.header[field] = header[field]
                for field in get_reserved_fields():
                    state.header[field] = header[field]

                ''' Process the pipeline '''
                # Blocks until this state comes out before the next is read.
                instruction_pipeline.put(state)
                state = instruction_pipeline.get()

                ''' Commit changes to the current header '''
                for field in self._code.argument_fields:
                    header[field] = state.header[field]
                for field in get_reserved_fields():
                    header[field] = state.header[field]
                state.header = header

                self.output_interfaces[state.label].put(state)
            except KeyboardInterrupt:
                break

    def run_sequential_ingress(self):
        """SEQ ingress loop: save the header and feed the pipeline.

        Unlike run_atomic, this does not wait for the pipeline's result; the
        saved header is handed to run_sequential_egress via the metadata queue.
        """
        instruction_pipeline = self._instruction_pipelines[0]

        while True:
            try:
                state = self.input_interface.get()
                # print 'sequential_group_ingress_process'
                if state is None:
                    return

                ''' Save the current header '''
                header = state.header
                self._metadata_queue.put(header)
                state.header = Header()
                for field in self._code.argument_fields:
                    state.header[field] = header[field]
                for field in get_reserved_fields():
                    state.header[field] = header[field]

                instruction_pipeline.put(state)
            except KeyboardInterrupt:
                break

    def run_sequential_egress(self):
        """SEQ egress loop: pair each saved header with a pipeline result.

        Headers and pipeline results are each taken in arrival order from
        their own queue.
        """
        instruction_pipeline = self._instruction_pipelines[0]

        while True:
            try:
                header = self._metadata_queue.get()
                # print 'sequential_group_egress_process'
                if header is None:
                    return

                state = instruction_pipeline.get()

                ''' Commit changes to the original header '''
                for field in self._code.argument_fields:
                    header[field] = state.header[field]
                for field in get_reserved_fields():
                    header[field] = state.header[field]
                state.header = header

                self.output_interfaces[state.label].put(state)
            except KeyboardInterrupt:
                break

    def run_concurrent_ingress(self):
        """CNC ingress loop: send each state to every code block's pipeline.

        Each pipeline receives the state with a fresh header holding that
        code block's argument fields and the reserved fields.
        """
        while True:
            try:
                state = self.input_interface.get()
                # print 'concurrent_group_ingress_process'
                if state is None:
                    return

                ''' Save the current header '''
                header = state.header
                # NOTE(review): the same `state` object is queued here and then
                # has `header` reassigned in the loop below. Whether the queued
                # copy can observe that reassignment depends on when the queue
                # serializes the object -- confirm this is safe.
                self._metadata_queue.put(state)

                for i in range(len(self._instruction_pipelines)):
                    state.header = Header()
                    for field in self._codes[i].argument_fields:
                        state.header[field] = header[field]
                    for field in get_reserved_fields():
                        state.header[field] = header[field]

                    self._instruction_pipelines[i].put(state)
            except KeyboardInterrupt:
                break

    def run_concurrent_egress(self):
        """CNC egress loop: merge every pipeline's result into the original state.

        For each original state, one result is read from each pipeline in
        index order and its modified fields, reserved fields and packet
        locations are copied back.
        """
        while True:
            try:
                state = self._metadata_queue.get()
                # print 'concurrent_group_egress_process'
                if state is None:
                    return

                for i in range(len(self._instruction_pipelines)):
                    _state = self._instruction_pipelines[i].get()

                    ''' Commit changes to the original header '''
                    # Note: we assume that fields and locations are unique across different legs of CNC
                    for field in self._modified_fields[i]:
                        state.header[field] = _state.header[field]
                    for field in self._modified_reserved_fields[i]:
                        state.header[field] = _state.header[field]
                    for location in self._modified_locations[i]:
                        offset_value = location.offset.value
                        length_value = location.length.value
                        # NOTE(review): the destination is `state.header.packet`
                        # but the source is `_state.packet` (no `.header`) --
                        # confirm the asymmetry is intended.
                        state.header.packet[offset_value.value:(offset_value.value + length_value.value)] = \
                            _state.packet[offset_value.value:(offset_value.value + length_value.value)]

                self.output_interfaces[state.label].put(state)
            except KeyboardInterrupt:
                break
class PrimitiveProcess(Process):
    """Process that applies one primitive instruction to every state it receives.

    States are read from ``input_interface``, passed to the executor bound
    for ``instruction``, and the returned state is put on
    ``output_interfaces[state.label]``.  Putting ``None`` on
    ``input_interface`` ends the process.
    """

    def __init__(self, instruction):
        """Create the process and bind the executor matching ``instruction``.

        :param instruction: instance of one of the ``I.*`` instruction classes
        :raises RuntimeError: if the instruction type is not handled
        """
        super(PrimitiveProcess, self).__init__()
        # self.daemon = True
        self._instruction = instruction
        self.input_interface = Queue()
        self.output_interfaces = {}
        self._run = None
        self._run = self._select_executor()

    def _bind_table_executor(self, executor, **operands):
        """Create this process's TableInterface and bind it to ``executor``.

        Table instructions all receive the table interface and the
        instruction itself in addition to their own operands.
        """
        self.table_interface = TableInterface()
        return partial(executor, table_interface=self.table_interface,
                       instruction=self._instruction, **operands)

    def _select_executor(self):
        """Return the callable that executes ``self._instruction`` on a state.

        The isinstance checks are kept in their original order so that the
        first matching instruction class wins.
        """
        instruction = self._instruction

        if isinstance(instruction, I.ID):
            return execute_ID
        elif isinstance(instruction, I.DRP):
            return partial(execute_DRP, reason=instruction.reason)
        elif isinstance(instruction, I.CTR):
            return partial(execute_CTR, reason=instruction.reason)
        elif isinstance(instruction, I.ADD):
            return partial(execute_ADD, field=instruction.field, size=instruction.size)
        elif isinstance(instruction, I.RMV):
            return partial(execute_RMV, field=instruction.field)
        elif isinstance(instruction, I.LD):
            return partial(execute_LD, destination=instruction.destination,
                           source=instruction.source)
        elif isinstance(instruction, I.ST):
            return partial(execute_ST, location=instruction.location,
                           source=instruction.source)
        elif isinstance(instruction, I.OP):
            return partial(execute_OP, destination=instruction.destination,
                           left_source=instruction.left_source,
                           operator=instruction.operator,
                           right_source=instruction.right_source)
        elif isinstance(instruction, I.PUSH):
            return partial(execute_PUSH, location=instruction.location)
        elif isinstance(instruction, I.POP):
            return partial(execute_POP, location=instruction.location)
        elif isinstance(instruction, I.BR):
            return partial(execute_BR, left_source=instruction.left_source,
                           operator=instruction.operator,
                           right_source=instruction.right_source,
                           label=instruction.label)
        elif isinstance(instruction, I.JMP):
            return partial(execute_JMP, label=instruction.label)
        elif isinstance(instruction, I.LBL):
            return execute_LBL
        elif isinstance(instruction, I.LDt):
            return self._bind_table_executor(execute_LDt,
                                             destinations=instruction.destinations,
                                             index=instruction.index)
        elif isinstance(instruction, I.STt):
            return self._bind_table_executor(execute_STt,
                                             index=instruction.index,
                                             sources=instruction.sources)
        elif isinstance(instruction, I.INCt):
            return self._bind_table_executor(execute_INCt,
                                             index=instruction.index)
        elif isinstance(instruction, I.LKt):
            return self._bind_table_executor(execute_LKt,
                                             index=instruction.index,
                                             sources=instruction.sources)
        elif isinstance(instruction, I.CRC):
            return partial(execute_CRC, destination=instruction.destination,
                           sources=instruction.sources)
        elif isinstance(instruction, I.HSH):
            return partial(execute_HSH, destination=instruction.destination,
                           sources=instruction.sources)
        elif isinstance(instruction, I.HLT):
            return execute_HLT

        # Previously a bare RuntimeError(); say what was rejected.
        raise RuntimeError("unsupported instruction: %r" % (instruction,))

    def stop(self):
        """Ask the process to exit by queuing ``None``, then wait for it."""
        self.input_interface.put(None)
        self.join()

    def run(self):
        """Execute the bound instruction on each incoming state until stopped.

        Returns when ``None`` is read from ``input_interface``; leaves the
        loop on KeyboardInterrupt.
        """
        while True:
            try:
                state = self.input_interface.get()
                # print 'primitive_process'
                if state is None:
                    return

                state = self._run(state)

                # The executor's returned state carries the label that selects
                # the output queue.
                self.output_interfaces[state.label].put(state)
            except KeyboardInterrupt:
                break
writerThread.start()

# Start a background thread that runs checkStats in a loop to check the run
# statistics and print them. A thread is used because (per the original
# author's note) this is believed necessary for something that watches global
# variables like linesTested.
checkerThread = Thread(target=checkStats)
checkerThread.daemon = True
checkerThread.start()

########################################################################################################################

#generate slices and queue

fileSlices = fileSlicer(genoFile, 1000000)

for fileSlice in fileSlices:
    inQueue.put((slicesQueued,fileSlice))
    slicesQueued += 1
    # In test mode stop queuing as soon as the counter reaches 10.
    if args.test and slicesQueued == 10:
        break

############################################################################################################################################

#Now we send completion signals to all worker threads
for x in range(args.threads):
    inQueue.put((-1,None,)) # -1 tells the threads to break

# The message has no placeholders, so the former .format(args.threads) call
# was a no-op and has been removed.
sys.stderr.write("\nClosing worker threads\n")
class ProcessPoolExecutor(_base.Executor):
    def __init__(self, max_workers=None):
        """Initializes a new ProcessPoolExecutor instance.

        Args:
            max_workers: The maximum number of processes that can be used to
                execute the given calls. If None or not given then as many
                worker processes will be created as the machine has processors.

        Raises:
            ValueError: If max_workers is less than or equal to 0.
        """
        _check_system_limits()

        if max_workers is None:
            self._max_workers = multiprocessing.cpu_count()
        else:
            # A non-positive worker count would start no processes, so every
            # submitted future would wait forever. Reject it up front.
            if max_workers <= 0:
                raise ValueError("max_workers must be greater than 0")
            self._max_workers = max_workers

        # Make the call queue slightly larger than the number of processes to
        # prevent the worker processes from idling. But don't make it too big
        # because futures in the call queue cannot be cancelled.
        self._call_queue = multiprocessing.Queue(self._max_workers +
                                                 EXTRA_QUEUED_CALLS)
        # Killed worker processes can produce spurious "broken pipe"
        # tracebacks in the queue's own worker thread. But we detect killed
        # processes anyway, so silence the tracebacks.
        self._call_queue._ignore_epipe = True
        self._result_queue = SimpleQueue()
        self._work_ids = queue.Queue()
        self._queue_management_thread = None
        # Map of pids to processes
        self._processes = {}

        # Shutdown is a two-step process.
        self._shutdown_thread = False
        self._shutdown_lock = threading.Lock()
        self._broken = False
        self._queue_count = 0
        self._pending_work_items = {}

    def _start_queue_management_thread(self):
        """Start the worker processes and the queue management thread once."""
        # When the executor gets lost, the weakref callback will wake up
        # the queue management thread.
        def weakref_cb(_, q=self._result_queue):
            q.put(None)
        if self._queue_management_thread is None:
            # Start the processes so that their sentinels are known.
            self._adjust_process_count()
            self._queue_management_thread = threading.Thread(
                    target=_queue_management_worker,
                    args=(weakref.ref(self, weakref_cb),
                          self._processes,
                          self._pending_work_items,
                          self._work_ids,
                          self._call_queue,
                          self._result_queue))
            self._queue_management_thread.daemon = True
            self._queue_management_thread.start()
            _threads_queues[self._queue_management_thread] = self._result_queue

    def _adjust_process_count(self):
        """Start worker processes until ``_max_workers`` of them are tracked."""
        for _ in range(len(self._processes), self._max_workers):
            p = multiprocessing.Process(
                    target=_process_worker,
                    args=(self._call_queue,
                          self._result_queue))
            p.start()
            self._processes[p.pid] = p

    def submit(self, fn, *args, **kwargs):
        with self._shutdown_lock:
            if self._broken:
                raise BrokenProcessPool('A child process terminated '
                    'abruptly, the process pool is not usable anymore')
            if self._shutdown_thread:
                raise RuntimeError('cannot schedule new futures after shutdown')

            f = _base.Future()
            w = _WorkItem(f, fn, args, kwargs)

            # Work items are keyed by a monotonically increasing id; the id
            # (not the item) is what goes through the _work_ids queue.
            self._pending_work_items[self._queue_count] = w
            self._work_ids.put(self._queue_count)
            self._queue_count += 1
            # Wake up queue management thread
            self._result_queue.put(None)

            self._start_queue_management_thread()
            return f
    submit.__doc__ = _base.Executor.submit.__doc__

    def shutdown(self, wait=True):
        with self._shutdown_lock:
            self._shutdown_thread = True
        if self._queue_management_thread:
            # Wake up queue management thread
            self._result_queue.put(None)
            if wait:
                self._queue_management_thread.join()
        # To reduce the risk of opening too many files, remove references to
        # objects that use file descriptors.
        self._queue_management_thread = None
        self._call_queue = None
        self._result_queue = None
        self._processes = None
    shutdown.__doc__ = _base.Executor.shutdown.__doc__
########################################################################################################################

# Group input lines into pods of podSize (line number, line) pairs and hand
# each full pod to the processor(s) through inQueue.

podSize = args.podSize

lineGen = lineReader(In)
pod = []
podNumber = 0

for line in lineGen:
    linesRead += 1
    pod.append((linesRead, line))

    # Keep filling until the running line count hits a multiple of podSize.
    if linesRead % podSize != 0:
        continue

    inQueue.put((podNumber, pod))
    if verbose:
        print >> sys.stderr, "Pod", podNumber, "sent for analysis..."
    podNumber += 1
    podsQueued += 1
    pod = []

# Whatever is left over forms a final, shorter pod.
if pod:
    inQueue.put((podNumber, pod))
    podsQueued += 1
    if verbose:
        print >> sys.stderr, "Pod", podNumber, "sent for analysis..."

#Wait for analysis to finish
checkerThread.daemon = True
checkerThread.start()

##########################################################################################################################

# Header row: the two coordinate columns followed by every sample name.
headerFields = ["#CHROM", "POS"] + [s for samples in _samples_ for s in samples]
outFile.write(args.outSep.join(headerFields) + "\n")

# Walk the scaffolds assuming all files are ordered as in the fai. If the line
# being looked for is not found, move on to the next one.
for scaf in scafs:
    if (exclude and scaf in exclude) or (include and scaf not in include):
        continue

    # Window boundaries: starts step through 1..scafLens[scaf] in strides of
    # windSize; each window ends windSize - 1 after its start.
    starts = range(1,scafLens[scaf] + 1, args.windSize)
    ends = [s + args.windSize - 1 for s in starts]

    for winStart, winEnd in zip(starts, ends):
        inQueue.put((windowsQueued,scaf,winStart,winEnd,))
        windowsQueued += 1
        # Test mode: stop once the counter reaches 10.
        if args.test and windowsQueued == 10:
            break

    # Propagate the test-mode stop out of the scaffold loop as well.
    if args.test and windowsQueued == 10:
        break

############################################################################################################################################

#Now we send completion signals to all worker threads
for x in range(args.threads):
    inQueue.put((-1,None,None,None,)) # -1 tells the threads to break

#and wait for all to finish
for wThread in workerThreads:
    wThread.join()

sorterThread.join()
def export_table(db, table, directory, options, error_queue, progress_info, sindex_counter, hook_counter, exit_event):
    """Export one table: write its ``.info`` metadata file and stream its rows to a writer process.

    :param db: database name
    :param table: table name
    :param directory: export root; files are written under ``<directory>/<db>/``
    :param options: provides ``retryQuery``, ``format``, ``fields``,
        ``delimiter`` and ``outdated``
    :param error_queue: receives ``(type, value, extracted_traceback)`` tuples
        for any failure raised here; it is also handed to the writer process
    :param progress_info: pair of shared values; ``[0]`` is updated with the
        number of rows read, ``[1]`` is set to the final row count on completion
    :param sindex_counter: shared counter, increased by the number of entries
        in the table's index status
    :param hook_counter: shared counter, increased by one if the table has a
        write hook
    :param exit_event: when set, reading stops as soon as possible

    No exception propagates to the caller: every failure is reported through
    ``error_queue``.
    """
    signal.signal(
        signal.SIGINT, signal.SIG_DFL
    )  # prevent signal handlers from being set in child processes

    writer = None

    try:
        # -- get table info

        table_info = options.retryQuery('table info: %s.%s' % (db, table),
                                        query.db(db).table(table).info())

        # Rather than just the index names, store all index information
        table_info['indexes'] = options.retryQuery(
            'table index data %s.%s' % (db, table),
            query.db(db).table(table).index_status(),
            run_options={'binary_format': 'raw'})

        table_info['write_hook'] = options.retryQuery(
            'table write hook data %s.%s' % (db, table),
            query.db(db).table(table).get_write_hook(),
            run_options={'binary_format': 'raw'})

        if table_info['write_hook'] is not None:
            hook_counter.value += 1

        with open(os.path.join(directory, db, table + '.info'), 'w') as info_file:
            info_file.write(json.dumps(table_info) + "\n")

        # Count this table's indexes exactly once, under the counter's lock.
        # An additional unlocked increment used to run earlier in this
        # function, which doubled the reported total.
        with sindex_counter.get_lock():
            sindex_counter.value += len(table_info["indexes"])

        # -- start the writer
        if six.PY3:
            ctx = multiprocessing.get_context(
                multiprocessing.get_start_method())
            task_queue = SimpleQueue(ctx=ctx)
        else:
            task_queue = SimpleQueue()

        if options.format == "json":
            filename = directory + "/%s/%s.json" % (db, table)
            writer = multiprocessing.Process(target=json_writer,
                                             args=(filename, options.fields,
                                                   task_queue, error_queue,
                                                   options.format))
        elif options.format == "csv":
            filename = directory + "/%s/%s.csv" % (db, table)
            writer = multiprocessing.Process(target=csv_writer,
                                             args=(filename, options.fields,
                                                   options.delimiter,
                                                   task_queue, error_queue))
        elif options.format == "ndjson":
            filename = directory + "/%s/%s.ndjson" % (db, table)
            writer = multiprocessing.Process(target=json_writer,
                                             args=(filename, options.fields,
                                                   task_queue, error_queue,
                                                   options.format))
        else:
            raise RuntimeError("unknown format type: %s" % options.format)
        writer.start()

        # -- read in the data source

        # -

        lastPrimaryKey = None
        read_rows = 0
        run_options = {"time_format": "raw", "binary_format": "raw"}
        if options.outdated:
            run_options["read_mode"] = "outdated"
        cursor = options.retryQuery('inital cursor for %s.%s' % (db, table),
                                    query.db(db).table(table).order_by(
                                        index=table_info["primary_key"]),
                                    run_options=run_options)
        while not exit_event.is_set():
            try:
                for row in cursor:
                    # bail on exit
                    if exit_event.is_set():
                        break

                    # add to the output queue
                    task_queue.put([row])
                    lastPrimaryKey = row[table_info["primary_key"]]
                    read_rows += 1

                    # Update the progress every 20 rows
                    if read_rows % 20 == 0:
                        progress_info[0].value = read_rows

                else:
                    # Export is done - since we used estimates earlier, update the actual table size
                    progress_info[0].value = read_rows
                    progress_info[1].value = read_rows
                    break

            except (errors.ReqlTimeoutError, errors.ReqlDriverError):
                # connection problem, re-setup the cursor
                try:
                    cursor.close()
                except errors.ReqlError as exc:
                    default_logger.exception(exc)

                # Resume strictly after the last row that was queued.
                cursor = options.retryQuery(
                    'backup cursor for %s.%s' % (db, table),
                    query.db(db).table(table).between(
                        lastPrimaryKey, None, left_bound="open").order_by(
                            index=table_info["primary_key"]),
                    run_options=run_options)

    except (errors.ReqlError, errors.ReqlDriverError) as ex:
        error_queue.put((RuntimeError, RuntimeError(ex.message),
                         traceback.extract_tb(sys.exc_info()[2])))
    except BaseException:
        ex_type, ex_class, tb = sys.exc_info()
        error_queue.put((ex_type, ex_class, traceback.extract_tb(tb)))
    finally:
        # A StopIteration instance on the task queue tells the writer to finish.
        if writer and writer.is_alive():
            task_queue.put(StopIteration())
            writer.join()
exclude=scafsToExclude)
elif windType == "sites":
    # Windows defined by number of sites.
    windowGenerator = genomics.slidingSitesWindows(genoFile, windSize, overlap, maxDist, minSites, sampleData.indNames, include=scafsToInclude, exclude=scafsToExclude)
else:
    # Any other windType: windows taken from the coordinates in windCoords.
    windowGenerator = genomics.predefinedCoordWindows(genoFile, windCoords, sampleData.indNames)

# Queue every window together with its sequence number.
for window in windowGenerator:
    windowQueue.put((windowsQueued, window))
    windowsQueued += 1

############################################################################################################################################

print >> sys.stderr, "\nWriting final results...\n"

# Poll once a second until resultsHandled catches up with windowsQueued
# (presumably resultsHandled is advanced by another thread -- TODO confirm).
while resultsHandled < windowsQueued:
    sleep(1)

# NOTE(review): fixed extra pause before the files are closed; presumably
# gives the writer time to finish -- confirm.
sleep(5)

genoFile.close()
outFile.close()

print >> sys.stderr, str(windowsQueued), "windows were tested.\n"
print >> sys.stderr, str(resultsWritten), "results were written.\n"
l.acquire() ip_range=input("Enter the range of devices to retrieve configs from\n>") l.release() ##hosts=scan(ip_range) for host in hosts: if host.ssh: action_queue.put(['0'],device(host)) task_count=task_count+1 else: continue ##When all actions have been queued, exit the main loop else: break ##Signals to the processes that when the queue is empty, they can exit check_queue.put(True) ##Waits for the action queue to finish action_queue.join() ##Reports failures to the user while not error_queue.empty(): l.acquire() print ('FAILED: '+error_queue.get().address) host_list.write('FAILED: '+error_queue.get().address) l.release() ##Waits for user input to allow exit l.acquire() input_at_end=input('Press enter to exit') l.release()