def loop_test(network, device, transformer, img_q: Queue, bbox_q: Queue, threshold=0.35):
    scale = None
    print(f"NETWORK IS NONE {type(network)}")
    print("STARTING TO SPIN DETECT LOOP")
    while True:
        print("WAIT")
        image = img_q.get()
        print("RECV")
        if isinstance(image, str) and image == "DONE":
            del image
            break
        print("CHECK")
        boxes = detect_face(image, network, transformer, device, threshold)
        print("SENDING")
        bbox_q.put(boxes)
        print("SENT")
        # don't forget to clean up
        del image
    img_q.close()
    bbox_q.close()
    print("BYE")
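# A minimal driver sketch for loop_test (assumptions: it is spawned with
# torch.multiprocessing so the model can be shared across processes; the
# `network`, `device`, `transformer` and `frames` arguments are hypothetical
# placeholders supplied by the caller, not defined in the snippet above).
from torch.multiprocessing import Process, Queue

def run_detection(network, device, transformer, frames):
    img_q, bbox_q = Queue(), Queue()
    worker = Process(target=loop_test,
                     args=(network, device, transformer, img_q, bbox_q))
    worker.start()
    results = []
    for frame in frames:
        img_q.put(frame)              # feed one frame at a time
        results.append(bbox_q.get())  # collect the detected boxes
    img_q.put("DONE")                 # sentinel understood by loop_test
    worker.join()
    return results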
def __detector_process(detector_cfg, recivq: Queue, sendqs, timeout, run_semaphore, pause_event):
    detector = __build_detector_component(detector_cfg)
    logger = get_logger()
    logger.info('create ' + str(detector_cfg['type']))
    try:
        while True:
            if not run_semaphore.value:
                logger.info('detector stopped via the run semaphore')
                break
            pause_event.wait()
            kwargs = recivq.get(timeout=timeout)
            kwargs = detector(**kwargs)
            # the next stage may be backbones or a tracker, so fan out over a list of send queues
            for sendq in sendqs:
                sendq.put(kwargs, timeout=timeout)
    except KeyboardInterrupt:
        logger.info('user stopped the detector process')
    except Empty:
        logger.info('the head no longer sends data; the detector releases itself')
    except Full:
        logger.exception('a queue leading to a backbone or tracker is full')
    # except Exception as e:
    #     logger.exception(e)
    finally:
        logger.info('release the detector source')
        del detector  # delete the detector object
        del logger
        torch.cuda.empty_cache()  # empty the GPU cache so a stopped process does not keep holding GPU memory
        recivq.cancel_join_thread()
        for sendq in sendqs:
            sendq.cancel_join_thread()
            sendq.close()
        recivq.close()
        return
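# A hedged sketch of the control objects this process expects (assumption:
# `run_semaphore` is a multiprocessing.Value holding a boolean flag and
# `pause_event` is a multiprocessing.Event; __build_detector_component and the
# contents of detector_cfg are project-specific and not reproduced here).
from multiprocessing import Value, Event

run_semaphore = Value('b', True)  # set .value = False to make the loop exit
pause_event = Event()
pause_event.set()                 # while cleared, the process blocks in wait()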
def _run_game(process_id: int, game_factory: GameExecutorFactory, network: nn.Module, device: torch.device,
              request_queue: Queue, experience_queue: Queue, batch_size: int, transfer_blocks: int,
              transfer_to_device: bool) -> None:
    exploration_rate = 1.
    game = game_factory.create()
    print('* worker %d started' % process_id)
    while True:
        try:
            if not request_queue.empty():
                request: _RunGameRequest = request_queue.get(block=False)

                if request.do_terminate:
                    print('* game worker %d terminated' % process_id)
                    experience_queue.close()
                    request_queue.close()
                    return

                if request.set_exploration_rate is not None:
                    exploration_rate = request.set_exploration_rate

            block = []
            for _ in range(transfer_blocks):
                eps, exps = game.multi_step(network, device, exploration_rate, batch_size)
                if transfer_to_device:
                    exps = [e.to_device(device, non_blocking=False) for e in exps]
                block.append((eps, exps))
            experience_queue.put(block, block=True)
        except Exception as e:
            print('error in worker %d: ' % process_id, e)
def multiprocess_training_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event,
                                 _local_file, _fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data):

    # workflow: we tokenize the data files with the costly spacy before training in a preprocessing step
    # (and concat the tokens with single whitespaces), so here we only split on the whitespaces
    _tokenizer = None

    if _config["preprocessed_tokenized"] == True:
        _tokenizer = WordTokenizer(word_splitter=JustSpacesWordSplitter())

    if _config["token_embedder_type"] == "embedding":
        _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
        _vocab = Vocabulary.from_files(_config["vocab_directory"])

    elif _config["token_embedder_type"] == "fasttext":
        _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
        _vocab = FastTextVocab(_fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data,
                               _config["fasttext_max_subwords"])

    elif _config["token_embedder_type"] == "elmo":
        _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
        _vocab = None

    _triple_loader = IrTripleDatasetReader(lazy=True, tokenizer=_tokenizer, token_indexers=_token_indexers,
                                           max_doc_length=_config["max_doc_length"],
                                           max_query_length=_config["max_query_length"])

    _iterator = BucketIterator(batch_size=int(_config["batch_size_train"]),
                               sorting_keys=[("doc_pos_tokens", "num_tokens"), ("doc_neg_tokens", "num_tokens")])

    _iterator.index_with(_vocab)

    for training_batch in _iterator(_triple_loader.read(_local_file), num_epochs=1):
        _queue.put(training_batch)  # this moves the tensors in to shared memory

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and not needed anymore
class IterableParquetDataset(IterableDataset):
    def __init__(self, path, process_func):
        super().__init__()
        dataset = ds.dataset(path)
        self.process_func = process_func

        self.batches = Queue()
        for batch in dataset.to_batches():
            self.batches.put(batch)

    def __iter__(self):
        while True:
            if self.batches.empty():
                self.batches.close()
                break

            batch = self.batches.get().to_pydict()
            batch.update(self.process_func(batch))
            yield batch
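# A hedged usage sketch for the dataset above (assumptions: `ds` is
# pyarrow.dataset and `Queue` is a multiprocessing-style queue with close();
# "data.parquet" and the add_row_count process_func are made-up examples).
from torch.utils.data import DataLoader

def add_row_count(batch):
    # derive an extra field from each decoded batch (illustrative only)
    first_column = next(iter(batch.values()))
    return {"n_rows": [len(first_column)]}

dataset = IterableParquetDataset("data.parquet", add_row_count)
loader = DataLoader(dataset, batch_size=None)  # batches are already formed by Arrow
for batch in loader:
    pass  # each item is a dict of columns plus whatever process_func added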
def dynamic_power(model, input_shape):
    q = Queue()
    power_return = Queue()
    interval_return = Queue()
    latency_return = Queue()
    input_tensor_queue = Queue()
    model_queue = Queue()

    input_tensor = torch.ones([*input_shape])
    input_tensor_queue.put(input_tensor)

    model.share_memory()
    model_queue.put(model)

    context = torch.multiprocessing.get_context('spawn')
    p_thread = context.Process(target=power_thread, args=(power_return, interval_return, q))
    l_thread = context.Process(target=latency_thread, args=(model_queue, input_tensor_queue, latency_return, q))
    l_thread.start()
    p_thread.start()

    power_l = list()     # GPU power list
    interval_l = list()  # power interval list
    latency_l = list()   # latency list

    l_thread.join()

    while True:
        if not power_return.empty():
            power_l.append(power_return.get())
        if not interval_return.empty():
            interval_l.append(interval_return.get())
        if not latency_return.empty():
            latency_l.append(latency_return.get())
        if power_return.empty() and interval_return.empty() and latency_return.empty():
            break

    power_return.close()
    interval_return.close()
    latency_return.close()
    q.close()

    del q
    del power_return
    del latency_return
    del interval_return

    return latency_l, power_l, interval_l
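# A minimal call sketch (assumptions: power_thread and latency_thread are
# defined elsewhere in this module and require a CUDA-capable machine; the
# torchvision model and input shape are only examples).
import torchvision

if __name__ == '__main__':
    model = torchvision.models.resnet18()
    latencies, powers, intervals = dynamic_power(model, (1, 3, 224, 224))
    print(len(latencies), len(powers), len(intervals))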
def __backbone_process(backbone_cfg: list, recivq: Queue, sendq: Queue, timeout, run_semaphore, pause_event):
    # instantiate every component that belongs to this backbone
    backbone_components = [__build_backbone_component(bbcfg) for bbcfg in backbone_cfg]
    logger = get_logger()
    logger.info('create backbone')
    try:
        while True:
            if not run_semaphore.value:
                logger.info('backbone stopped via the run semaphore')
                break
            pause_event.wait()
            kwargs = recivq.get(timeout=timeout)
            # the first component in this pipeline processes the data first
            kwargs = backbone_components[0](**kwargs)
            if len(backbone_components) > 1:
                # if the pipeline holds several components, hand the data to each remaining component in turn
                for backbone_component in backbone_components[1:]:
                    kwargs = backbone_component(**kwargs)
            # print('backbone sendq len is {}'.format(sendq.qsize()))
            if kwargs is not None:
                for img_info in kwargs['imgs_info']:
                    sendq.put(img_info, timeout=timeout)
    except KeyboardInterrupt:
        logger.info('user stopped a backbone_process process')
    except Empty:
        logger.info('backbone stopped normally')
    except Full as e:
        logger.exception(e)
        logger.warning('the queue to the main process is full; check that the main process is still taking data out')
    except Exception as e:
        logger.exception(e)
        logger.info('a non-ignorable error occurred, so the whole background program is forced to stop; check the log output to locate the error')
        # import signal
        # os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
    finally:
        logger.info('release backbone source')
        del logger
        recivq.cancel_join_thread()
        sendq.cancel_join_thread()
        recivq.close()
        sendq.close()
        return
def _worker(
    reader: DatasetReader,
    input_queue: Queue,
    output_queue: Queue,
    num_active_workers: Value,
    num_inflight_items: Value,
    worker_id: int,
) -> None:
    """
    A worker that pulls filenames off the input queue, uses the dataset reader
    to read them, and places the generated instances on the output queue. When
    there are no filenames left on the input queue, it decrements
    num_active_workers to signal completion.
    """
    logger.info(f"Reader worker: {worker_id} PID: {os.getpid()}")

    # Keep going until you get a file_path that's None.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            # It's important that we close and join the queue here before
            # decrementing num_active_workers. Otherwise our parent may join us
            # before the queue's feeder thread has passed all buffered items to
            # the underlying pipe resulting in a deadlock.
            #
            # See:
            # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#pipes-and-queues
            # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#programming-guidelines
            output_queue.close()
            output_queue.join_thread()

            # Decrementing is not atomic.
            # See https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Value.
            with num_active_workers.get_lock():
                num_active_workers.value -= 1

            logger.info(f"Reader worker {worker_id} finished")
            break

        logger.info(f"reading instances from {file_path}")
        for instance in reader.read(file_path):
            with num_inflight_items.get_lock():
                num_inflight_items.value += 1
            output_queue.put(instance)
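# A hedged sketch of the parent side this worker expects (assumptions: the
# queues and shared counters come from multiprocessing, `reader` is any
# DatasetReader, and `file_paths` is a hypothetical list of input files).
from multiprocessing import Process, Queue, Value
from queue import Empty

def read_in_parallel(reader, file_paths, num_workers):
    input_queue, output_queue = Queue(), Queue()
    num_active_workers = Value('i', num_workers)
    num_inflight_items = Value('i', 0)

    workers = [Process(target=_worker,
                       args=(reader, input_queue, output_queue,
                             num_active_workers, num_inflight_items, wid))
               for wid in range(num_workers)]
    for w in workers:
        w.start()

    for path in file_paths:
        input_queue.put(path)
    for _ in workers:
        input_queue.put(None)  # one termination sentinel per worker

    # drain until every worker has finished and nothing is left in flight;
    # the timeout lets us re-check the counters instead of blocking forever
    while num_active_workers.value > 0 or num_inflight_items.value > 0:
        try:
            instance = output_queue.get(timeout=1.0)
        except Empty:
            continue
        with num_inflight_items.get_lock():
            num_inflight_items.value -= 1
        yield instance

    for w in workers:
        w.join()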
def __tracker_process(tracker_cfg, recivq: Queue, sendqs, timeout, run_semaphore, pause_event):
    tracker = __build_tracker_component(tracker_cfg)
    logger = get_logger()
    logger.info('create ' + str(tracker_cfg['type']))
    try:
        while True:
            if not run_semaphore.value:
                logger.info('tracker stopped via the run semaphore')
                break
            pause_event.wait()
            kwargs = recivq.get(timeout=timeout)
            imgs, imgs_info = kwargs['imgs'], kwargs['imgs_info']
            for index, (img, img_info) in enumerate(zip(imgs, imgs_info)):
                img_info = tracker(img, img_info)
                imgs_info[index] = img_info
            for sendq in sendqs:
                # print('tracker sendq len is {}'.format(sendq.qsize()))
                sendq.put({'imgs': imgs, 'imgs_info': imgs_info}, timeout=timeout)
    except KeyboardInterrupt:
        logger.info('user stopped the tracker process')
    except Empty:
        logger.info('the detector no longer sends data; the tracker releases itself')
    except Full:
        logger.exception('a queue leading to a backbone is full')
    # except Exception as e:
    #     logger.exception(e)
    finally:
        logger.info('release the tracker source')
        del tracker  # delete the tracker object
        del logger
        torch.cuda.empty_cache()  # empty the GPU cache so a stopped process does not keep holding GPU memory
        recivq.cancel_join_thread()
        for sendq in sendqs:
            sendq.cancel_join_thread()
            sendq.close()
        recivq.close()
        return
def multiprocess_single_sequence_loader(process_number: int, _config, _queue: mp.Queue, _wait_for_exit: mp.Event,
                                        _local_file, _fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data):

    torch.manual_seed(_config["random_seed"])
    numpy.random.seed(_config["random_seed"])
    random.seed(_config["random_seed"])

    if _config["token_embedder_type"] == "bert_cls":
        _tokenizer = BlingFireTokenizer()
        _ind = PretrainedBertIndexer(pretrained_model=_config["bert_pretrained_model"], do_lowercase=True)
        _token_indexers = {"tokens": _ind}

        _tuple_loader = IrSingleSequenceDatasetReader(lazy=True, tokenizer=_tokenizer,
                                                      token_indexers=_token_indexers,
                                                      max_seq_length=_config["max_doc_length"],
                                                      min_seq_length=_config["min_doc_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("seq_tokens", "num_tokens")])

        _iterator.index_with(Vocabulary.from_files(_config["vocab_directory"]))
    else:
        _tokenizer = BlingFireTokenizer()

        if _config["token_embedder_type"] == "embedding":
            _token_indexers = {"tokens": SingleIdTokenIndexer(lowercase_tokens=True)}
            _vocab = Vocabulary.from_files(_config["vocab_directory"])

        elif _config["token_embedder_type"] == "fasttext":
            _token_indexers = {"tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])}
            _vocab = FastTextVocab(_fasttext_vocab_cached_mapping, _fasttext_vocab_cached_data,
                                   _config["fasttext_max_subwords"])

        elif _config["token_embedder_type"] == "elmo":
            _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
            _vocab = None

        _tuple_loader = IrSingleSequenceDatasetReader(lazy=True, tokenizer=_tokenizer,
                                                      token_indexers=_token_indexers,
                                                      max_seq_length=_config["max_doc_length"],
                                                      min_seq_length=_config["min_doc_length"])

        _iterator = BucketIterator(batch_size=int(_config["batch_size_eval"]),
                                   sorting_keys=[("seq_tokens", "num_tokens")])

        _iterator.index_with(_vocab)

    for training_batch in _iterator(_tuple_loader.read(_local_file), num_epochs=1):
        _queue.put(training_batch)  # this moves the tensors in to shared memory

    _queue.put(None)  # signal end of queue

    _queue.close()  # indicate this local thread is done
    _wait_for_exit.wait()  # keep this process alive until all the shared memory is used and not needed anymore
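# A hedged consumer sketch for the loader above (assumptions: it is spawned
# with torch.multiprocessing so batch tensors land in shared memory; `config`
# and `file_path` are hypothetical; the None sentinel marks the end of the stream).
import torch.multiprocessing as mp

def iterate_eval_batches(config, file_path, vocab_mapping=None, vocab_data=None):
    queue = mp.Queue(maxsize=100)
    exit_event = mp.Event()
    worker = mp.Process(target=multiprocess_single_sequence_loader,
                        args=(0, config, queue, exit_event, file_path,
                              vocab_mapping, vocab_data))
    worker.start()
    try:
        while True:
            batch = queue.get()
            if batch is None:  # end-of-queue sentinel put by the loader
                break
            yield batch
    finally:
        exit_event.set()  # let the loader process exit and release shared memory
        worker.join()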
def main(args):
    if args.labels:
        data = []
        with open(args.data_dir, encoding="utf-8") as f:
            for line in csv.reader(f, delimiter="\t"):
                data.append(line)
        text, labels = list(zip(*data[1:]))
    else:
        text = []
        with open(args.data_dir, encoding="utf-8") as f:
            for line in f.readlines():
                text.append(line.strip())
        labels = None

    if isinstance(text, tuple):
        text = list(text)

    if "imdb" in args.data_dir or "IMDB" in args.data_dir:
        text = [clean_for_imdb(t) for t in text]

    logger.info("Do back-translation for {} sentences".format(len(text)))

    if args.gpus is not None and len(args.gpus) > 1:
        logger.info("Use Multiple GPUs: {}".format(", ".join([str(i) for i in args.gpus])))

        split_point = len(text) // len(args.gpus)
        text_splitted = []
        for gpu_id in args.gpus:
            text_splitted.append(text[gpu_id * split_point: (gpu_id + 1) * split_point])
            if gpu_id == len(args.gpus) - 1:
                text_splitted[-1] += text[(gpu_id + 1) * split_point:]
        assert sum(len(s) for s in text_splitted) == len(text)

        set_start_method("spawn")
        q = Queue()
        procs = []
        for i in range(len(args.gpus)):
            proc = Process(target=multi_translate, args=(args, i, text_splitted[i], q))
            procs.append(proc)
            proc.start()

        q_result = []
        for p in procs:
            q_result.append(q.get())

        back_translated_docs = []
        for doc_split in sorted(q_result):
            back_translated_docs += doc_split[1]

        q.close()
        q.join_thread()

        for proc in procs:
            proc.join()
    else:
        if args.gpus is not None:
            gpu = args.gpus[0]
            logger.info("Use only one GPU: {}".format(gpu))
            back_translated_docs = translate(args, text, args.gpus[0])[1]
        else:
            logger.info("Use cpu")
            back_translated_docs = translate(args, text)

    output_file_name = "bt_" + os.path.basename(args.data_dir)
    output_dir = os.path.join(args.output_dir, output_file_name)
    folder_name = os.path.dirname(output_dir)
    if not os.path.isdir(folder_name):
        os.makedirs(folder_name)

    if args.return_sentence_pair:
        # Save original sentence pair
        filename, ext = os.path.splitext(output_dir)
        with open(filename + ".pickle", "wb") as f:
            pickle.dump(back_translated_docs, f)

        # Save back-translated sentences
        bt_doc = [" ".join(list(zip(*d))[1]) for d in back_translated_docs]
        with open(output_dir, "wt") as f:
            if labels is not None:
                tsv_writer = csv.writer(f, delimiter="\t")
                tsv_writer.writerow(data[0])
                for line, label in zip(bt_doc, labels):
                    tsv_writer.writerow([line, label])
            else:
                for line in bt_doc:
                    f.write(line)
                    f.write('\n')

        # Save cross sentences
        new_back_translated_docs = []
        for doc in back_translated_docs:
            new_doc = []
            for j, sent in enumerate(doc):
                if j % 2 == 0:
                    new_doc.append(sent)
                else:
                    new_doc.append(sent[::-1])
            new_back_translated_docs.append(new_doc)

        new_docs1, new_docs2 = [], []
        for doc in new_back_translated_docs:
            n1, n2 = list(zip(*doc))
            new_docs1.append(" ".join(n1))
            new_docs2.append(" ".join(n2))

        filename, ext = os.path.splitext(output_dir)
        with open(filename + "_pair1" + ext, "wt") as f:
            if labels is not None:
                tsv_writer = csv.writer(f, delimiter="\t")
                tsv_writer.writerow(data[0])
                for line, label in zip(new_docs1, labels):
                    tsv_writer.writerow([line, label])
            else:
                for line in new_docs1:
                    f.write(line)
                    f.write('\n')

        with open(filename + "_pair2" + ext, "wt") as f:
            if labels is not None:
                tsv_writer = csv.writer(f, delimiter="\t")
                tsv_writer.writerow(data[0])
                for line, label in zip(new_docs2, labels):
                    tsv_writer.writerow([line, label])
            else:
                for line in new_docs2:
                    f.write(line)
                    f.write('\n')
    else:
        with open(output_dir, "wt") as f:
            if labels is not None:
                tsv_writer = csv.writer(f, delimiter="\t")
                tsv_writer.writerow(data[0])
                for line, label in zip(back_translated_docs, labels):
                    tsv_writer.writerow([line, label])
            else:
                for line in back_translated_docs:
                    f.write(line)
                    f.write('\n')

    logger.info("Translated documents are saved in {}".format(output_dir))
class MultiprocessAsyncGameExecutor(AsyncGameExecutor):
    def __init__(self, game_factory: GameExecutorFactory, network: nn.Module, device: torch.device,
                 processes: int, batches_ahead: int, batch_size: int, states_on_device: bool):
        self._states_on_device = states_on_device
        self._device = device
        self._experience_queue = Queue(maxsize=processes + 1)
        block_size = max(1, batches_ahead - processes)
        self.block_buffer = []
        print('* starting %d workers (batch size: %d, block size: %d)' % (processes, batch_size, block_size))
        self._processes = []
        self._request_queues = []
        for i in range(processes):
            request_queue = Queue(maxsize=10)
            # Transfer to GPU in the other process does not work: it does not throw an error,
            # but training does not converge.
            p = Process(target=_run_game, args=(i, game_factory, network, device, request_queue,
                                                self._experience_queue, batch_size, block_size, False,))
            p.start()
            self._request_queues.append(request_queue)
            self._processes.append(p)

    def _send_to_all(self, request, block=False):
        for request_queue in self._request_queues:
            request_queue.put(request, block=block)

    def get_experiences(self):
        if len(self.block_buffer) == 0:
            block_buffer = self._experience_queue.get(block=True)
            if self._states_on_device:
                for eps, exps in block_buffer:
                    exps = [e.to_device(self._device) for e in exps]
                    self.block_buffer.append((eps, exps))
            else:
                self.block_buffer.extend(block_buffer)
        return self.block_buffer.pop()

    def update_exploration_rate(self, exploration_rate):
        self._send_to_all(_RunGameRequest(set_exploration_rate=exploration_rate), block=True)

    def close(self):
        print('* shutting down workers')
        self._send_to_all(_RunGameRequest(do_terminate=True))

        # wake the workers
        try:
            while not self._experience_queue.empty():
                try:
                    self._experience_queue.get(block=False)
                except queue.Empty:
                    pass
        except ConnectionResetError:
            pass
        except FileNotFoundError:
            pass

        self._experience_queue.close()
        for p in self._processes:
            p.join(1000)
        for q in self._request_queues:
            q.close()
        self._experience_queue.close()
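# A hedged usage sketch for the executor above (assumptions: GameExecutorFactory,
# the network and the surrounding training loop are project-specific; the
# process/batch numbers and the exploration schedule are only examples).
def train(game_factory, network, device, steps):
    executor = MultiprocessAsyncGameExecutor(game_factory, network, device,
                                             processes=4, batches_ahead=8,
                                             batch_size=32, states_on_device=True)
    try:
        for step in range(steps):
            episodes, experiences = executor.get_experiences()
            # ... optimize the network on `experiences` here ...
            executor.update_exploration_rate(max(0.05, 1.0 - step / steps))
    finally:
        executor.close()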
class VideoProcessingPipeline(object):
    """
    Manages the acquisition and preprocessing of video frames from the webcam.
    A pipeline with two processes is used: the first process denoises frames
    and queues the result to the second process, which calculates the optical
    flows on CPU and queues the moving average back to the main process. This
    moving average is used as attention prior by the model.
    """

    def __init__(self, img_size, img_cfg, frames_window=13, flows_window=5,
                 skip_frames=2, cam_res=(640, 480), denoising=True):
        """
        :param img_size: the image input size of the neural network.
        :param img_cfg: the config parameters for image processing.
        :param frames_window: the number of webcam frames input at once into
            the neural network to make a prediction step. Best results tend to
            be obtained for roughly a bit less than one second.
        :param flows_window: the number of optical flows used to calculate an
            attention prior. Defaults to 5. Change at your own risk.
        :param skip_frames: down-sampling factor of the webcam frames.
            Defaults to 2 in order to roughly obtain 15 FPS with a 30 FPS
            webcam. This down-sampling is basic and could be improved to
            support ratios such as 2/3 to obtain 20 FPS.
        :param cam_res: webcam resolution (width, height). The application was
            only tested in 640x480. Change at your own risk.
        :param denoising: activate the denoising process. Defaults to True.
            Most useful with low quality webcams.
        """
        if frames_window not in [9, 13, 17, 21]:
            raise ValueError('Invalid window size for webcam frames: `%s`' % str(frames_window))
        if flows_window not in [3, 5, 7, 9]:
            raise ValueError('Invalid window size for optical flows: `%s`' % str(flows_window))
        if flows_window > frames_window:
            raise ValueError('Optical flow window cannot be wider than camera frames window')

        self.img_size = img_size
        # optical flows can be computed in lower resolution w/o harming results
        self.opt_size = img_size // 2
        self.frames_window = frames_window
        self.flows_window = flows_window
        self.skip_frames = skip_frames
        self.total_frames = 0  # total number of frames acquired
        self.cam_res = cam_res
        self.denoising = denoising

        self.img_frames = [np.zeros((self.img_size, self.img_size, 3), dtype=np.uint8)] * (self.frames_window // 2)
        self.gray_frames = [np.zeros((self.opt_size, self.opt_size), dtype=np.uint8)] * (self.frames_window // 2)
        self.priors = []

        # init multiprocessing
        self.q_parent, self.q_prior = Queue(), Queue()

        # start denoising process
        if self.denoising:
            self.q_denoise = Queue()
            self.p_denoise = Process(target=denoise_frame,
                                     args=(self.q_denoise, self.q_prior,
                                           img_cfg.getint('h'),
                                           img_cfg.getint('template_window_size'),
                                           img_cfg.getint('search_window_size')))
            self.p_denoise.start()
            print('Denoising enabled')
        else:
            print('Denoising disabled')

        # start prior calculation process
        self.p_prior = Process(target=calc_attention_prior,
                               args=(self.opt_size, self.flows_window,
                                     self.q_prior, self.q_parent))
        self.p_prior.start()

        # initialise camera
        self.cap = cv.VideoCapture(0)
        if self.cap.isOpened():
            self.cap_fps = int(round(self.cap.get(cv.CAP_PROP_FPS)))
            self.cap.set(3, self.cam_res[0])
            self.cap.set(4, self.cam_res[1])
            print('Device @%d FPS' % self.cap_fps)
        else:
            raise IOError('Failed to open webcam capture')

        # raw images
        self.last_frame = collections.deque(maxlen=self.cap_fps)
        # cropped region of the raw images
        self.last_cropped_frame = collections.deque(maxlen=self.cap_fps)

        # acquire and preprocess the exact number of frames needed
        # to make the first prior map
        for i in range((frames_window // 2) + 1):
            self.acquire_next_frame(enable_skip=False)

        # now wait for the first prior to be returned
        while len(self.priors) == 0:
            if not self.q_parent.empty():
                # de-queue a prior
                prior, flow = self.q_parent.get(block=False)
                self.priors.append(prior)
            # sleep while the queue is empty
            time.sleep(0.01)

    def _center_crop(self, img, target_shape):
        """
        Returns a center crop of the provided image.

        :param img: the image to crop.
        :param target_shape: the dimensions of the crop.
        :return: the cropped image
        """
        h, w = target_shape
        y, x = img.shape[:2]
        start_y = max(0, y // 2 - (h // 2))
        start_x = max(0, x // 2 - (w // 2))
        return img[start_y:start_y + h, start_x:start_x + w]

    def acquire_next_frame(self, enable_skip=True):
        """
        Reads the next frame from the webcam and starts the asynchronous
        preprocessing. The video stream is down-sampled as necessary to reach
        the desired FPS.

        :param enable_skip: enables down-sampling of the webcam stream. Must
            be True except during initialisation.
        :return: the last frame acquired or None if that frame was skipped due
            to down-sampling of the webcam stream.
        """
        ret, frame = self.cap.read()
        if not ret:
            self.terminate()
            raise IOError('Failed to read the next frame from webcam')

        self.total_frames += 1
        if not enable_skip:
            return self._preprocess_frame(frame)
        elif (self.total_frames % self.skip_frames) == 0:
            return self._preprocess_frame(frame)
        return None

    def _preprocess_frame(self, frame):
        """
        Crops, converts to gray scale, resizes and sends the newly acquired
        webcam frame to the preprocessing pipeline.

        :param frame: the last acquired frame.
        :return: the last acquired frame.
        """
        # crop a square at the center of the frame
        rgb = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
        rgb = self._center_crop(rgb, (self.cam_res[1], self.cam_res[1]))
        self.last_frame.append(frame)
        self.last_cropped_frame.append(rgb)

        # convert to gray scale and resize
        gray = cv.cvtColor(rgb, cv.COLOR_RGB2GRAY)
        gray = cv.resize(gray, (self.opt_size, self.opt_size))
        rgb = cv.resize(rgb, (self.img_size, self.img_size))

        # queue to relevant child process
        if self.denoising:
            self.q_denoise.put(gray)
        else:
            self.q_prior.put(gray)

        self.img_frames.append(rgb)
        self.gray_frames.append(gray)
        return frame

    def get_model_input(self, dequeue=True):
        """
        Gets the list of images and the prior needed for the inference of the
        current frame. Use `dequeue` to retrieve the next prior from the
        queue. The caller must first verify that the queue is non-empty.

        :param dequeue: must be set to True except during initialisation.
        :return: images ndarray and the corresponding prior
        """
        # de-queue a prior
        if dequeue:
            prior, flow = self.q_parent.get(block=False)
            self.priors.append(prior)

        # ensure enough frames have been preprocessed
        n_frames = self.frames_window
        assert len(self.img_frames) >= n_frames
        assert len(self.gray_frames) >= n_frames
        assert len(self.priors) == 1

        imgs = np.stack(self.img_frames[:self.frames_window], axis=0)
        self.img_frames.pop(0)  # slide window to the right
        self.gray_frames.pop(0)
        return imgs, [self.priors.pop(0)]

    def terminate(self):
        """Terminates processes, closes queues and releases video capture."""
        if self.denoising:
            self.q_denoise.put(None)
            time.sleep(0.2)
            self.p_denoise.terminate()
        else:
            self.q_prior.put(None)
        time.sleep(0.2)
        self.p_prior.terminate()
        time.sleep(0.1)
        if self.denoising:
            self.p_denoise.join(timeout=0.5)
        self.p_prior.join(timeout=0.5)
        if self.denoising:
            self.q_denoise.close()
        self.q_parent.close()
        self.cap.release()
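# A hedged main-loop sketch for the pipeline above (assumptions: `model` and
# `img_cfg` are hypothetical; model.predict() stands in for whatever inference
# the application actually runs on the stacked frames and the attention prior).
def run_webcam_inference(model, img_cfg, steps=100):
    pipeline = VideoProcessingPipeline(img_size=224, img_cfg=img_cfg)
    try:
        for _ in range(steps):
            frame = pipeline.acquire_next_frame()
            if frame is None:
                continue  # frame skipped by the FPS down-sampling
            if pipeline.q_parent.empty():
                continue  # no new attention prior is ready yet
            imgs, priors = pipeline.get_model_input()
            model.predict(imgs, priors)  # hypothetical inference call
    finally:
        pipeline.terminate()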