def background_logistic(hp: HParams, cfg: Config, q: mp.Queue):
    if cfg.log_wandb:
        wandb.init(project="falr", config=hp.as_dict, group=hp.md5, job_type='background')
    item = q.get()
    while item is not None:
        evaluate_logistic(*item)
        item = q.get()

def evaluate_hypervolume_sparsity_parallel(self, args, candidates, mask, virtual_ep_objs_batch):
    hv = [0.0 for _ in range(len(candidates))]
    sparsity = [0.0 for _ in range(len(candidates))]
    processes = []
    max_process_num = args.num_tasks * args.num_processes
    queue = Queue()
    for i in range(len(candidates)):
        if mask[i]:
            p = Process(target=update_ep_and_compute_hypervolume_sparsity,
                        args=(i, virtual_ep_objs_batch, candidates[i]['prediction'], queue))
            p.start()
            processes.append(p)
            if len(processes) >= max_process_num:
                for _ in processes:
                    task_id, hv_res, sparsity_res = queue.get()
                    hv[task_id] = hv_res
                    sparsity[task_id] = sparsity_res
                processes = []
    for _ in processes:
        task_id, hv_res, sparsity_res = queue.get()
        hv[task_id] = hv_res
        sparsity[task_id] = sparsity_res
    return hv, sparsity

def dynamic_power(model, input_shape):
    q = Queue()
    power_return = Queue()
    interval_return = Queue()
    latency_return = Queue()
    input_tensor_queue = Queue()
    model_queue = Queue()

    input_tensor = torch.ones([*input_shape])
    input_tensor_queue.put(input_tensor)

    model.share_memory()
    model_queue.put(model)

    context = torch.multiprocessing.get_context('spawn')
    p_thread = context.Process(target=power_thread,
                               args=(power_return, interval_return, q))
    l_thread = context.Process(target=latency_thread,
                               args=(model_queue, input_tensor_queue, latency_return, q))
    l_thread.start()
    p_thread.start()

    power_l = list()     # GPU power list
    interval_l = list()  # power interval list
    latency_l = list()   # latency list

    l_thread.join()

    while True:
        if not power_return.empty():
            power_l.append(power_return.get())
        if not interval_return.empty():
            interval_l.append(interval_return.get())
        if not latency_return.empty():
            latency_l.append(latency_return.get())
        if power_return.empty() and interval_return.empty() and latency_return.empty():
            break

    power_return.close()
    interval_return.close()
    latency_return.close()
    q.close()
    del q
    del power_return
    del latency_return
    del interval_return
    return latency_l, power_l, interval_l

def gather_result(result_queue: mp.Queue, total_video_count: int,
                  model_artifact_name: str, total_len=None):
    fn_results: Dict[str, List[Tuple[int, torch.Tensor]]] = {}
    fns: List[Tuple] = []
    pbar = tqdm(total=total_len, dynamic_ncols=True)
    while True:
        if len(fns) == total_video_count:
            break
        out_, fn_, idx_, done_ = result_queue.get()
        for out, fn, idx, done in zip(out_.clone(), fn_, idx_.clone(), done_.clone()):
            fn_results.setdefault(fn, list())
            fn_results[fn].append((idx, out.clone()))
            pbar.update(1)
            if done:
                output = [t for _, t in sorted(fn_results.pop(fn))]
                output_path = fn2outfn(fn, model_artifact_name)
                safe_dir(output_path, with_filename=True)
                torch.save(output, output_path)
                fns.append((fn, output_path))
                pbar.set_description(f'Done: {output_path}')
        del out_, fn_, idx_, done_
    pbar.close()
    return fns

def heuristic_fn_runner(heuristic_fn_input_queue: Queue, heuristic_fn_output_queues,
                        nnet_dir: str, device, on_gpu: bool, gpu_num: int,
                        env: Environment, all_zeros: bool, clip_zero: bool,
                        batch_size: Optional[int]):
    heuristic_fn = None
    if not all_zeros:
        heuristic_fn = load_heuristic_fn(nnet_dir, device, on_gpu, env.get_nnet_model(),
                                         env, gpu_num=gpu_num, clip_zero=clip_zero,
                                         batch_size=batch_size)

    while True:
        proc_id, states_nnet = heuristic_fn_input_queue.get()
        if proc_id is None:
            break

        if all_zeros:
            # np.float is removed in recent NumPy releases; use the explicit dtype
            heuristics = np.zeros(states_nnet[0].shape[0], dtype=np.float64)
        else:
            heuristics = heuristic_fn(states_nnet, is_nnet_format=True)

        heuristic_fn_output_queues[proc_id].put(heuristics)

    return heuristic_fn

def __detector_process(detector_cfg, recivq: Queue, sendqs, timeout, run_semaphore, pause_event):
    detector = __build_detector_component(detector_cfg)
    logger = get_logger()
    logger.info('create ' + str(detector_cfg['type']))
    try:
        while True:
            if not run_semaphore.value:
                logger.info('detector stopped via the run semaphore')
                break
            pause_event.wait()
            kwargs = recivq.get(timeout=timeout)
            kwargs = detector(**kwargs)
            # the next stage may be backbones or a tracker, so fan results out over a list of queues
            for sendq in sendqs:
                sendq.put(kwargs, timeout=timeout)
    except KeyboardInterrupt:
        logger.info('user stopped the detector process')
    except Empty:
        logger.info('the head no longer sends data, so the detector releases itself')
    except Full:
        logger.exception('a queue leading to a backbone or tracker is full')
    # except Exception as e:
    #     logger.exception(e)
    finally:
        logger.info('release the detector source')
        del detector  # drop the detector object
        del logger
        torch.cuda.empty_cache()  # clear the GPU cache so a stopped process does not keep holding GPU memory
        recivq.cancel_join_thread()
        for sendq in sendqs:
            sendq.cancel_join_thread()
            sendq.close()
        recivq.close()
    return

def run_in_process_group(world_size, filename, fn, inputs):
    if torch.distributed.is_initialized():
        torch.distributed.destroy_process_group()
    processes = []
    q = Queue()
    wait_event = Event()

    # run the remaining processes
    # for rank in range(world_size - 1):
    for rank in range(world_size):
        p = Process(
            target=init_and_run_process,
            args=(rank, world_size, filename, fn, inputs[rank], q, wait_event),
        )
        p.start()
        processes.append(p)

    # fetch the results from the queue before joining, the background processes
    # need to be alive if the queue contains tensors. See
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/3 # noqa: B950
    results = []
    for _ in range(len(processes)):
        results.append(q.get())

    wait_event.set()

    for p in processes:
        p.join()
    return results

def mixup_process_worker_wrapper(q_input: mp.Queue, q_output: mp.Queue, device: int):
    """
    :param q_input: input queue
    :param q_output: output queue
    :param device: running gpu device
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = f"{device}"
    print(f"Process generated with cuda:{device}")
    device = torch.device(f"cuda:{device}")
    while True:
        # Get and load on gpu
        out, target_reweighted, hidden, args, sc, A_dist, debug = q_input.get()
        out = out.to(device)
        target_reweighted = target_reweighted.to(device)
        sc = sc.to(device)
        A_dist = A_dist.to(device)

        # Run
        out, target_reweighted = mixup_process_worker(out, target_reweighted, hidden,
                                                      args, sc, A_dist, debug)

        # To cpu and return
        out = out.cpu()
        target_reweighted = target_reweighted.cpu()
        q_output.put([out, target_reweighted])

def loop_test(network, device, transformer, img_q: Queue, bbox_q: Queue, threshold=0.35):
    scale = None
    print(f"NETWORK IS NONE {type(network)}")
    print("STARTING TO SPIN DETECT LOOP")
    while True:
        print("WAIT")
        image = img_q.get()
        print("RECV")
        if type(image) is str and image == "DONE":
            del image
            break
        print("CHECK")
        boxes = detect_face(image, network, transformer, device, threshold)
        print("SENDING")
        bbox_q.put(boxes)
        print("SENT")
        # DONT FORGET TO CLEANUP
        del image
    img_q.close()
    bbox_q.close()
    print("BYE")

def _run_game(process_id: int, game_factory: GameExecutorFactory, network: nn.Module,
              device: torch.device, request_queue: Queue, experience_queue: Queue,
              batch_size: int, transfer_blocks: int, transfer_to_device: bool) -> None:
    exploration_rate = 1.
    game = game_factory.create()
    print('* worker %d started' % process_id)
    while True:
        try:
            if not request_queue.empty():
                request: _RunGameRequest = request_queue.get(block=False)
                if request.do_terminate:
                    print('* game worker %d terminated' % process_id)
                    experience_queue.close()
                    request_queue.close()
                    return
                if request.set_exploration_rate is not None:
                    exploration_rate = request.set_exploration_rate

            block = []
            for _ in range(transfer_blocks):
                eps, exps = game.multi_step(network, device, exploration_rate, batch_size)
                if transfer_to_device:
                    exps = [e.to_device(device, non_blocking=False) for e in exps]
                block.append((eps, exps))
            experience_queue.put(block, block=True)
        except Exception as e:
            print('error in worker %d: ' % process_id, e)

def self_multiplay(policy):
    q = Queue()
    finq = []
    procs = []
    policy.train(False)
    for i in range(MaxProcessNum):
        fin = Queue()
        t = Process(target=PlayProcess, args=(i, q, fin, policy))
        t.start()
        procs.append(t)
        finq.append(fin)
    for i in range(MaxProcessNum):
        id = finq[i].get()
        print("finish process(%d)" % id)
        sys.stdout.flush()
    try:
        while not q.empty():
            data_buffer.append(q.get(timeout=1))
    except Empty:  # Queue.get(timeout=...) raises queue.Empty, not TimeoutError
        pass
    print('finish Queue get')
    sys.stdout.flush()
    for i in range(len(procs)):
        p = procs[i]
        p.join(timeout=10)
        if p.is_alive():
            print('forcing process(%d) to terminate' % i)
            sys.stdout.flush()
            p.terminate()
    print('finish join')
    sys.stdout.flush()

def _prefetch(in_queue: mp.Queue, out_queue: mp.Queue, batchsize: int,
              shutdown_event: mp.Event, target_device, waiting_time=5):
    """Continuously prefetches complete trajectories dropped by the
    :py:class:`~.TrajectoryStore` for training.

    As long as shutdown is not set, this method pulls :py:attr:`batchsize`
    trajectories from :py:attr:`in_queue`, transforms them into batches using
    :py:meth:`~_to_batch()` and puts them onto the :py:attr:`out_queue`.

    This usually runs as an asynchronous :py:obj:`multiprocessing.Process`.

    Parameters
    ----------
    in_queue: :py:obj:`multiprocessing.Queue`
        A queue that delivers dropped trajectories from :py:class:`~.TrajectoryStore`.
    out_queue: :py:obj:`multiprocessing.Queue`
        A queue that delivers batches to :py:meth:`_loop()`.
    batchsize: `int`
        The number of trajectories that shall be processed into a batch.
    shutdown_event: :py:obj:`multiprocessing.Event`
        An event that breaks this method's internal loop.
    target_device: :py:obj:`torch.device`
        The target device of the batch.
    waiting_time: `float`
        Time the method's loop sleeps between each iteration.
    """
    while not shutdown_event.is_set():
        try:
            trajectories = [in_queue.get(timeout=waiting_time) for _ in range(batchsize)]
        except queue.Empty:
            continue

        batch = Learner._to_batch(trajectories, target_device)

        # delete Tensors after usage to free memory (see torch multiprocessing)
        del trajectories

        try:
            out_queue.put(batch)
        except (AssertionError, ValueError):  # queue closed
            continue

        # delete Tensors after usage to free memory (see torch multiprocessing)
        del batch

    try:
        del trajectories
    except UnboundLocalError:  # already deleted
        pass

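# Minimal launch sketch (assumed, not taken from the original source): run the
# prefetcher above as a background process between a trajectory producer and a
# training loop. The queue/event names and the batch size of 4 are illustrative,
# and `_prefetch` is assumed to be importable as shown.
import torch
import torch.multiprocessing as mp

if __name__ == "__main__":
    in_queue, out_queue = mp.Queue(), mp.Queue()
    shutdown_event = mp.Event()
    prefetcher = mp.Process(
        target=_prefetch,
        args=(in_queue, out_queue, 4, shutdown_event, torch.device("cpu")),
    )
    prefetcher.start()
    # ... a producer puts trajectories on in_queue, the trainer reads batches from out_queue ...
    shutdown_event.set()
    prefetcher.join()
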
class DataLoaderMultiFiles(object):
    """DataLoader to iterate over a set of DataSets."""

    def __init__(self, filepaths, partial, batch_s, buffer_s):
        self.filepaths = filepaths
        self.partial = partial
        self.batch_size = batch_s
        self.max_len = buffer_s
        self.buffer = Queue(maxsize=buffer_s)
        self.batch_queue = Queue(maxsize=10)

    def __iter__(self):
        print('Starting processes')
        random.seed(0)
        random.shuffle(self.filepaths)
        filepaths = deque()
        for path in self.filepaths:
            filepaths.append(path)
        self.buffr_processes = []
        args = (self.filepaths, self.buffer, self.partial)
        for i in range(10):
            process = Process(target=fill_buffer, args=args)
            process.daemon = True
            process.start()
            self.buffr_processes.append(process)
        args = (self.buffer, self.batch_queue, self.batch_size)
        self.batch_process = Process(target=fill_batch, args=args)
        self.batch_process.daemon = True
        self.batch_process.start()
        return self

    def done_files(self):
        return sum([e.is_alive() for e in self.buffr_processes])

    def __next__(self):
        # print('buffer_queue: {}, batch_queue: {}'.format(self.buffer.qsize(), self.batch_queue.qsize()))  # noqa
        timeout = 1 if self.done_files() == 0 else 60
        try:
            batch = self.batch_queue.get(timeout=timeout)
        except Empty:
            self.kill()
            raise StopIteration
        return LongTensor(batch)

    def kill(self):
        print('Killing processes')
        # terminate every buffer-filling worker, then the batching process
        for process in self.buffr_processes:
            process.terminate()
        self.batch_process.terminate()

    def __del__(self):
        self.kill()

def write_batches_from_queue_to_file(queue: Queue, file_path):
    with open(file_path, "a", newline="") as f:
        writer = csv.writer(f)
        while True:
            try:
                batch = queue.get(block=True, timeout=60)
                writer.writerows(batch)
            except Empty:
                print("Timeout during reading from WRITING queue.")
                print(file_path)
                return

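# Minimal usage sketch (assumed, not taken from the original source): run the CSV
# writer above in a background process and feed it row batches through a
# torch.multiprocessing Queue. The file name and rows are illustrative.
from torch.multiprocessing import Process, Queue

if __name__ == "__main__":
    write_queue = Queue()
    writer_proc = Process(target=write_batches_from_queue_to_file,
                          args=(write_queue, "results.csv"))
    writer_proc.start()

    write_queue.put([["step", "loss"], ["1", "0.42"]])  # one batch of CSV rows
    # ... producers keep putting batches; the writer returns after 60 s of silence ...
    writer_proc.join()
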
class GenData:
    def __init__(self, fn, n_processes=4, max_size=200, batchsize=200):
        def consumer(Q):
            iterator = get_supervised_batchsize(fn, batchsize=batchsize)  # todo
            while True:
                try:
                    # only generate more data when the queue has room
                    size = Q.qsize()
                    # print(size)
                    if size < max_size:
                        ret = next(iterator)
                        Q.put(ret)
                    else:
                        time.sleep(2)
                except ValueError:
                    print("I think you closed the queue while it was running, but that's okay")
                    break
                except Exception as e:
                    print("error!", e)
                    break

        self.Q = Queue()
        print("started queue ...")

        # instantiate workers
        self.workers = [Process(target=consumer, args=(self.Q,))
                        for i in range(n_processes)]
        for w in self.workers:
            w.start()
        print("started parallel workers, ready to work!")

    def batchIterator(self):
        while True:
            yield self.Q.get()
        # yield from get_supervised_batchsize(self.Q.get, batchsize=batchsize)  # is this a slow way of doing this?

    def kill(self):
        # tell all workers there is no more data, then close them
        for w in self.workers:
            try:
                w.close()  # this raises ValueError if the worker is still running
            except ValueError:
                print("killed a worker")
                continue

class BulkEvaluatorManager:
    def __init__(self, networks, device, num_evaluators, timeout=5):
        self.networks = networks
        self.device = device
        self.timeout = timeout
        self.job_queue = Queue()
        self.parent_conns = []
        self.child_conns = []
        for i in range(num_evaluators):
            parent_conn, child_conn = Pipe()
            self.parent_conns.append(parent_conn)
            self.child_conns.append(child_conn)

    def server_job(self, num_active_workers):
        num_networks = len(self.networks)
        while num_active_workers.value > 0:
            features = [[] for _ in range(num_networks)]
            conns = [[] for _ in range(num_networks)]
            for _ in range(num_active_workers.value):
                try:
                    feature, network_id, evaluator_id = \
                        self.job_queue.get(timeout=self.timeout)
                    features[network_id].append(torch.from_numpy(feature))
                    conns[network_id].append(self.parent_conns[evaluator_id])
                except queue.Empty:
                    break

            for network_id in range(num_networks):
                if len(features[network_id]) == 0:
                    continue
                network = self.networks[network_id]
                network.eval()
                with torch.no_grad():
                    log_p, v = network(torch.stack(features[network_id]).to(self.device))
                    p = F.softmax(log_p, dim=1)
                p = p.cpu().numpy()
                v = v.cpu().numpy()
                for i in range(len(conns[network_id])):
                    conns[network_id][i].send((p[i], v[i]))

    def get_server(self, num_active_workers):
        return Process(target=self.server_job, args=(num_active_workers,))

    def get_evaluator(self, evaluator_id, network_id):
        return BulkEvaluator(
            evaluator_id,
            network_id,
            self.job_queue,
            self.child_conns[evaluator_id],
        )

def HandleWorkers(server: socket.socket, replay_memory: ReplayMemory, mem_lock: Lock,
                  param_queue: Queue, shutdown: Value):
    print("Listening for new workers...")
    server.settimeout(1)  # timeout period of 1 second
    num_workers = 0
    workers: Dict[int, socket.socket] = dict()
    state_dict = None
    while shutdown.value <= 0:
        try:
            worker, _ = server.accept()
            print("Connected to new worker")
            worker_id = num_workers
            worker_proc = Process(target=ReceivePlayouts,
                                  args=(worker, worker_id, replay_memory, mem_lock),
                                  daemon=True)
            worker_proc.start()
            if state_dict is not None:
                # Send the new worker the most up-to-date params
                buffer = io.BytesIO()
                torch.save(state_dict, buffer)
                param_bytes = buffer.getvalue()
                communication.Send(worker, param_bytes)
            workers[worker_id] = worker
            num_workers += 1
        except socket.timeout:
            pass

        if not param_queue.empty():
            # Send the most up-to-date params to all the workers
            state_dict = None
            while not param_queue.empty():
                state_dict = param_queue.get()
            assert state_dict is not None
            buffer = io.BytesIO()
            torch.save(state_dict, buffer)
            param_bytes = buffer.getvalue()
            print("Sending new params to workers")
            # iterate over a copy of the keys so workers can be removed on error
            for worker_id in list(workers.keys()):
                worker: socket.socket = workers[worker_id]
                try:
                    communication.Send(worker, param_bytes)
                except Exception:
                    # Something went wrong with this connection, so remove this worker
                    print(f"Error with worker {worker_id}, ending connection")
                    workers.pop(worker_id)

def main():
    args = parse_args()
    categories = parse_categories(parse_data(args.data)['names'])
    cap = cv2.VideoCapture(0)
    frame_queue = Queue()
    preds_queue = Queue()
    cur_dets = None
    frame_lock = Lock()
    proc = Process(target=detect, args=(frame_queue, preds_queue, frame_lock, args))
    proc.start()
    try:
        while True:
            ret, frame = cap.read()
            frame_lock.acquire()
            while not frame_queue.empty():
                frame_queue.get()
            frame_queue.put(frame)
            frame_lock.release()
            if not preds_queue.empty():
                cur_dets = preds_queue.get()
            if cur_dets is not None and len(cur_dets) > 0:
                frame = draw_detections_opencv(frame, cur_dets[0], categories)
            cv2.imshow('frame', frame)
            cv2.waitKey(1)
    except KeyboardInterrupt:
        print('Interrupted')
    proc.join()
    cap.release()
    cv2.destroyAllWindows()

def eval_all(args, config, network):
    # model_path
    saveDir = os.path.join('../model', args.model_dir, config.model_dir)
    evalDir = os.path.join('../model', args.model_dir, config.eval_dir)
    misc_utils.ensure_dir(evalDir)
    model_file = os.path.join(saveDir, 'dump-{}.pth'.format(args.resume_weights))
    assert os.path.exists(model_file)
    # get devices
    str_devices = args.devices
    devices = misc_utils.device_parser(str_devices)
    # load data
    crowdhuman = CrowdHuman(config, if_train=False)
    # crowdhuman.records = crowdhuman.records[:10]
    # multiprocessing
    num_devs = len(devices)
    len_dataset = len(crowdhuman)
    num_image = math.ceil(len_dataset / num_devs)
    result_queue = Queue(500)
    procs = []
    all_results = []
    for i in range(num_devs):
        start = i * num_image
        end = min(start + num_image, len_dataset)
        proc = Process(target=inference,
                       args=(config, network, model_file, devices[i],
                             crowdhuman, start, end, result_queue))
        proc.start()
        procs.append(proc)
    pbar = tqdm(total=len_dataset, ncols=50)
    for i in range(len_dataset):
        t = result_queue.get()
        all_results.append(t)
        pbar.update(1)
    pbar.close()
    for p in procs:
        p.join()
    fpath = os.path.join(evalDir, 'dump-{}.json'.format(args.resume_weights))
    misc_utils.save_json_lines(all_results, fpath)
    # evaluation
    eval_path = os.path.join(evalDir, 'eval-{}.json'.format(args.resume_weights))
    eval_fid = open(eval_path, 'w')
    res_line, JI = compute_JI.evaluation_all(fpath, 'box')
    for line in res_line:
        eval_fid.write(line + '\n')
    AP, MR = compute_APMR.compute_APMR(fpath, config.eval_source, 'box')
    line = 'AP:{:.4f}, MR:{:.4f}, JI:{:.4f}.'.format(AP, MR, JI)
    print(line)
    eval_fid.write(line + '\n')
    eval_fid.close()

def async_mol2graph(q: Queue,
                    data: MoleculeDataset,
                    args: Namespace,
                    num_iters: int,
                    iter_size: int,
                    exit_q: Queue,
                    last_batch: bool = False):
    batches = []
    for i in range(0, num_iters, iter_size):  # will only go up to max size of queue, then yield
        if not last_batch and i + args.batch_size > len(data):
            break
        batch = MoleculeDataset(data[i:i + args.batch_size])
        batches.append(batch)
        if len(batches) == args.batches_per_queue_group:  # many at a time, since synchronization is expensive
            with Pool() as pool:
                processed_batches = pool.map(mol2graph_helper, [(batch, args) for batch in batches])
            q.put(processed_batches)
            batches = []
    if len(batches) > 0:
        with Pool() as pool:
            processed_batches = pool.map(mol2graph_helper, [(batch, args) for batch in batches])
        q.put(processed_batches)
    # prevent exiting until the main process says so; otherwise we apparently
    # can't read the end of the queue and crash
    exit_q.get()

def cuda_is_available():
    # hack to check if cuda is available. calling torch.cuda.is_available in
    # this process breaks the multiprocessing of multiple environments
    # See: https://github.com/pytorch/pytorch/pull/2811
    from torch.multiprocessing import Process, Queue

    def wrap_cuda_is_available(q):
        q.put(torch.cuda.is_available())

    q = Queue()
    p = Process(target=wrap_cuda_is_available, args=(q,))
    p.start()
    p.join()
    return q.get()

def _instances(self, file_path: str, manager: Manager, output_queue: Queue) -> Iterator[Instance]:
    """
    A generator that reads instances off the output queue and yields them up
    until none are left (signified by all ``num_workers`` workers putting
    their ids into the queue).
    """
    shards = list(CORPORA[file_path](file_path))
    # Ensure a consistent order before shuffling for testing.
    # shards.sort()
    num_shards = len(shards)

    # If we want multiple epochs per read, put shards in the queue multiple times.
    input_queue = manager.Queue(num_shards * self.epochs_per_read + self.num_workers)
    for _ in range(self.epochs_per_read):
        np.random.shuffle(shards)
        for shard in shards:
            input_queue.put(shard)

    # Then put a None per worker to signify no more files.
    for _ in range(self.num_workers):
        input_queue.put(None)

    processes: List[Process] = []
    num_finished = 0
    for worker_id in range(self.num_workers):
        process = Process(target=_worker,
                          args=(self.reader, input_queue, output_queue, worker_id))
        logger.info(f"starting worker {worker_id}")
        process.start()
        processes.append(process)

    # Keep going as long as not all the workers have finished.
    while num_finished < self.num_workers:
        item = output_queue.get()
        if item is None:
            continue
        elif isinstance(item, int):
            # Means a worker has finished, so increment the finished count.
            num_finished += 1
            logger.info(f"worker {item} finished ({num_finished}/{self.num_workers})")
        else:
            # Otherwise it's an ``Instance``, so yield it up.
            yield item

    for process in processes:
        process.join()
    processes.clear()

def collectGameDataParallel(network, useNetwork, T, width, height):
    totalGames = 0
    game_images = []
    game_targets = []
    while totalGames < 80:
        images = Queue()
        targets = Queue()
        ngames = 5
        barrier = Barrier(ngames + 1)
        processes = [Process(target=collectGameData,
                             args=(barrier, play_game, network, useNetwork, T,
                                   width, height, images, targets))
                     for _ in range(ngames)]
        for p in processes:
            p.start()
        for _ in range(ngames):
            im = images.get()
            game_images.append(copy.deepcopy(im))
            del im
            t = targets.get()
            game_targets.append(copy.deepcopy(t))
            del t
        barrier.wait()
        for p in processes:
            p.join()
        totalGames += ngames
    flattened_images = list(itertools.chain.from_iterable(game_images))
    flattened_targets = list(itertools.chain.from_iterable(game_targets))
    batchSize = min(len(flattened_images), 2048)
    sample_indices = numpy.random.choice(range(len(flattened_images)), batchSize)
    sample_images = [flattened_images[i] for i in sample_indices]
    sample_targets = [flattened_targets[i] for i in sample_indices]
    return sample_images, sample_targets

def inference_task(inference_queue: mp.Queue, batch_max_size: int):
    inference_service = InferenceService(cfg)
    while True:
        images: list
        images, send_pipe = inference_queue.get(True)
        send_pipes = [send_pipe]
        send_pipe_num_images = [len(images)]

        # keep pulling from the queue until the batch is full or the queue is empty
        while True:
            if len(images) >= batch_max_size:
                break
            try:
                images_more, send_pipe_more = inference_queue.get(False)
                images.extend(images_more)
                send_pipes.append(send_pipe_more)
                send_pipe_num_images.append(len(images_more))
            except Exception:
                break

        dataloader = DataLoader(
            MemoryFiles(images, None),
            shuffle=False,
            num_workers=1,
            batch_size=batch_max_size,
            collate_fn=MemoryFilesCollator(
                inference_service.cfg.DATALOADER.SIZE_DIVISIBILITY))

        results = []
        for batch in dataloader:
            result = inference_service.run_inference(batch)
            results.extend(result)

        pipe: multiprocessing.connection.Connection
        for (pipe, num_imgs) in zip(send_pipes, send_pipe_num_images):
            pipe.send(results[:num_imgs])
            del results[:num_imgs]

def collect_samples(self, min_batch_size):
    torch.set_num_threads(1)
    self.policy.to(torch.device('cpu'))
    self.encoder.to(torch.device('cpu'))
    t_start = time.time()
    process_batch_size = int(math.floor(min_batch_size / self.num_process))
    queue = Queue()
    workers = []

    # don't render the other parallel processes
    for i in range(self.num_process - 1):
        worker_args = (i + 1, queue, self.env, self.policy, self.encoder, False,
                       self.running_state, self.custom_reward, process_batch_size)
        p = Process(target=collect_samples, args=worker_args)
        workers.append(p)
    for worker in workers:
        worker.start()

    memory, log = collect_samples(0, None, self.env, self.policy, self.encoder,
                                  self.render, self.running_state,
                                  self.custom_reward, process_batch_size)

    worker_logs = [None] * len(workers)
    worker_memories = [None] * len(workers)
    for _ in workers:
        pid, worker_memory, worker_log = queue.get(timeout=0.5)
        worker_memories[pid - 1] = worker_memory
        worker_logs[pid - 1] = worker_log
    for worker in workers:
        worker.join()

    # concat all memories
    for worker_memory in worker_memories:
        memory.append(worker_memory)

    if self.num_process > 1:
        log_list = [log] + worker_logs
        log = merge_log(log_list)

    t_end = time.time()
    log['sample_time'] = t_end - t_start

    self.policy.to(device)
    self.encoder.to(device)
    return memory, log

def tracking(queue_items: mp.Queue, area):
    txt_writer = open(txt_path, 'wt')
    deepsorts = []
    for i in range(5):
        deepsort = DeepSort("deep/checkpoint/ckpt.t7")
        deepsort.extractor.net.share_memory()
        deepsorts.append(deepsort)
    xmin, ymin, xmax, ymax = area
    while True:
        try:
            queue_item = queue_items.get(block=True, timeout=3)
        except queue.Empty:
            print('Empty queue. End?')
            break
        batch_results = queue_item.detect_results
        imgs = queue_item.imgs
        ori_imgs = queue_item.ori_imgs
        frame_ids = queue_item.frame_ids
        for batch_idx, results in enumerate(batch_results):  # frame by frame
            for class_id in [1, 2, 3, 4]:
                bbox_xywh, cls_conf = bbox_to_xywh_cls_conf(results, class_id)
                if (bbox_xywh is not None) and (len(bbox_xywh) > 0):
                    outputs = deepsorts[class_id].update(bbox_xywh, cls_conf, imgs[batch_idx])
                    if len(outputs) > 0:
                        bbox_xyxy = outputs[:, :4]
                        identities = outputs[:, -1]
                        offset = (xmin, ymin)
                        if is_write:
                            ori_im = draw_bboxes(ori_imgs[batch_idx], bbox_xyxy, identities,
                                                 class_id, offset=(xmin, ymin))
                        for i, box in enumerate(bbox_xyxy):
                            x1, y1, x2, y2 = [int(coord) for coord in box]
                            x1 += offset[0]
                            x2 += offset[0]
                            y1 += offset[1]
                            y2 += offset[1]
                            idx = int(identities[i]) if identities is not None else 0
                            txt_writer.write(
                                f'{frame_ids[batch_idx]} {class_id} {idx} {x1} {y1} {x2} {y2}\n')
    txt_writer.close()

def inference_video(
    model: torch.nn.Module,
    gpu_id: int,
    data_queue: mp.Queue,
    result_queue: mp.Queue,
    # dataset: Union[Video_2D_Inference, Video_3D_Inference],
    # batch_size: int,
    num_worker: int,
):
    model = model.eval().cuda(device=gpu_id)
    with torch.no_grad():
        # for data, fn, idx, done in DataLoader(dataset, batch_size=batch_size, num_workers=num_worker):
        while True:
            data, fn, idx, done = data_queue.get()
            out = model(data.cuda(device=gpu_id)).detach().cpu()
            result_queue.put((out, fn, idx.clone(), done.clone()))
            del data, idx, done

class IterableParquetDataset(IterableDataset):
    def __init__(self, path, process_func):
        super().__init__()
        dataset = ds.dataset(path)
        self.process_func = process_func
        self.batches = Queue()
        for batch in dataset.to_batches():
            self.batches.put(batch)

    def __iter__(self):
        while True:
            if self.batches.empty():
                self.batches.close()
                break
            batch = self.batches.get().to_pydict()
            batch.update(self.process_func(batch))
            yield batch

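# Minimal usage sketch (assumed, not taken from the original source): iterate the
# dataset directly, or hand it to a DataLoader with batch_size=None so the batch
# dicts pass through unchanged. The file path and `add_row_count` transform are
# hypothetical; process_func is expected to return extra columns to merge in.
def add_row_count(batch):
    any_column = next(iter(batch.values()))
    return {"row_count": [len(any_column)] * len(any_column)}

if __name__ == "__main__":
    dataset = IterableParquetDataset("train.parquet", add_row_count)
    for batch in dataset:  # or: for batch in DataLoader(dataset, batch_size=None)
        pass  # batch is a plain dict of column lists
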
def decorated_function(*args, **kwargs):
    queue = Queue()

    def _queue_result():
        exception, trace, res = None, None, None
        try:
            res = func(*args, **kwargs)
        except Exception as e:
            exception = e
            trace = traceback.format_exc()
        queue.put((res, exception, trace))

    start_new_thread(_queue_result, ())
    result, exception, trace = queue.get()
    if exception is None:
        return result
    else:
        assert isinstance(exception, Exception)
        raise exception.__class__(trace)

def _instances(self, file_path: str, manager: Manager, output_queue: Queue) -> Iterator[Instance]:
    """
    A generator that reads instances off the output queue and yields them up
    until none are left (signified by all ``num_workers`` workers putting
    their ids into the queue).
    """
    shards = glob.glob(file_path)
    num_shards = len(shards)

    # If we want multiple epochs per read, put shards in the queue multiple times.
    input_queue = manager.Queue(num_shards * self.epochs_per_read + self.num_workers)
    for _ in range(self.epochs_per_read):
        random.shuffle(shards)
        for shard in shards:
            input_queue.put(shard)

    # Then put a None per worker to signify no more files.
    for _ in range(self.num_workers):
        input_queue.put(None)

    processes: List[Process] = []
    num_finished = 0
    for worker_id in range(self.num_workers):
        process = Process(target=_worker,
                          args=(self.reader, input_queue, output_queue, worker_id))
        logger.info(f"starting worker {worker_id}")
        process.start()
        processes.append(process)

    # Keep going as long as not all the workers have finished.
    while num_finished < self.num_workers:
        item = output_queue.get()
        if isinstance(item, int):
            # Means a worker has finished, so increment the finished count.
            num_finished += 1
            logger.info(f"worker {item} finished ({num_finished}/{self.num_workers})")
        else:
            # Otherwise it's an ``Instance``, so yield it up.
            yield item

    for process in processes:
        process.join()
    processes.clear()

def main(args):
    # Parse device ids
    default_dev, *parallel_dev = parse_devices(args.devices)
    all_devs = parallel_dev + [default_dev]
    all_devs = [int(x.replace('gpu', '')) for x in all_devs]
    nr_devs = len(all_devs)

    print("nr_dev: {}".format(nr_devs))

    nr_files = len(broden_dataset.record_list['validation_my_material'])
    if args.num_val > 0:
        nr_files = min(nr_files, args.num_val)
    nr_files_per_dev = math.ceil(nr_files / nr_devs)

    pbar = tqdm(total=nr_files)

    result_queue = Queue(5)
    procs = []
    for dev_id in range(nr_devs):
        start_idx = dev_id * nr_files_per_dev
        end_idx = min(start_idx + nr_files_per_dev, nr_files)
        proc = Process(target=worker, args=(args, dev_id, start_idx, end_idx, result_queue))
        print('process:%d, start_idx:%d, end_idx:%d' % (dev_id, start_idx, end_idx))
        proc.start()
        procs.append(proc)

    # master fetches results
    all_result = []
    for i in range(nr_files):
        all_result.append(result_queue.get())
        pbar.update(1)

    for p in procs:
        p.join()

    benchmark = get_benchmark_result(all_result)
    print('[Eval Summary]:')
    print(benchmark)
    print('Evaluation Done!')

def _worker(reader: DatasetReader, input_queue: Queue, output_queue: Queue, index: int) -> None:
    """
    A worker that pulls filenames off the input queue, uses the dataset reader
    to read them, and places the generated instances on the output queue. When
    there are no filenames left on the input queue, it puts its ``index`` on
    the output queue and doesn't do anything else.
    """
    # Keep going until you get a file_path that's None.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            # Put my index on the queue to signify that I'm finished
            output_queue.put(index)
            break
        logger.info(f"reading instances from {file_path}")
        for instance in reader.read(file_path):
            output_queue.put(instance)

r_list = [0] * pop_size  # result list
solutions = es.ask()

# push parameters to queue
for s_id, s in enumerate(solutions):
    for _ in range(n_samples):
        p_queue.put((s_id, s))

# retrieve results
if args.display:
    pbar = tqdm(total=pop_size * n_samples)
for _ in range(pop_size * n_samples):
    while r_queue.empty():
        sleep(.1)
    r_s_id, r = r_queue.get()
    r_list[r_s_id] += r / n_samples
    if args.display:
        pbar.update(1)
if args.display:
    pbar.close()

es.tell(solutions, r_list)
es.disp()

# evaluation and saving
if epoch % log_step == log_step - 1:
    best_params, best, std_best = evaluate(solutions, r_list)
    print("Current evaluation: {}".format(best))
    if not cur_best or cur_best > best:
        cur_best = best