def optimize_parallel_gpu(
            self,
            train_function,
            gpu_ids,
            max_nb_trials=None,
    ):
        """
        Runs optimization across gpus with cuda drivers
        :param train_function:
        :param max_nb_trials:
        :param gpu_ids: List of strings like: ['0', '1, 3']
        :return:
        """
        self.trials = strategies.generate_trials(
            strategy=self.strategy,
            flat_params=self.__flatten_params(self.opt_args),
            nb_trials=max_nb_trials,
        )

        self.trials = [(self.__namespace_from_trial(x), train_function) for x in self.trials]

        # build q of gpu ids so we can use them in each process
        # this is thread safe so each process can pull out a gpu id, run its task and put it back when done
        if self.pool is None:
            gpu_q = Queue()
            for gpu_id in gpu_ids:
                gpu_q.put(gpu_id)

            # init a pool with the nb of worker threads we want
            nb_workers = len(gpu_ids)
            self.pool = Pool(processes=nb_workers, initializer=init, initargs=(gpu_q,))

        # apply parallelization
        results = self.pool.map(optimize_parallel_gpu_private, self.trials)
        return results
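The pool initializer ``init`` and the mapped worker ``optimize_parallel_gpu_private`` are referenced but not shown here. A minimal sketch of how they might look, assuming each worker claims a GPU id from the shared queue, pins the process to it via ``CUDA_VISIBLE_DEVICES``, runs the trial, and returns the id afterwards:

import os
from multiprocessing import Queue

g_gpu_id_q = None  # set in each worker process by the pool initializer


def init(local_gpu_q: Queue):
    # Store the shared GPU-id queue in a module-level global so the
    # mapped function can reach it from inside this worker process.
    global g_gpu_id_q
    g_gpu_id_q = local_gpu_q


def optimize_parallel_gpu_private(args):
    # Each element of self.trials is a (trial_params, train_function) tuple.
    trial_params, train_function = args

    # Claim a GPU id (blocking until one is free), run the trial on it,
    # then put the id back so another pending trial can use it.
    gpu_id_set = g_gpu_id_q.get(block=True)
    try:
        os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id_set
        return train_function(trial_params)
    finally:
        g_gpu_id_q.put(gpu_id_set)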
Example #2
def _create_tensor_dicts_from_queue(input_queue: Queue, output_queue: Queue,
                                    iterator: DataIterator, shuffle: bool,
                                    index: int) -> None:
    """
    Pulls instances from ``input_queue``, converts them into ``TensorDict``s
    using ``iterator``, and puts them on the ``output_queue``.
    """
    logger.info(f"Iterator worker: {index} PID: {os.getpid()}")

    def instances() -> Iterator[Instance]:
        instance = input_queue.get()
        while instance is not None:
            yield instance
            instance = input_queue.get()

    for tensor_dict in iterator(instances(), num_epochs=1, shuffle=shuffle):
        output_queue.put(tensor_dict)

    output_queue.put(index)

    # We need to ensure we've gotten all the tensors out of this queue before
    # this process ends. Otherwise we'll crash. See
    # https://github.com/pytorch/pytorch/issues/7181. This appears to be an
    # issue specifically with tensors, perhaps due to the refcounting involved
    # in managing them in shared memory. If you're working on this code, be
    # aware that I've only been able to reproduce this issue on Linux.  Testing
    # on a Mac alone is not sufficient.
    output_queue.join()
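A minimal driver sketch for the worker above (an assumption about the surrounding setup, not the library's actual code): the parent feeds ``Instance`` objects plus one ``None`` poison pill, then drains a joinable output queue, calling ``task_done()`` for every item so the worker's ``output_queue.join()`` can return; the worker signals completion by putting its ``index`` on the queue.

from multiprocessing import JoinableQueue, Process, Queue

def run_one_worker(instances, iterator, consume):  # hypothetical helper
    input_queue = Queue()
    output_queue = JoinableQueue()  # join()/task_done() need a joinable queue

    worker = Process(target=_create_tensor_dicts_from_queue,
                     args=(input_queue, output_queue, iterator, True, 0))
    worker.start()

    for instance in instances:
        input_queue.put(instance)
    input_queue.put(None)  # poison pill: tells the worker it is done

    while True:
        item = output_queue.get()
        output_queue.task_done()   # lets the worker's output_queue.join() return
        if isinstance(item, int):  # the worker puts its index once finished
            break
        consume(item)              # item is a TensorDict

    worker.join()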
    def optimize_trials_parallel_gpu(
            self,
            train_function,
            nb_trials,
            trials,
            gpu_ids,
            nb_workers=4,
    ):
        """
        Runs optimization across gpus with cuda drivers
        :param train_function:
        :param nb_trials:
        :param gpu_ids: List of strings like: ['0', '1, 3']
        :param nb_workers:
        :return:
        """
        self.trials = trials
        self.trials = [(x, train_function) for x in self.trials]

        # build q of gpu ids so we can use them in each process
        # this is thread safe so each process can pull out a gpu id, run its task and put it back when done
        if self.pool is None:
            gpu_q = Queue()
            for gpu_id in gpu_ids:
                gpu_q.put(gpu_id)

            # init a pool with the nb of worker threads we want
            self.pool = Pool(processes=nb_workers, initializer=init, initargs=(gpu_q,))

        # apply parallelization
        results = self.pool.map(optimize_parallel_gpu_private, self.trials)
        return results
Example #4
def _worker_loop(dataset, job_queue: mp.Queue, result_queue: mp.Queue,
                 interrupt_event: mp.Event):
    logger = logging.getLogger("worker_loop")
    logger.debug("Worker started.")
    while True:
        logger.debug("Trying to fetch from job_queue.")
        if interrupt_event.is_set():
            logger.debug("Received interrupt signal, breaking.")
            break
        try:
            # This assumes that the job_queue is fully populated before the worker is started.
            index = job_queue.get_nowait()
            logger.debug("Fetch successful.")
        except Empty:
            logger.debug("Queue empty, setting up poison pill.")
            index = None
        if index is None or interrupt_event.is_set():
            logger.debug(
                "Fetched poison pill or received interrupt signal, breaking.")
            break
        try:
            logger.debug("Sampling index {} from dataset.".format(index))
            sample = dataset[index]
        except Exception:
            logger.debug("Dataset threw an exception.".format(index),
                         exc_info=1)
            result_queue.put((index, ExceptionWrapper(sys.exc_info())))
        else:
            logger.debug(
                "Putting sample at index {} in the result queue.".format(
                    index))
            result_queue.put((index, sample))
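Because ``_worker_loop`` drains its jobs with ``get_nowait()``, the job queue has to be fully populated before any worker starts. A hypothetical driver sketch (names like ``run_workers`` are illustrative, not from the original code):

import torch.multiprocessing as mp

def run_workers(dataset, indices, num_workers=4):
    job_queue, result_queue = mp.Queue(), mp.Queue()
    interrupt_event = mp.Event()

    # Populate the job queue completely *before* starting the workers,
    # otherwise a worker may see an Empty queue and exit early.
    for index in indices:
        job_queue.put(index)

    workers = [mp.Process(target=_worker_loop,
                          args=(dataset, job_queue, result_queue, interrupt_event))
               for _ in range(num_workers)]
    for w in workers:
        w.start()

    # Each job yields exactly one (index, sample_or_ExceptionWrapper) tuple.
    results = [result_queue.get() for _ in indices]

    interrupt_event.set()  # ask any worker still polling to break out
    for w in workers:
        w.join()
    return results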
Example #5
    def work(self, job_q: Queue, done_q: Queue, num_jobs_to_perform: int,
             device: torch.device, logger: MLgymLoggerIF):

        logger.log(LogLevel.INFO,
                   f"Process {self.process_id} started working.")
        jobs_done_count = 0
        for job in iter(job_q.get, None):  # https://stackoverflow.com/a/21157892
            job.device = device
            job.executing_process_id = self.process_id
            logger.log(
                LogLevel.INFO,
                f"Process {job.executing_process_id} started job {job.job_id} on {job.device}."
            )
            job.starting_time = time.time()
            if job.job_type == JobType.CALC:
                self._do_calc(job)
            job.finishing_time = time.time()
            job.done = True
            jobs_done_count += 1
            if job.job_type == JobType.TERMINATE or num_jobs_to_perform == jobs_done_count:
                logger.log(LogLevel.DEBUG,
                           f"Process {self.process_id} terminated.")
                done_q.put(job)
                break
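The ``iter(job_q.get, None)`` idiom above turns a blocking queue into an iterator that stops as soon as a ``None`` sentinel is pulled. A standalone illustration of the pattern (not tied to the MLgym job classes above):

from multiprocessing import Process, Queue

def echo_worker(q: Queue) -> None:
    # iter(q.get, None) calls q.get() repeatedly and stops when it returns None
    for item in iter(q.get, None):
        print("got", item)

if __name__ == "__main__":
    q = Queue()
    worker = Process(target=echo_worker, args=(q,))
    worker.start()
    for i in range(3):
        q.put(i)
    q.put(None)  # sentinel: ends the worker's for-loop
    worker.join()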
def read_data(dataset: Union[Video_2D_Inference, Video_3D_Inference],
              batch_size: int, num_worker: int, data_queue: mp.Queue):
    mp.set_sharing_strategy('file_system')
    for item in DataLoader(dataset,
                           batch_size=batch_size,
                           num_workers=num_worker):
        data_queue.put(item)
Example #7
def start_processes_zombie_test(
    idx: int,
    entrypoint: Union[str, Callable],
    mp_queue: mp.Queue,
    log_dir: str,
    nproc: int = 2,
) -> None:
    """
    Starts processes
    """

    args = {}
    envs = {}
    for idx in range(nproc):
        args[idx] = ()
        envs[idx] = {}

    pc = start_processes(
        name="zombie_test",
        entrypoint=entrypoint,
        args=args,
        envs=envs,
        log_dir=log_dir,
        redirects=Std.NONE,
    )
    my_pid = os.getpid()
    mp_queue.put(my_pid)
    for child_pid in pc.pids().values():
        mp_queue.put(child_pid)

    try:
        pc.wait(period=1, timeout=300)
    except SignalException as e:
        pc.close(e.sigval)
Example #8
  def train(self, data_loaders, num_updates = 5, tb=None, num_iters=250000):
    data_queue = Queue()
    # for notifying when to receive data
    data_event = Event()
    # for notifying this method when to send new data
    process_event = Event()
    # so it doesn't hang on the first iteration
    process_event.set()
    num_tasks = len(data_loaders)
    
    processes = []
    for process_id in range(self.world_size):
      processes.append(Process(target=self.init_process, 
                        args=(process_id, data_queue, data_event, 
                          process_event, num_updates,
                          tb if process_id == 0 else None)))
      processes[-1].start()

    for num_iter in range(num_iters):
      print("num iter:",num_iter)
      process_event.wait()
      process_event.clear()
      tasks = np.random.randint(0, num_tasks, (self.world_size))
      for task in tasks:
        task_data = next(data_loaders[task])
        # placeholder for sampling data from the dataset
        data_queue.put((task_data[0].numpy()[0], task_data[1].numpy()[0], 
                task_data[2].numpy()[0], task_data[3].numpy()[0]))
      data_event.set()

    for p in processes:
      p.terminate()
      p.join()
Example #9
def update_runner(num_states: int, back_max: int, update_batch_size: int,
                  heur_fn_i_q, heur_fn_o_q, proc_id: int, env: Environment,
                  result_queue: Queue, num_steps: int, update_method: str,
                  eps_max: float):
    heuristic_fn = nnet_utils.heuristic_fn_queue(heur_fn_i_q, heur_fn_o_q,
                                                 proc_id, env)

    start_idx: int = 0
    while start_idx < num_states:
        end_idx: int = min(start_idx + update_batch_size, num_states)

        states_itr, _ = env.generate_states(end_idx - start_idx, (0, back_max))

        if update_method.upper() == "GBFS":
            states_update, cost_to_go_update, is_solved = gbfs_update(
                states_itr, env, num_steps, heuristic_fn, eps_max)
        elif update_method.upper() == "ASTAR":
            states_update, cost_to_go_update, is_solved = astar_update(
                states_itr, env, num_steps, heuristic_fn)
        else:
            raise ValueError("Unknown update method %s" % update_method)

        states_update_nnet: List[np.ndarray] = env.state_to_nnet_input(
            states_update)

        result_queue.put((states_update_nnet, cost_to_go_update, is_solved))

        start_idx: int = end_idx

    result_queue.put(None)
def _run_game(process_id: int, game_factory: GameExecutorFactory,
              network: nn.Module, device: torch.device, request_queue: Queue,
              experience_queue: Queue, batch_size: int, transfer_blocks: int,
              transfer_to_device: bool) -> None:
    exploration_rate = 1.
    game = game_factory.create()
    print('* worker %d started' % process_id)
    while True:
        try:
            if not request_queue.empty():
                request: _RunGameRequest = request_queue.get(block=False)
                if request.do_terminate:
                    print('* game worker %d terminated' % process_id)
                    experience_queue.close()
                    request_queue.close()
                    return
                if request.set_exploration_rate is not None:
                    exploration_rate = request.set_exploration_rate

            block = []
            for _ in range(transfer_blocks):
                eps, exps = game.multi_step(network, device, exploration_rate,
                                            batch_size)
                if transfer_to_device:
                    exps = [
                        e.to_device(device, non_blocking=False) for e in exps
                    ]
                block.append((eps, exps))
            experience_queue.put(block, block=True)
        except Exception as e:
            print('error in worker %d: ' % process_id, e)
Example #11
def mixup_process_worker_wrapper(q_input: mp.Queue, q_output: mp.Queue,
                                 device: int):
    """
    :param q_input:		input queue
    :param q_output:	output queue
    :param device:		running gpu device
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = f"{device}"
    print(f"Process generated with cuda:{device}")
    device = torch.device(f"cuda:{device}")
    while True:
        # Get and load on gpu
        out, target_reweighted, hidden, args, sc, A_dist, debug = q_input.get()
        out = out.to(device)
        target_reweighted = target_reweighted.to(device)
        sc = sc.to(device)
        A_dist = A_dist.to(device)

        # Run
        out, target_reweighted = mixup_process_worker(out, target_reweighted,
                                                      hidden, args, sc, A_dist,
                                                      debug)
        # To cpu and return
        out = out.cpu()
        target_reweighted = target_reweighted.cpu()
        q_output.put([out, target_reweighted])
Example #12
def _create_tensor_dicts_from_qiterable(qiterable: QIterable,
                                        output_queue: Queue,
                                        iterator: DataIterator, shuffle: bool,
                                        index: int) -> None:
    """
    Pulls instances from ``qiterable.output_queue``, converts them into
    ``TensorDict``s using ``iterator``, and puts them on the ``output_queue``.
    """
    logger.info(f"Iterator worker: {index} PID: {os.getpid()}")

    def instances() -> Iterator[Instance]:
        while qiterable.num_active_workers.value > 0 or qiterable.num_inflight_items.value > 0:
            while True:
                try:
                    yield qiterable.output_queue.get(block=False, timeout=1.0)
                    with qiterable.num_inflight_items.get_lock():
                        qiterable.num_inflight_items.value -= 1
                except Empty:
                    break

    for tensor_dict in iterator(instances(), num_epochs=1, shuffle=shuffle):
        output_queue.put(tensor_dict)

    output_queue.put(index)

    # See the note above in _create_tensor_dicts_from_queue.
    output_queue.join()
Example #13
def loop_test(network,
              device,
              transformer,
              img_q: Queue,
              bbox_q: Queue,
              threshold=0.35):
    scale = None
    print(f"NETWORK IS NONE {type(network)}")
    print("STARTING TO SPIN DETECT LOOP")
    while True:
        print("WAIT")
        image = img_q.get()
        print("RECV")
        if type(image) is str and image == "DONE":
            del image
            break
        print("CHECK")
        boxes = detect_face(image, network, transformer, device, threshold)

        print("SENDING")
        bbox_q.put(boxes)
        print("SENT")
        # DONT FORGET TO CLEANUP
        del image
    img_q.close()
    bbox_q.close()
    print("BYE")
Example #14
    def _prefetch(in_queue: mp.Queue,
                  out_queue: mp.Queue,
                  batchsize: int,
                  shutdown_event: mp.Event,
                  target_device,
                  waiting_time=5):
        """Continuously prefetches complete trajectories dropped by
        the :py:class:`~.TrajectoryStore` for training.

        As long as shutdown is not set, this method
        pulls :py:attr:`batchsize` trajectories from :py:attr:`in_queue`,
        transforms them into batches using :py:meth:`~_to_batch()`
        and puts them onto the :py:attr:`out_queue`.

        This usually runs as an asynchronous :py:obj:`multiprocessing.Process`.

        Parameters
        ----------
        in_queue: :py:obj:`multiprocessing.Queue`
            A queue that delivers dropped trajectories from :py:class:`~.TrajectoryStore`.
        out_queue: :py:obj:`multiprocessing.Queue`
            A queue that delivers batches to :py:meth:`_loop()`.
        batchsize: `int`
            The number of trajectories that shall be processed into a batch.
        shutdown_event: :py:obj:`multiprocessing.Event`
            An event that breaks this methods internal loop.
        target_device: :py:obj:`torch.device`
            The target device of the batch.
        waiting_time: `float`
            Time the methods loop sleeps between each iteration.
        """

        while not shutdown_event.is_set():
            try:
                trajectories = [
                    in_queue.get(timeout=waiting_time)
                    for _ in range(batchsize)
                ]
            except queue.Empty:
                continue

            batch = Learner._to_batch(trajectories, target_device)
            # delete Tensors after usage to free memory (see torch multiprocessing)
            del trajectories

            try:
                out_queue.put(batch)
            except (AssertionError, ValueError):  # queue closed
                continue

        # delete Tensors after usage to free memory (see torch multiprocessing);
        # both names may be unbound if shutdown was requested before the first batch
        try:
            del batch
            del trajectories
        except UnboundLocalError:  # already deleted or never created
            pass
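Per the docstring, ``_prefetch`` is meant to run as an asynchronous ``multiprocessing.Process``; a rough launch sketch under the assumption that it is a static method of ``Learner`` (the queue names and batch size here are illustrative):

import torch
import torch.multiprocessing as mp

in_queue, out_queue = mp.Queue(), mp.Queue()
shutdown_event = mp.Event()

prefetch_proc = mp.Process(
    target=Learner._prefetch,
    args=(in_queue, out_queue, 32, shutdown_event, torch.device("cuda:0")),
    daemon=True,
)
prefetch_proc.start()

# ... the TrajectoryStore drops trajectories onto in_queue,
#     and the training loop consumes batches from out_queue ...

shutdown_event.set()   # breaks the prefetch loop
prefetch_proc.join()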
Example #15
def dynamic_power(model, input_shape):
    q = Queue()
    power_return = Queue()
    interval_return = Queue()
    latency_return = Queue()
    input_tensor_queue = Queue()
    model_queue = Queue()

    input_tensor = torch.ones([*input_shape])
    input_tensor_queue.put(input_tensor)

    model.share_memory()

    model_queue.put(model)

    context = torch.multiprocessing.get_context('spawn')

    p_thread = context.Process(target=power_thread,
                               args=(power_return, interval_return, q))
    l_thread = context.Process(target=latency_thread,
                               args=(model_queue, input_tensor_queue,
                                     latency_return, q))

    l_thread.start()
    p_thread.start()

    power_l = list()  # GPU power list
    interval_l = list()  # power interval list
    latency_l = list()  # latency list

    l_thread.join()

    while True:
        if not power_return.empty():
            power_l.append(power_return.get())
        if not interval_return.empty():
            interval_l.append(interval_return.get())
        if not latency_return.empty():
            latency_l.append(latency_return.get())
        if (power_return.empty() and interval_return.empty()
                and latency_return.empty()):
            break

    power_return.close()
    interval_return.close()
    latency_return.close()
    q.close()

    del q
    del power_return
    del latency_return
    del interval_return

    return latency_l, power_l, interval_l
Example #16
def evaluate_single_thread(p_id, model, config, seeds_per_thread, output: Queue):
    rewards = []
    modified_rewards = []
    steps_counts = []
    infos = []
    for seed_plus in range(p_id * seeds_per_thread, (p_id + 1) * seeds_per_thread):
        explorer_seed = 721 + seed_plus * 29
        set_seeds(explorer_seed)

        internal_env_args = {'env_type': 'virtual',
                             'env_init_args': {
                                 'host_tcp': config['training']['client']['host_tcp'],
                                 'port_tcp': config['training']['client']['port_tcp_start'] + p_id
                             },
                             'env_config': config['environment']['core']
                             }
        internal_env_args['env_config']['seed'] = explorer_seed

        env = create_env(config, internal_env_args, transfer=config['training']['transfer'])
        observation = env.reset()

        done = False
        steps = 0
        reward_sum = 0.0
        reward_modified_sum = 0.0

        while not done:
            observation_transformed, _ = observation

            observation, (reward, reward_modified), done, _ = env.step(model.act(observation_transformed))

            reward_sum += reward
            reward_modified_sum += reward_modified

            steps += config["environment"]["wrapper"]["repeat_actions"]
        target_velocities = [[float(v) for v in tv]
                             for tv in np.unique([obs["target_vel"]
                                                  for obs in env.observations], axis=0)]
        velocity_similarity_measure = [np.linalg.norm(np.array(obs["target_vel"])[[0, 2]]
                                                      - np.array(obs["body_vel"]["pelvis"])[[0, 2]])
                                       for obs in env.observations]
        velocity_confidence_intervals = [mean_confidence_interval(velocity_similarity_measure, 0.95),
                                         mean_confidence_interval(velocity_similarity_measure, 0.99)]
        rewards.append(reward_sum)
        modified_rewards.append(reward_modified_sum)
        steps_counts.append(steps)
        print(explorer_seed, ':', reward_sum, ':', steps)
        infos.append({"target": target_velocities,
                      "target_similarity_confidence_intervals": velocity_confidence_intervals,
                      "seed": explorer_seed})
    output.put((rewards, modified_rewards, steps_counts, infos))
def multiprocess_training_loader(process_number: int, _config,
                                 _queue: mp.Queue, _wait_for_exit: mp.Event,
                                 _local_file, _fasttext_vocab_cached_mapping,
                                 _fasttext_vocab_cached_data):

    # workflow: we tokenize the data files with the costly spacy before training in a preprocessing step
    # (and concat the tokens with single whitespaces), so here we only split on the whitespaces
    _tokenizer = None
    if _config["preprocessed_tokenized"] == True:
        _tokenizer = WordTokenizer(word_splitter=JustSpacesWordSplitter())

    if _config["token_embedder_type"] == "embedding":
        _token_indexers = {
            "tokens": SingleIdTokenIndexer(lowercase_tokens=True)
        }
        _vocab = Vocabulary.from_files(_config["vocab_directory"])

    elif _config["token_embedder_type"] == "fasttext":
        _token_indexers = {
            "tokens": FastTextNGramIndexer(_config["fasttext_max_subwords"])
        }
        _vocab = FastTextVocab(_fasttext_vocab_cached_mapping,
                               _fasttext_vocab_cached_data,
                               _config["fasttext_max_subwords"])

    elif _config["token_embedder_type"] == "elmo":
        _token_indexers = {"tokens": ELMoTokenCharactersIndexer()}
        _vocab = None

    _triple_loader = IrTripleDatasetReader(
        lazy=True,
        tokenizer=_tokenizer,
        token_indexers=_token_indexers,
        max_doc_length=_config["max_doc_length"],
        max_query_length=_config["max_query_length"])

    _iterator = BucketIterator(batch_size=int(_config["batch_size_train"]),
                               sorting_keys=[("doc_pos_tokens", "num_tokens"),
                                             ("doc_neg_tokens", "num_tokens")])

    _iterator.index_with(_vocab)

    for training_batch in _iterator(_triple_loader.read(_local_file),
                                    num_epochs=1):

        _queue.put(training_batch)  # this moves the tensors into shared memory

    _queue.close()  # indicate this local process is done
    # keep this process alive until all the shared memory is used and no longer needed
    _wait_for_exit.wait()
Example #18
def ReceivePlayouts(worker: socket.socket, worker_id: int, out_queue: Queue):
    worker.setblocking(True)
    while True:
        try:
            msg: bytes = communication.Receive(worker)
        except Exception as err:
            print(f"Error with worker {worker_id}, ending connection")
            worker.close()
            return

        buffer = io.BytesIO(msg)
        tensor = torch.load(buffer)

        print(f"Received message {tensor}")
        out_queue.put(tensor)
Example #19
def ReceiveParams(server: socket.socket, param_queue: Queue):
    server.setblocking(True)
    print("Listening for network updates...")
    while True:
        try:
            msg = communication.Receive(server)
        except Exception as err:
            print(f"Error with server connection, ending connection")
            server.close()
            return

        buffer = io.BytesIO(msg)
        state_dict = torch.load(buffer)

        param_queue.put(state_dict)
Example #20
def _queuer(instances: Iterable[Instance], input_queue: Queue,
            num_workers: int, num_epochs: Optional[int]) -> None:
    """
    Reads Instances from the iterable and puts them in the input_queue.
    """
    epoch = 0

    while num_epochs is None or epoch < num_epochs:
        epoch += 1
        for instance in instances:
            input_queue.put(instance)

    # Now put a None for each worker, since each needs to receive one
    # to know that it's done.
    for _ in range(num_workers):
        input_queue.put(None)
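``_queuer`` is the producer half of a fan-out: it replays the instances for ``num_epochs`` and then sends one ``None`` per worker so every consumer sees its own poison pill. A small self-contained sketch with a stand-in consumer (the real consumers are workers like ``_create_tensor_dicts`` shown further below):

from multiprocessing import Process, Queue

def _print_consumer(input_queue: Queue) -> None:
    # stand-in consumer: each worker stops at its own None poison pill
    item = input_queue.get()
    while item is not None:
        print(item)
        item = input_queue.get()

if __name__ == "__main__":
    num_workers = 2
    input_queue: Queue = Queue()

    workers = [Process(target=_print_consumer, args=(input_queue,))
               for _ in range(num_workers)]
    for w in workers:
        w.start()

    # Replay the (stand-in) instances for one epoch, then put one None per worker.
    _queuer(["a", "b", "c"], input_queue, num_workers, 1)

    for w in workers:
        w.join()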
def inference_video(
    model: torch.nn.Module,
    gpu_id: int,
    data_queue: mp.Queue,
    result_queue: mp.Queue,
    # dataset: Union[Video_2D_Inference, Video_3D_Inference],
    # batch_size: int, num_worker: int,
):
    model = model.eval().cuda(device=gpu_id)
    with torch.no_grad():
        # for data, fn, idx, done in DataLoader(dataset, batch_size=batch_size, num_workers=num_worker):
        while True:
            data, fn, idx, done = data_queue.get()
            out = model(data.cuda(device=gpu_id)).detach().cpu()
            result_queue.put((out, fn, idx.clone(), done.clone()))
            del data, idx, done
Example #22
def _create_tensor_dicts(input_queue: Queue, output_queue: Queue,
                         iterator: DataIterator, shuffle: bool,
                         index: int) -> None:
    """
    Pulls at most ``max_instances_in_memory`` from the input_queue,
    groups them into batches of size ``batch_size``, converts them
    to ``TensorDict`` s, and puts them on the ``output_queue``.
    """
    def instances() -> Iterator[Instance]:
        instance = input_queue.get()
        while instance is not None:
            yield instance
            instance = input_queue.get()

    for tensor_dict in iterator(instances(), num_epochs=1, shuffle=shuffle):
        output_queue.put(tensor_dict)

    output_queue.put(index)
def _worker(reader: DatasetReader, input_queue: Queue, output_queue: Queue,
            index: int) -> None:
    """
    A worker that pulls filenames off the input queue, uses the dataset reader
    to read them, and places the generated instances on the output queue.
    When there are no filenames left on the input queue, it puts its ``index``
    on the output queue and doesn't do anything else.
    """
    # Keep going until you get a file_path that's None.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            # Put my index on the queue to signify that I'm finished
            output_queue.put(index)
            break

        logger.info(f"reading instances from {file_path}")
        for instance in reader.read(file_path):
            output_queue.put(instance)
Example #24
def run_workers_in_parallel(task_queue: mp.Queue, worker):
    NUMBER_OF_PROCESSES = min(int(mp.cpu_count() * 1.1), task_queue.qsize())

    # TODO: We've noticed that on certain 2 core machine parallelizing the tests
    # makes the llvm backend legacy pass manager 20x slower than using a
    # single process. Need to investigate the root cause eventually. This is a
    # hack to work around this issue.
    if mp.cpu_count() == 2:
        NUMBER_OF_PROCESSES = 1

    processes = []
    for i in range(NUMBER_OF_PROCESSES):
        p = mp.get_context("fork").Process(target=worker, args=(task_queue, ))
        p.start()
        processes.append(p)
    for i in range(NUMBER_OF_PROCESSES):
        task_queue.put(queue_sentinel)
    for p in processes:
        p.join()
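``run_workers_in_parallel`` enqueues one ``queue_sentinel`` per process so each worker knows when to stop; a worker compatible with that contract might look like the sketch below (``queue_sentinel`` and the per-task handler live elsewhere in the real project, so treat these names as assumptions):

import multiprocessing as mp

def example_worker(task_queue: mp.Queue) -> None:
    # Drain tasks until the shared sentinel value arrives.
    while True:
        task = task_queue.get()
        if task == queue_sentinel:   # exactly one sentinel is enqueued per worker
            break
        run_single_test(task)        # hypothetical per-task handler

Such a function would then be passed as the ``worker`` argument to ``run_workers_in_parallel``.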
Example #25
class RemoteTaskManager:
    def __init__(self, factory_env, factory_mgr, n_tasks):
        self.pipe_cmd = Queue()  # we want to queue more data in a row
        self.pipe_data = [SimpleQueue() for _ in range(n_tasks + 1)]

        self.factory_mgr = factory_mgr

        # create thread ( in main process!! ) which will handle requests!
        self.com = RemoteTaskServer(factory_mgr, factory_env, self.pipe_cmd,
                                    self.pipe_data)

        self.dtb = {}
        self.lock = threading.RLock()

        #    def turnon():
        self.com.start()

    def _ind(self, bot_id, objective_id):
        key = (bot_id, objective_id)
        assert key in self.dtb, "you forgot to register your environment [remote] in ctor!!"
        with self.lock:
            return self.dtb[key]

    def register(self, bot_id, objective_id):
        key = (bot_id, objective_id)
        with self.lock:  # multibot register
            assert key not in self.dtb, "double initialization of your environment [remote]!!"
            self.dtb[key] = len(self.dtb)
            assert len(self.dtb) <= len(
                self.pipe_data), "#tasks > #pipes [remote task manager]"

    def reset(self, bot_id, objective_id, seed):
        ind = self._ind(bot_id, objective_id)
        args = (ind, (bot_id, objective_id), seed)
        self.pipe_cmd.put(["reset", args])
        return self.pipe_data[ind].get()

    def step(self, bot_id, objective_id, action):
        ind = self._ind(bot_id, objective_id)
        args = (ind, (bot_id, objective_id), action)
        self.pipe_cmd.put(["step", args])
        return self.pipe_data[ind].get()
Example #26
def __backbone_process(backbone_cfg: list, recivq: Queue, sendq: Queue, timeout, run_semaphore, pause_event):
    # Instantiate all of the components inside a backbone
    backbone_components = [__build_backbone_component(bbcfg) for bbcfg in backbone_cfg]
    logger = get_logger()
    logger.info('create backbone')
    try:
        while True:
            if not run_semaphore.value:
                logger.info('backbone stopped via the run semaphore')
                break
            pause_event.wait()
            kwargs = recivq.get(timeout=timeout)
            # The first component in the pipeline processes the data first
            kwargs = backbone_components[0](**kwargs)
            if len(backbone_components) > 1:
                # If the pipeline has multiple components, hand the data to each subsequent component in turn
                for backbone_component in backbone_components[1:]:
                    kwargs = backbone_component(**kwargs)
            # print('backbone sendq len is {}'.format(sendq.qsize()))
            if kwargs is not None:
                for img_info in kwargs['imgs_info']:
                    sendq.put(img_info, timeout=timeout)
    except KeyboardInterrupt:
        logger.info('user stop a backbone_process process')
    except Empty:
        logger.info('backbone normal stoped')
    except Full as e:
        logger.exception(e)
        logger.warning('The queue to the main process is full; check that the main process is still consuming data')
    except Exception as e:
        logger.exception(e)
        logger.info('A non-ignorable error occurred, so the whole background program is force-stopped; check the log output to locate the error')
        # import signal
        # os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
    finally:
        logger.info('release backbone resources')
        del logger
        recivq.cancel_join_thread()
        sendq.cancel_join_thread()
        recivq.close()
        sendq.close()
    return
Example #27
def mixup_process_worker_wrapper(q_input: mp.Queue, q_output: mp.Queue):
    """
	:param q_input:		input queue
	:param q_output:	output queue
	:param device:		running gpu device
	"""
    # os.environ["CUDA_VISIBLE_DEVICES"] = f"{device}" # not to call torch.cuda initializer in device-0
    # print(f"cuda visible devices = {device}")
    # device = torch.device(f"cuda:0")
    while True:
        # get args
        key, out, target_reweighted, param_list, sc, A_dist, device = q_input.get()

        # run
        out, target_reweighted = mixup_match(out, target_reweighted,
                                             param_list, sc, A_dist, device)

        # return args
        q_output.put([key, out, target_reweighted])
def _worker(
    reader: DatasetReader,
    input_queue: Queue,
    output_queue: Queue,
    num_active_workers: Value,
    num_inflight_items: Value,
    worker_id: int,
) -> None:
    """
    A worker that pulls filenames off the input queue, uses the dataset reader
    to read them, and places the generated instances on the output queue.  When
    there are no filenames left on the input queue, it decrements
    num_active_workers to signal completion.
    """
    logger.info(f"Reader worker: {worker_id} PID: {os.getpid()}")
    # Keep going until you get a file_path that's None.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            # It's important that we close and join the queue here before
            # decrementing num_active_workers. Otherwise our parent may join us
            # before the queue's feeder thread has passed all buffered items to
            # the underlying pipe resulting in a deadlock.
            #
            # See:
            # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#pipes-and-queues
            # https://docs.python.org/3.6/library/multiprocessing.html?highlight=process#programming-guidelines
            output_queue.close()
            output_queue.join_thread()
            # Decrementing is not atomic.
            # See https://docs.python.org/2/library/multiprocessing.html#multiprocessing.Value.
            with num_active_workers.get_lock():
                num_active_workers.value -= 1
            logger.info(f"Reader worker {worker_id} finished")
            break

        logger.info(f"reading instances from {file_path}")
        for instance in reader.read(file_path):
            with num_inflight_items.get_lock():
                num_inflight_items.value += 1
            output_queue.put(instance)
def _worker(reader: DatasetReader,
            input_queue: Queue,
            output_queue: Queue,
            index: int) -> None:
    """
    A worker that pulls filenames off the input queue, uses the dataset reader
    to read them, and places the generated instances on the output queue.
    When there are no filenames left on the input queue, it puts its ``index``
    on the output queue and doesn't do anything else.
    """
    # Keep going until you get a file_path that's None.
    while True:
        file_path = input_queue.get()
        if file_path is None:
            # Put my index on the queue to signify that I'm finished
            output_queue.put(index)
            break

        logger.info(f"reading instances from {file_path}")
        for instance in reader.read(file_path):
            output_queue.put(instance)
Example #30
def parallel_work_func(evaluator: BaseEvaluator,
                       tracker: BaseTracker,
                       dataset: Dataset,
                       gpu_id: int,
                       prefetch: bool,
                       work_dir: str,
                       idx_queue: multiprocessing.Queue,
                       result_queue: multiprocessing.Queue):
    torch.cuda.set_device(gpu_id)
    tracker.cuda()
    tracker.eval()
    while True:
        idx = idx_queue.get()
        seq = dataset[idx]
        i_result = evaluator.run_sequence(tracker,
                                          seq,
                                          use_gpu=True,
                                          zero_based_index=dataset.zero_based_index,
                                          prefetch=prefetch,
                                          work_dir=work_dir)
        result_queue.put((idx, i_result))
Example #31
File: torch.py  Project: noe/seqp
def subprocess_prefetch(generator: Iterable[Union[np.ndarray, Iterable[np.ndarray]]],
                        prefetch_buffer_size: int,
                        ) -> Iterable[Union[np.ndarray, Iterable[np.ndarray]]]:
    """
    Wraps a generator to prefetch batches in a separate subprocess. It can
    be used in a `with` block (which grants proper resource cleanup) or
    directly as a normal generator. It relies on the ability of
    torch.multiprocessing to load Tensors in shared memory; this way,
    the subprocess loads the numpy array from disk, creates a torch Tensor
    from it and then sends it through a Queue to the main process, which
    consumes it.

    :param generator: Generator to wrap.
    :param prefetch_buffer_size: Size of the prefetch buffer.
    :return: Wrapped generator.
    """
    batch_queue = Queue(prefetch_buffer_size)
    control_queue = Queue()
    Process(target=_enqueue_loader_output,
            args=(batch_queue, control_queue, generator)).start()
    control_queue.put(True)
    return _BatchIterator(batch_queue, control_queue)
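Per the docstring, the wrapper can be consumed directly as a generator or inside a ``with`` block; a short usage sketch (``load_batches`` and ``train_step`` are hypothetical stand-ins):

# Iterate directly: batches are loaded in the background subprocess and
# arrive through shared memory while the main process trains.
for batch in subprocess_prefetch(load_batches("train.h5"), prefetch_buffer_size=8):
    train_step(batch)

# Or with explicit cleanup of the prefetching subprocess:
with subprocess_prefetch(load_batches("train.h5"), prefetch_buffer_size=8) as batches:
    for batch in batches:
        train_step(batch)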
Example #32
                              {'popsize': pop_size})

epoch = 0
log_step = 3
while not es.stop():
    if cur_best is not None and -cur_best > args.target_return:
        print("Already better than target, breaking...")
        break

    r_list = [0] * pop_size  # result list
    solutions = es.ask()

    # push parameters to queue
    for s_id, s in enumerate(solutions):
        for _ in range(n_samples):
            p_queue.put((s_id, s))

    # retrieve results
    if args.display:
        pbar = tqdm(total=pop_size * n_samples)
    for _ in range(pop_size * n_samples):
        while r_queue.empty():
            sleep(.1)
        r_s_id, r = r_queue.get()
        r_list[r_s_id] += r / n_samples
        if args.display:
            pbar.update(1)
    if args.display:
        pbar.close()

    es.tell(solutions, r_list)