# Example #1
def main():
    """Parse command-line options and start the face-recognition pipeline."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--embedding-model", default="net.pth",
                        help="path to the deep learning face embedding model")
    parser.add_argument("-r", "--recognizer",
                        default="output/recognizer.pickle",
                        help="path to model trained to recognize faces")
    parser.add_argument("-l", "--le", default="output/le.pickle",
                        help="path to label encoder")
    parser.add_argument("-c", "--confidence", type=float, default=0.45,
                        help="minimum probability to filter weak detections")
    parser.add_argument("-d", "--detector", default="face_detection_model",
                        help="path to OpenCV's deep learning face detector")

    options = vars(parser.parse_args())

    # Frame pipeline: grabber process -> this process -> display process.
    frame_queue = Queue()
    processed_queue = Queue()
    frame_length = 1 / 45.0  # target display period (~45 fps)
    grabber = Process(target=get_frames, args=(frame_queue, ))
    grabber.start()
    display = Process(target=display_processed_frames,
                      args=(processed_queue, frame_length))
    display.start()
    process_frames(frame_queue, processed_queue, options)
# Example #2
class SC2Environment(environment.Environment):
    """Environment wrapper that runs an SC2 game in a child process.

    Commands are sent over a duplex pipe; the worker process replies
    with observations.
    """

    def __init__(self, env_args):
        super(SC2Environment, self).__init__()
        env_fn = partial(make_sc2env, **env_args)
        self.conn, child_conn = Pipe()
        self.proc = Process(target=worker,
                            args=(child_conn, CloudpickleWrapper(env_fn)))
        self.proc.start()
        self.reset()

    @staticmethod
    def get_action_size():
        """Return the number of available SC2 action functions."""
        return len(FUNCTIONS)

    def reset(self):
        """Reset the remote environment; return the initial observation list."""
        self.conn.send([COMMAND_RESET, None])
        observation = self.conn.recv()
        return [observation]

    def close(self):
        """Terminate the worker process and release the pipe."""
        self.conn.send([COMMAND_TERMINATE, None])
        self.conn.close()
        self.proc.join()
        print("SC2 environment closed")

    def step(self, actions):
        """Advance the remote environment; return ([obs], reward, done)."""
        self.conn.send([COMMAND_STEP, actions])
        obs = self.conn.recv()
        return [obs], obs.reward, obs.last()
# Example #3
def main(args):
    """Validate generation args and run `_main` on one or more GPUs.

    Raises AssertionError when required options are missing or inconsistent.
    """
    # NOTE: `assert` is stripped under `python -O`; these are developer checks.
    assert args.path is not None, '--path required for generation!'
    assert not args.sampling or args.nbest == args.beam, \
        '--sampling requires --nbest to be equal to --beam'
    assert args.replace_unk is None or args.dataset_impl == 'raw', \
        '--replace-unk requires a raw text dataset (--dataset-impl=raw)'

    assert args.results_path is None, 'We do not support setting results_path!'
    # NOTE(review): dead branch — the assert above guarantees results_path is
    # None (when asserts are enabled), so this block can never execute.
    if args.results_path is not None:
        os.makedirs(args.results_path, exist_ok=True)
        output_path = os.path.join(args.results_path,
                                   'generate-{}.txt'.format(args.gen_subset))
        # line-buffered so partial results are flushed as they are produced
        with open(output_path, 'w', buffering=1) as h:
            return _main(args, h)
    else:
        if args.ngpus == 1:
            # single GPU: run inline and return its result
            return _main(args, sys.stdout)
        else:
            from torch.multiprocessing import Process
            processes = []
            world_size = args.ngpus
            backend = args.distributed_backend
            master_addr = args.distributed_master_addr
            master_port = args.distributed_master_port
            # one generation process per GPU rank; no return value collected
            for rank in range(args.ngpus):
                p = Process(target=_main,
                            args=(args, sys.stdout, rank, world_size, backend,
                                  master_addr, master_port))
                p.start()
                processes.append(p)
            for p in processes:
                p.join()
# Example #4
def init_processes(f, size):
    """Start `size` processes running `init_process` and return a joiner.

    The returned zero-argument callable joins every started process.
    """
    workers = [Process(target=init_process, args=(rank, size, f))
               for rank in range(size)]
    for worker in workers:
        worker.start()
    return lambda: [worker.join() for worker in workers]
# Example #5
    def _compare_parallel(self, network, opponent_network, device, num_workers):
        """Play GAMES_PER_COMPARISON games between `network` and
        `opponent_network` across `num_workers` processes.

        Returns the achieved score fraction (score.value / total games).
        """
        # Distribute games as evenly as possible: the first `r` workers
        # each play one extra game.
        q, r = divmod(self.conf.GAMES_PER_COMPARISON, num_workers)
        # Shared count of workers still running; the evaluator server uses
        # it to know when to shut down.
        num_active_workers = Value('i', num_workers)
        evaluator_mgr = BulkEvaluatorManager(
            [network, opponent_network], device, num_workers)
        # Shared score accumulated across all workers.
        score = Value('i', 0)

        workers = []
        s = 0
        for worker_id in range(num_workers):
            num_games = q + 1 if worker_id < r else q
            evaluator = evaluator_mgr.get_evaluator(worker_id, 0)
            opponent_evaluator = evaluator_mgr.get_evaluator(worker_id, 1)
            # Alternate starting color by cumulative game count so colors
            # stay balanced over the whole comparison.
            color = BLACK if s % 2 == 0 else WHITE
            s += num_games
            worker = Process(
                target=self._worker_job,
                args=(num_games, num_active_workers,
                      evaluator, opponent_evaluator, color, score),
            )
            workers.append(worker)
            worker.start()

        # start evaluator server
        server = evaluator_mgr.get_server(num_active_workers)
        server.start()

        for worker in workers:
            worker.join()
        server.join()

        return score.value / self.conf.GAMES_PER_COMPARISON
# Example #6
    def run_train(self, args):
        """Iteratively generate data with worker processes and retrain.

        A single game instance is created (the loop is currently fixed at
        one), then for each of 100 iterations every game runs
        `self.generate_data` in its own process; the model is retrained
        once all workers have joined.
        """
        print("training...")

        model = self
        sim = Simulator(model)

        games = []
        for i in range(1):
            games.append(
                args.instance_class(args.vizdoom_config,
                                    args.wad_path,
                                    args.skiprate,
                                    actions=args.action_set,
                                    id=i))

        # Fix: loop variable was named `iter`, shadowing the builtin.
        for iteration in range(100):
            print("iteration: ", iteration)
            #
            # generate data
            #
            processes = []
            for game in games:
                process = Process(target=self.generate_data,
                                  args=(game, sim, args))
                process.start()
                processes.append(process)

            for process in processes:
                process.join()
            #
            # train model with new data
            #
            self.train_model(model)
# Example #7
    def _call_with_qiterable(self,
                             qiterable: QIterable,
                             num_epochs: int,
                             shuffle: bool) -> Iterator[TensorDict]:
        """Yield tensor dicts produced by worker processes over `num_epochs`
        passes of `qiterable`.

        Workers signal completion by putting their integer worker id on the
        output queue; every other item on the queue is yielded to the caller.
        """
        # JoinableQueue needed here as sharing tensors across processes
        # requires that the creating tensor not exit prematurely.
        output_queue = JoinableQueue(self.output_queue_size)

        for _ in range(num_epochs):
            qiterable.start()

            # Start the tensor-dict workers.
            for i in range(self.num_workers):
                args = (qiterable, output_queue, self.iterator, shuffle, i)
                process = Process(target=_create_tensor_dicts_from_qiterable, args=args)
                process.start()
                self.processes.append(process)

            # Drain the queue until every worker has reported its id.
            num_finished = 0
            while num_finished < self.num_workers:
                item = output_queue.get()
                output_queue.task_done()
                if isinstance(item, int):
                    num_finished += 1
                    logger.info(f"worker {item} finished ({num_finished} / {self.num_workers})")
                else:
                    yield item

            # All workers done: reap them before starting the next epoch.
            for process in self.processes:
                process.join()
            self.processes.clear()

            qiterable.join()
def init_jobs(queue, batch_size, num_features):
    """Start a single daemon process that prefetches data into `queue`.

    Multiple prefetch processes could be created here (as a list) if one
    is not enough; currently a single worker is returned.
    """
    worker = Process(target=prefetch_data,
                     args=(queue, batch_size, num_features))
    worker.daemon = True
    worker.start()
    return worker
# Example #9
def main(args):
    """Dispatch text generation to one or many GPUs.

    With more than one GPU the input file is split into shards, one
    worker process runs per GPU on its own shard, and the per-part
    outputs are recombined afterwards.
    """
    args.device = torch.device(
        "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count()

    set_seed(args)
    args.model_type = args.model_type.lower()

    if args.n_gpu <= 1:
        # Single device: run inline.
        run_generation(args)
    else:
        # Independent multi-GPU evaluation: each worker gets its own input
        # shard and its own output part file.
        workers = []
        all_input_files = split_file_on_disk(args.input_file, args.n_gpu)
        for gpu_idx in range(args.n_gpu):
            worker_args = copy.copy(args)
            if torch.cuda.is_available() and not args.no_cuda:
                worker_args.device = torch.device("cuda:" + str(gpu_idx))
            worker_args.n_gpu = 1
            worker_args.input_file = all_input_files[gpu_idx]
            worker_args.output_file = get_file_part_path(args.output_file,
                                                         gpu_idx)

            proc = Process(target=run_generation, args=(worker_args, ))
            workers.append(proc)
            proc.start()

        for proc in workers:
            proc.join()

        combine_files_on_disk(args.output_file, args.n_gpu)
# Example #10
    def __config(self) -> 'Dict[int, Process]':
        """Spawn one training worker process per local rank.

        Returns the mapping rank -> started Process, also stored in
        ``self.local_workers``.

        NOTE(review): the original annotated the return as ``List[Process]``
        but a dict keyed by rank is built and returned; the annotation is
        corrected (as a string so no extra typing import is needed here).
        """
        logger.debug(
            f"Configuring {self.num_workers} local workers /"
            f" {self.world_size} world size on node #{self.node_id}...")

        self.__config_master_protocol()
        self.__build_rank_range()

        self.local_workers = {}
        for rank in self.rank_range:
            process = Process(
                target=process_exec,
                args=(
                    self.node_id,
                    rank,
                    self.world_size,
                    self.dataset,
                    self.neural_network,
                    self.training,
                    self.num_epochs,
                    self.backend,
                    self.verbose,
                ),
            )
            self.local_workers[rank] = process

            logger.debug(f"Starting worker {rank}/{self.world_size-1} process"
                         f" on node #{self.node_id}...")
            process.start()
            logger.debug(
                f"Worker {rank}/{self.world_size-1} successfully started"
                f" on node #{self.node_id}...")

        return self.local_workers
 def __init__(self, game_factory: GameExecutorFactory, network: nn.Module,
              device: torch.device, processes: int, batches_ahead: int,
              batch_size: int, states_on_device: bool):
     """Start `processes` game-executor workers feeding a shared experience queue."""
     # Whether experience tensors should live on `device` for the consumer.
     self._states_on_device = states_on_device
     self._device = device
     # Workers push experience blocks here; +1 slack over the worker count
     # so a worker never blocks while the consumer is draining.
     self._experience_queue = Queue(maxsize=processes + 1)
     block_size = max(1, batches_ahead - processes)
     self.block_buffer = []
     print('* starting %d workers (batch size: %d, block size: %d)' %
           (processes, batch_size, block_size))
     self._processes = []
     self._request_queues = []
     for i in range(processes):
         # Per-worker request channel back to the game process.
         request_queue = Queue(maxsize=10)
         # Transfer to GPU in the other process does not work.. it does not throw an error, but training does not converge
         p = Process(target=_run_game,
                     args=(
                         i,
                         game_factory,
                         network,
                         device,
                         request_queue,
                         self._experience_queue,
                         batch_size,
                         block_size,
                         False,
                     ))
         p.start()
         self._request_queues.append(request_queue)
         self._processes.append(p)
    def run(self, nepoch, batchsize=None, loss='variance', ndist=1):
        """Train for `nepoch` epochs, either inline or on `ndist` processes.

        In distributed mode the per-process observations are gathered into
        a manager list and stored in ``self.obs_dict``.
        """
        if ndist == 1:
            self.distributed_training = False
            self._worker(nepoch, batchsize, loss)
        else:
            self.distributed_training = True
            processes = []

            manager = Manager()
            obs_data = manager.list()

            for rank in range(ndist):
                p = Process(target=self.init_process,
                            args=(obs_data, rank, ndist, nepoch, batchsize,
                                  loss))
                p.start()
                processes.append(p)

            for p in processes:
                p.join()

            # Bug fix: the original assigned self.obs_dict = obs_data
            # unconditionally (and at the wrong indentation level), which
            # raised NameError when ndist == 1 since obs_data is only
            # created in this branch.
            self.obs_dict = obs_data
def run_in_process_group(world_size, filename, fn, inputs):
    """Run `fn` on `world_size` processes and return the collected results.

    Results are read from a shared queue and arrive in completion order,
    not rank order.
    """
    # Tear down any existing default process group before spawning new ranks.
    if torch.distributed.is_initialized():
        torch.distributed.destroy_process_group()
    processes = []
    q = Queue()
    wait_event = Event()

    # run the remaining processes
    # for rank in range(world_size - 1):
    for rank in range(world_size):
        p = Process(
            target=init_and_run_process,
            args=(rank, world_size, filename, fn, inputs[rank], q, wait_event),
        )
        p.start()
        processes.append(p)

    # fetch the results from the queue before joining, the background processes
    # need to be alive if the queue contains tensors. See
    # https://discuss.pytorch.org/t/using-torch-tensor-over-multiprocessing-queue-process-fails/2847/3  # noqa: B950
    results = []
    for _ in range(len(processes)):
        results.append(q.get())

    # Release the workers so they can exit and be joined below.
    wait_event.set()

    for p in processes:
        p.join()
    return results
# Example #14
    def __init__(self, config: ParamDict, environment: Environment,
                 policy: Policy, filter_op: Filter):
        """Spawn asynchronous environment and policy worker processes.

        `threads` environment workers and `gpu threads` (default 2) policy
        workers are launched; pipes and locks wire the two pools together.
        """
        threads, gpu = config.require("threads", "gpu")
        threads_gpu = config["gpu threads"] if "gpu threads" in config else 2
        super(Agent_async, self).__init__(config, environment, policy,
                                          filter_op)

        # sync signal, -1: terminate, 0: normal running, >0 restart and waiting for parameter update
        self._sync_signal = Value('i', 0)

        # environment sub-process list
        self._environment_proc = []
        # policy sub-process list
        self._policy_proc = []

        # used for synchronize policy parameters
        self._param_pipe = None
        self._policy_lock = Lock()
        # used for synchronize roll-out commands
        self._control_pipe = None
        self._environment_lock = Lock()

        step_pipe = []
        cmd_pipe_child, cmd_pipe_parent = Pipe(duplex=True)
        param_pipe_child, param_pipe_parent = Pipe(duplex=False)
        self._control_pipe = cmd_pipe_parent
        self._param_pipe = param_pipe_parent
        for i_envs in range(threads):
            child_name = f"environment_{i_envs}"
            # Each environment worker gets its own duplex step pipe + lock.
            step_pipe_pi, step_pipe_env = Pipe(duplex=True)
            step_lock = Lock()
            # Per-worker seeds; the +1024 offset keeps them disjoint from
            # the policy workers' seeds below.
            worker_cfg = ParamDict({
                "seed": self.seed + 1024 + i_envs,
                "gpu": gpu
            })
            child = Process(target=Agent_async._environment_worker,
                            name=child_name,
                            args=(worker_cfg, cmd_pipe_child, step_pipe_env,
                                  self._environment_lock, step_lock,
                                  self._sync_signal, deepcopy(environment),
                                  deepcopy(filter_op)))
            self._environment_proc.append(child)
            step_pipe.append((step_pipe_pi, step_lock))
            child.start()

        for i_policies in range(threads_gpu):
            child_name = f"policy_{i_policies}"
            worker_cfg = ParamDict({
                "seed": self.seed + 2048 + i_policies,
                "gpu": gpu
            })
            # Policy workers serve every environment step pipe.
            child = Process(target=Agent_async._policy_worker,
                            name=child_name,
                            args=(worker_cfg, param_pipe_child, step_pipe,
                                  self._policy_lock, self._sync_signal,
                                  deepcopy(policy)))
            self._policy_proc.append(child)
            child.start()
        # Give the workers time to come up before the agent is used.
        sleep(5)
def main():
    """Parse SageMaker-style arguments and launch one process per GPU/CPU.

    World size is processes-per-host times the number of hosts; each
    process gets a globally unique rank derived from the host index.
    """
    print('Starting')
    parser = argparse.ArgumentParser()
    # Configurable hyperparameters
    parser.add_argument('--rows',
                        type=int,
                        default=1,
                        help='Number of rows in the tensor.')
    parser.add_argument('--columns',
                        type=int,
                        default=1,
                        help='Number of columns in the tensor.')
    parser.add_argument('--backend',
                        type=str,
                        default=None,
                        help='backend for distributed operations.')

    # Container environment
    # Bug fix: `type=list` would split a CLI-supplied value into single
    # characters; parse it as JSON to match the env-var default's shape.
    parser.add_argument('--hosts',
                        type=json.loads,
                        default=json.loads(os.environ["SM_HOSTS"]))
    parser.add_argument('--current-host',
                        type=str,
                        default=os.environ["SM_CURRENT_HOST"])
    parser.add_argument('--model-dir',
                        type=str,
                        default=os.environ["SM_MODEL_DIR"])
    parser.add_argument('--num-gpus',
                        type=int,
                        default=os.environ["SM_NUM_GPUS"])
    parser.add_argument('--num-cpus',
                        type=int,
                        default=os.environ["SM_NUM_CPUS"])

    args = parser.parse_args()

    # Prefer GPUs when available, otherwise one process per CPU.
    number_of_processes = args.num_gpus if args.num_gpus > 0 else args.num_cpus
    world_size = number_of_processes * len(args.hosts)
    logger.info(
        'Running \'{}\' backend on {} nodes and {} processes. World size is {}.'
        .format(args.backend, len(args.hosts), number_of_processes,
                world_size))
    host_rank = args.hosts.index(args.current_host)
    master_addr = args.hosts[0]
    master_port = '55555'
    processes = []
    for rank in range(number_of_processes):
        # Global rank: offset local rank by this host's position.
        process_rank = host_rank * number_of_processes + rank
        p = Process(target=init_processes,
                    args=(args.backend, master_addr, master_port, process_rank,
                          world_size, args.rows, args.columns,
                          args.current_host, args.num_gpus))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()

    save('success', args.model_dir)
# Example #16
    def run(self):
        """Train in a child process and report results via `complete_func`.

        A manager dict and two queues move training state between this
        process and the `main_q` worker; the worker's outputs are read
        back from `out_pqueue` once it exits.
        """
        with Manager() as manager:
            d = manager.dict()
            self.d = d
            d['train_progress'] = []
            d['best_epoch'] = None
            d['best_epoch_summary'] = None
            d['model'] = None
            d['labels'] = None
            pqueue = mp.Queue()
            out_pqueue = mp.Queue()
            model = self.trainer_kwargs['model']
            # Share weights with the child process instead of copying them.
            model.share_memory()
            self.trainer_kwargs['model'] = model
            self.trainer_kwargs['num_workers'] = 0
            p = Process(target=main_q, args=(pqueue, out_pqueue, d))
            p.daemon = True
            p.start()
            # Feed the worker its inputs in the order main_q expects.
            pqueue.put(None)
            pqueue.put(self.num_epochs)
            pqueue.put(self.trainer_args)
            pqueue.put(self.trainer_kwargs)
            pqueue.put(self.database_items)
            p.join()
            print('Process results: ', len(d.keys()))
            self.d = get_queue_dict(out_pqueue,
                                    item_names=[
                                        'best_epoch',
                                        'best_epoch_summary',
                                        'model',
                                        'labels',
                                    ])
            best_epoch = self.d['best_epoch']
            best_epoch_summary = self.d['best_epoch_summary']
            # Bug fix: `load_state_dict` returns an `_IncompatibleKeys`
            # report, not the model — the original rebound `model` to that
            # return value, passing the wrong object to complete_func.
            model.load_state_dict(self.d['model'])
            labels = self.d['labels']

            self.d = {
                "train_progress": d['train_progress'],
            }
        self.complete_func(
            self.host, {
                "best_epoch": best_epoch,
                "best_epoch_summary": best_epoch_summary,
                "model": model,
                "labels": labels,
            })
# Example #17
def init_parallel_jobs(cfg, queue, fn, ped_data=None, emp_data=None):
    """Start one daemon process prefetching data into `queue`; return it."""
    worker = Process(target=prefetch_data,
                     args=(cfg, queue, fn, ped_data, emp_data))
    worker.daemon = True
    worker.start()
    return worker
# Example #18
def main(args):
    """Load captioning data, build transforms/vocab, and train on 1..N GPUs."""
    import torchvision.transforms as transforms
    from torch.multiprocessing import Process
    from lib.utils.process_data import load_data, get_word_frequencies
    from lib.utils.vocabulary import load_vocab
    from lib.utils.data_loader import get_split_data_set
    """Loading Data"""
    train_data, val_data, test_data, image_ids, topic_set = load_data(
        args.data_dir)
    data = {'train': train_data, 'val': val_data}
    # Augmented pipeline for training, deterministic crop for validation;
    # normalization constants are the standard ImageNet statistics.
    transform = {
        'train':
        transforms.Compose([
            transforms.Resize(256),
            transforms.RandomCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        'val':
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    }
    vocabs = load_vocab(args.data_dir, min_occurrences=args.min_occurrences)
    word_frequencies = get_word_frequencies(train_data, vocabs['word_vocab'])
    # Round batch size up so it divides evenly across GPUs.
    while args.batch_size % args.num_gpus != 0:
        args.batch_size += 1
    split_data = {
        x: get_split_data_set(data[x],
                              args.batch_size // args.num_gpus,
                              vocabs,
                              args.data_dir,
                              transform[x],
                              args.num_gpus,
                              randomize=True,
                              max_size=args.max_size)
        for x in ['train', 'val']
    }
    if args.num_gpus > 1:
        processes = []
        for rank in range(args.num_gpus):
            p = Process(target=init_processes,
                        args=(rank, args.num_gpus, split_data, vocabs,
                              word_frequencies, args, run))
            p.start()
            processes.append(p)

        for p in processes:
            p.join()
    else:
        # NOTE(review): this inline call omits `word_frequencies`, unlike
        # the multi-GPU path above — confirm `run`'s signature accepts both.
        run(0, 1, split_data, vocabs, args)
# Example #19
    def __init__(self,
                 data,
                 batch_size,
                 num_steps=1,
                 sample_coverage=50,
                 save_dir=None,
                 num_workers=0,
                 log=True):
        """Build a graph sampler over `data`, optionally with worker processes.

        Normalization statistics are loaded from `save_dir` when cached,
        otherwise computed (using the sample workers) and saved back.
        """
        assert data.edge_index is not None
        assert 'node_norm' not in data
        assert 'edge_norm' not in data

        self.N = N = data.num_nodes
        self.E = data.num_edges

        # Sparse adjacency built from the edge list; edge_attr becomes values.
        self.adj = SparseTensor(row=data.edge_index[0],
                                col=data.edge_index[1],
                                value=data.edge_attr,
                                sparse_sizes=(N, N))

        # Shallow copy so the original `data` keeps its edges; this copy
        # drops them because connectivity now lives in self.adj.
        self.data = copy.copy(data)
        self.data.edge_index = None
        self.data.edge_attr = None

        self.batch_size = batch_size
        self.num_steps = num_steps
        self.sample_coverage = sample_coverage
        self.num_workers = num_workers
        self.log = log
        self.__count__ = 0

        # First worker pool: raw samples used for computing normalization.
        if self.num_workers > 0:
            self.__sample_queue__ = Queue()
            self.__sample_workers__ = []
            for _ in range(self.num_workers):
                worker = Process(target=self.__put_sample__,
                                 args=(self.__sample_queue__, ))
                worker.daemon = True
                worker.start()
                self.__sample_workers__.append(worker)

        path = osp.join(save_dir or '', self.__filename__)
        if save_dir is not None and osp.exists(path):  # pragma: no cover
            self.node_norm, self.edge_norm = torch.load(path)
        else:
            self.node_norm, self.edge_norm = self.__compute_norm__()
            if save_dir is not None:  # pragma: no cover
                torch.save((self.node_norm, self.edge_norm), path)

        # Second worker pool: fully-prepared data objects for iteration.
        if self.num_workers > 0:
            self.__data_queue__ = Queue()
            self.__data_workers__ = []
            for _ in range(self.num_workers):
                worker = Process(target=self.__put_data__,
                                 args=(self.__data_queue__, ))
                worker.daemon = True
                worker.start()
                self.__data_workers__.append(worker)
# Example #20
def init_main(scatter_list, sr):
    """Launch rank 0 of a distributed group with world size 10.

    NOTE(review): only rank 0 is started here; with a world size of 10
    this process will block waiting for the other ranks unless they are
    launched elsewhere — confirm against the callers.
    """
    size = 10
    port = 29500
    # Fix: removed an unused `processes` list from the original.
    p = Process(target=init_processes,
                args=(port, '127.0.0.1', 0, size, run, scatter_list, sr))
    p.start()
# Example #21
def test_log_buffer(world_size):
    """Run `run` on `world_size` processes and wait for all to finish."""
    workers = [Process(target=init_process, args=(rank, world_size, run))
               for rank in range(world_size)]
    for worker in workers:
        worker.start()

    for worker in workers:
        worker.join()
# Example #22
def run_local(size):
    """Start `size` local processes via `init_processes` and join them all."""
    workers = [Process(target=init_processes, args=(rank, size, run))
               for rank in range(size)]
    for worker in workers:
        worker.start()

    for worker in workers:
        worker.join()
# Example #23
class Actor:
    """Worker that runs random-policy Atari environments in its own process,
    writing observations into a shared tensor and reporting episode stats.
    """

    def __init__(self, inputs):
        # `inputs` bundles args plus the shared tensors; unpacked in act().
        self.inputs = inputs
        self.process = TorchProcess(target=self.act, daemon=True)
        self.process.start()

    def act(self):
        """Process entry point: step environments forever with random actions."""
        # print(torch.ones((12,23,42)).sum())
        torch.multiprocessing.set_sharing_strategy('file_system')
        args, experiment_name, i, lock, stats_queue, device, \
            obs, actions, logprobs, rewards, dones, values = self.inputs
        # obs = to_numpy(obs_sm, 5)
        envs = []
        # o = np.ones((210, 160, 3))
        # print(o.sum())
        # print(torch.ones((84,160,3)).sum())
        # raise
        
        def make_env(gym_id, seed, idx):
            # Standard Atari preprocessing stack with reproducible seeding.
            env = gym.make(gym_id)
            env = wrap_atari(env)
            env = gym.wrappers.RecordEpisodeStatistics(env)
            env = wrap_deepmind(
                env,
                clip_rewards=True,
                frame_stack=True,
                scale=False,
            )
            env.seed(seed)
            env.action_space.seed(seed)
            env.observation_space.seed(seed)
            return env
        envs = [make_env(args.gym_id, args.seed+i, i) for i in range(args.num_envs)]
        envs = np.array(envs, dtype=object)
    
    
        for env_idx, env in enumerate(envs):
            env.reset()
            # print('Process %d finished resetting %d/%d envs', env_idx + 1, len(envs))
        last_report = last_report_frames = total_env_frames = 0
        # NOTE(review): infinite loop — the process only ends when terminated
        # externally (it runs as a daemon).
        while True:
            for env_idx, env in enumerate(envs):
                # os = []
                for step in range(args.num_steps):
    
                    action = env.action_space.sample()
                    o, r, d, info = env.step(action)
                    if d:
                        o = env.reset()
                    # Write into the shared observation tensor for this actor;
                    # assumes obs is indexed [actor, env, ?, ?, step] — TODO confirm.
                    obs[i,env_idx,0,0,step] = np.array(o)
    
                    num_frames = 1
                    total_env_frames += num_frames
        
                    if 'episode' in info.keys():
                        stats_queue.put(info['episode']['l'])
# Example #24
    def run(self, *args, **kwargs):
        """Spawn one worker per entry of self.world and wait for all of them."""
        workers = []
        for rank, mode in enumerate(self.world):
            worker = Process(target=self.init_process,
                             args=(rank, args, kwargs, mode))
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()
# Example #25
    def train(self, size=2):
        """Run `self.run` on `size` cooperating processes; block until done."""
        workers = [
            Process(target=self.init_processes, args=(rank, size, self.run))
            for rank in range(size)
        ]
        for worker in workers:
            worker.start()

        for worker in workers:
            worker.join()
# Example #26
    def run(self, *args, **kwargs):
        """Launch one worker per (mode, device) entry in self.world and join."""
        workers = []
        for rank, (mode, device) in enumerate(self.world):
            kwargs.update({"mode": mode, "device": device})
            worker = Process(target=self.init_process,
                             args=(rank, args, kwargs))
            worker.start()
            workers.append(worker)

        for worker in workers:
            worker.join()
# Example #27
class DataLoaderMultiFiles(object):
    """DataLoader that iterates over a set of files via background processes.

    Ten buffer-filling processes feed `self.buffer`; a single batching
    process assembles batches into `self.batch_queue`, which `__next__`
    drains into LongTensors.
    """
    def __init__(self, filepaths, partial, batch_s, buffer_s):
        self.filepaths = filepaths
        self.partial = partial
        self.batch_size = batch_s
        self.max_len = buffer_s
        self.buffer = Queue(maxsize=buffer_s)
        self.batch_queue = Queue(maxsize=10)
        # Worker handles are created lazily in __iter__; initialize them so
        # kill()/__del__ are safe even if iteration never starts.
        self.buffr_processes = []
        self.batch_process = None

    def __iter__(self):
        print('Starting processes')
        # Deterministic shuffle of the input files.
        random.seed(0)
        random.shuffle(self.filepaths)
        # (Fix: removed an unused `filepaths` deque copy from the original.)
        self.buffr_processes = []
        args = (self.filepaths, self.buffer, self.partial)
        for i in range(10):
            process = Process(target=fill_buffer, args=args)
            process.daemon = True
            process.start()
            self.buffr_processes.append(process)

        args = (self.buffer, self.batch_queue, self.batch_size)
        self.batch_process = Process(target=fill_batch, args=args)
        self.batch_process.daemon = True
        self.batch_process.start()
        return self

    def done_files(self):
        # Number of buffer workers still alive (0 means all files consumed).
        return sum([e.is_alive() for e in self.buffr_processes])

    def __next__(self):
        # Short timeout once all readers have exited, long while running.
        timeout = 1 if self.done_files() == 0 else 60
        try:
            batch = self.batch_queue.get(timeout=timeout)
        except Empty:
            self.kill()
            raise StopIteration
        tmp = LongTensor(batch)
        return tmp

    def kill(self):
        print('Killing processes')
        # Bug fix: the original referenced `self.buffr_process` (singular),
        # an attribute that never exists, raising AttributeError; terminate
        # every buffer worker instead.
        for process in self.buffr_processes:
            process.terminate()
        if self.batch_process is not None:
            self.batch_process.terminate()

    def __del__(self):
        self.kill()
# Example #28
def train(Model, model_args):
    """Run one worker process per GPU in `model_args['gpus']` and join all."""
    gpus = model_args['gpus']
    # World size scales with the number of local GPUs.
    model_args["distributed"]["world_size"] *= len(gpus)
    workers = [
        Process(target=launch_worker_thread, args=(gpu, Model, model_args))
        for gpu in gpus
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
# Example #29
def main():
    """Launch four processes running `run` via `init_processes`; join them."""
    size = 4
    workers = []
    for rank in range(size):
        worker = Process(target=init_processes, args=(rank, size, run))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
# Example #30
def train_dist(args, myargs):
    """Spawn `args.world_size` distributed training processes and join them."""
    # The writer cannot be shared across processes; close it first.
    myargs.writer.close()
    world = args.world_size
    workers = []
    for rank in range(world):
        worker = Process(target=init_processes,
                         args=(rank, world, args, myargs))
        worker.start()
        workers.append(worker)

    for worker in workers:
        worker.join()
    def _instances(self, file_path: str, manager: Manager, output_queue: Queue) -> Iterator[Instance]:
        """
        A generator that reads instances off the output queue and yields them up
        until none are left (signified by all ``num_workers`` workers putting their
        ids into the queue).
        """
        shards = glob.glob(file_path)
        num_shards = len(shards)

        # If we want multiple epochs per read, put shards in the queue multiple times.
        # Queue capacity leaves room for the per-worker None sentinels below.
        input_queue = manager.Queue(num_shards * self.epochs_per_read + self.num_workers)
        for _ in range(self.epochs_per_read):
            random.shuffle(shards)
            for shard in shards:
                input_queue.put(shard)

        # Then put a None per worker to signify no more files.
        for _ in range(self.num_workers):
            input_queue.put(None)

        processes: List[Process] = []
        num_finished = 0

        for worker_id in range(self.num_workers):
            process = Process(target=_worker,
                              args=(self.reader, input_queue, output_queue, worker_id))
            logger.info(f"starting worker {worker_id}")
            process.start()
            processes.append(process)

        # Keep going as long as not all the workers have finished.
        while num_finished < self.num_workers:
            item = output_queue.get()
            if isinstance(item, int):
                # Means a worker has finished, so increment the finished count.
                num_finished += 1
                logger.info(f"worker {item} finished ({num_finished}/{self.num_workers})")
            else:
                # Otherwise it's an ``Instance``, so yield it up.
                yield item

        # All sentinels seen: reap the worker processes.
        for process in processes:
            process.join()
        processes.clear()