Example #1
    def __init__(self, num_threads: int):
        if not isinstance(num_threads, int):
            raise pyrado.TypeErr(given=num_threads, expected_type=int)
        if num_threads < 1:
            raise pyrado.ValueErr(given=num_threads, ge_constraint="1")

        self._num_threads = num_threads
        if not ENABLE_SINGLE_WORKER_OPTIMIZATION or num_threads > 1:
            # Create workers
            self._workers = [
                _WorkerInfo(i + 1) for i in range(self._num_threads)
            ]
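            # Manager for sharing state with the worker processes (only needed in the multi-worker case).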
            self._manager = mp.Manager()
        self._G = GlobalNamespace()
Example #2
    def run_games_for_agent(self, agent_number, agent_class):
        """Runs a set of games for a given agent, saving the results in self.results"""
        agent_results = []
        agent_name = agent_class.agent_name
        agent_group = self.agent_to_agent_group[agent_name]
        agent_round = 1
        # print("!!", self.config.environment)
        # print(self.config.environment._max_episode_steps)

        #&&&&&&&&&&&&
        agent_config = copy.deepcopy(self.config)

        if self.environment_has_changeable_goals(agent_config.environment) \
                and self.agent_cant_handle_changeable_goals_without_flattening(agent_name):
            print("Flattening changeable-goal environment for agent {}".format(
                agent_name))
            agent_config.environment = FlattenDictWrapper(
                agent_config.environment,
                dict_keys=["observation", "desired_goal"])

        if self.config.randomise_random_seed:
            agent_config.seed = random.randint(0, 2**32 - 2)
        agent_config.hyperparameters = agent_config.hyperparameters[
            agent_group]
        print("AGENT NAME: {}".format(agent_name))

        manager = mp.Manager()
        return_q = manager.Queue()
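        # Queue proxy created by the Manager; each run_n_episodes process puts its scores and timing here.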
        agent = agent_class(agent_config)
        self.environment_name = agent.environment_title
        jobs = []
        for i in range(self.config.runs_per_agent):
            p = mp.Process(target=agent.run_n_episodes, args=(return_q, ))
            jobs.append(p)
            p.start()

        for proc in jobs:
            proc.join()

        # print("(GridTrainer.py) process end!")
        for game_scores, rolling_scores, time_taken in iter(
                return_q.get, None):
            agent_results.append([
                game_scores, rolling_scores,
                len(rolling_scores), -1 * max(rolling_scores), time_taken
            ])
            if return_q.empty():
                break
        #&&&&&&&&&
        self.results[agent_name] = agent_results
Example #3
 def parallelize_sessions(self, global_nets=None):
     mp_dict = mp.Manager().dict()
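     # Shared dict collects each session's metrics so they can be ordered with sorted() below.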
     workers = []
     spec = deepcopy(self.spec)
     for _s in range(spec['meta']['max_session']):
         spec_util.tick(spec, 'session')
         w = mp.Process(target=mp_run_session,
                        args=(spec, global_nets, mp_dict))
         w.start()
         workers.append(w)
     for w in workers:
         w.join()
     session_metrics_list = [mp_dict[idx] for idx in sorted(mp_dict.keys())]
     return session_metrics_list
Example #4
def spawn_train(cfg):
    # print(torch.cuda.nccl.version())
    # mp.set_start_method("spawn")
    manager = mp.Manager()
    return_dict = manager.dict()
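    # Shared dict passed to every DDP worker so train_ddp can report results back to the parent.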
    jobs = []
    for i in range(cfg.world_size):
        p = mp.Process(target=train_ddp, args=(i, cfg, return_dict))
        jobs.append(p)
        p.start()

    for proc in jobs:
        proc.join()

    return return_dict
Example #5
def parallel_predict(nets, data, data_dir, configs):
    processes = []
    # mp.Array needs a ctypes typecode and a fixed size, so a Manager dict is used for the per-view results instead.
    manager = mp.Manager()
    pred_probs = manager.dict()
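    # Shared dict for per-view predictions; each worker process writes its output here.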
    for view in range(len(nets)):
        p = mp.Process(target=wrap_predict,
                       args=(nets[view], data, data_dir, configs[view], view,
                             pred_probs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
    return pred_probs.values()
Example #6
 def connect(self, jov):
     '''--------------------------------
         connect maze and jovian: 
         1. shared_cue_dict.  := {cue_name: cue_pos}
         2. shared_cue_height := {cue_name: cue_height}
         3. coord transformations
        --------------------------------
     '''
     self.jov = jov
     mgr = multiprocessing.Manager()
     self.shared_cue_dict = mgr.dict()
     self.jov.set_trigger(self.shared_cue_dict)
     self.jov.shared_cue_height = self.cues_height
     self.jov._to_maze_coord = self._to_maze_coord
     self.jov._to_jovian_coord = self._to_jovian_coord
     self.is_jovian_connected = True
Example #7
File: main.py Project: rungsiman/smart
def main(stage, pipeline, dataset):
    if pipeline == 'literal':
        experiment = LiteralExperimentConfig(dataset, **kwargs)
    else:
        experiment = HybridExperimentConfig(dataset, **kwargs)

    set_seed(experiment)
    describe_devices()
    print(experiment.describe())

    world_size = experiment.num_gpu or torch.cuda.device_count()

    with mp.Manager() as manager:
        shared = manager.dict()
        lock = manager.Lock()
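        # Manager proxies are picklable, so mp.spawn can hand the shared dict and lock to every rank.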
        mp.spawn(process, args=(world_size, experiment, stage, pipeline, shared, lock), nprocs=world_size, join=True)
Example #8
def propagate(nnf, feat_A, feat_AP, feat_B, feat_BP, patch_size, iters=2, rand_search_radius=200):
    print("\tpatch_size:{}; num_iters:{}; rand_search_radius:{}".format(patch_size, iters, rand_search_radius))

    nnd = np.zeros(nnf.shape[:2])
    A_size = feat_A.shape[:2]
    B_size = feat_B.shape[:2]

    for ay in range(A_size[0]):
        for ax in range(A_size[1]):
            by, bx = nnf[ay, ax]
            nnd[ay, ax] = cal_dist(ay, ax, by, bx, feat_A, feat_AP, feat_B, feat_BP, A_size, B_size, patch_size)

    manager = mp.Manager()
    q = manager.Queue(A_size[1] * A_size[0])
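    # A Manager queue is needed here: a plain mp.Queue cannot be passed to Pool.apply_async workers.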
    cpus = min(mp.cpu_count(), A_size[0] // 20 + 1)
    for i in range(iters):

        p = Pool(cpus)

        ay_start = 0

        while ay_start < A_size[0]:
            ax_start = 0
            while ax_start < A_size[1]:
                p.apply_async(pixelmatch, args=(q, ax_start, ay_start,
                                                cpus,
                                                nnf, nnd,
                                                A_size, B_size,
                                                feat_A, feat_AP,
                                                feat_B, feat_BP,
                                                patch_size,
                                                rand_search_radius,))

                ax_start += A_size[1] // cpus + 1
            ay_start += A_size[0] // cpus + 1

        p.close()
        p.join()

        while not q.empty():
            ax, ay, xbest, ybest, dbest = q.get()

            nnf[ay, ax] = np.array([ybest, xbest])
            nnd[ay, ax] = dbest

    return nnf, nnd
Example #9
def decode_process(argv, qdecoder):
    pool = mp.Pool(argv.threads)
    manager = mp.Manager()
    write_mutex = manager.Value('i', 1)
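    # Manager proxies (unlike raw mp.Value) can be handed to pool workers via apply_async.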
    while True:
        item = qdecoder.get(timeout=200)
        try:
            qdecoder_size = qdecoder.qsize()
            print('\n current qdecoder size: ', qdecoder_size)
        except NotImplementedError:
            pass
        if item is None:
            print('decoder, qdecoder is None')
            pool.close()
            pool.join()
            return
        pool.apply_async(func=writer, args=(argv, item, write_mutex))
Example #10
def main():
    print('starting')

    m = mp.Manager()
    lock = m.Lock()
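    # Manager-backed lock shared by the four runner processes started below.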
    processes = []
    for rank in range(4):
        p = mp.Process(target=runner, args=(lock, rank,))
        p.start()
        processes.append(p)
        print('started')

    print('waiting for processes to finish')
    for p in processes:
        p.join()
        print('join')
    print('done')
Example #11
def main(args):

    # logger
    logger = logging.getLogger()

    # data prep
    loader_aug = data.loaders.DataLoaderWrapper(args)

    # get model, optimizer, loss
    cae = model.cae.ConvolutionalAutoEncoder(loader_aug.img_shape,
                                             args.embedding_size, args.dropout)
    cae.load_state_dict(torch.load(args.model))
    logger.info("Trainable model parameters: %d" %
                sum(p.numel() for p in cae.parameters() if p.requires_grad))

    global_step = 0

    manager = multiprocessing.Manager()
    queue = manager.Queue()
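    # Producer/consumer: the loop below puts embeddings on this queue and the writetolmdb process consumes them.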
    consumer = multiprocessing.Process(target=writetolmdb,
                                       args=(args.output, queue,
                                             len(loader_aug.ds)),
                                       name="Reporting")

    if args.cuda:
        cae.cuda()

    try:
        consumer.start()
        c_process = psutil.Process(consumer.pid)
        this_process = psutil.Process()

        with torch.autograd.detect_anomaly():
            for b_i, batch in enumerate(tqdm(loader_aug, leave=False)):
                global_step += 1

                if args.cuda:
                    batch = batch.cuda()

                embedding = cae.encoder(batch)
                queue.put(embedding.cpu())

    finally:
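        # None sentinel signals the writer process to finish before it is joined.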
        queue.put(None)
        consumer.join()
Example #12
    def _create_mapper_rollouts(self, ans_cfg):

        V = ans_cfg.MAPPER.map_size
        imH, imW = ans_cfg.image_scale_hw

        mapper_observation_space = {
            "rgb_at_t": spaces.Box(
                low=0.0, high=255.0, shape=(imH, imW, 3), dtype=np.float32
            ),
            "depth_at_t": spaces.Box(
                low=0.0, high=255.0, shape=(imH, imW, 1), dtype=np.float32
            ),
            "ego_map_gt_at_t": spaces.Box(
                low=0.0, high=1.0, shape=(V, V, 2), dtype=np.float32
            ),
            "ego_map_gt_dilation_at_t": spaces.Box(
                low=0.0, high=1.0, shape=(V, V, 2), dtype=np.float32
            ),
            "pose_at_t": spaces.Box(
                low=-100000.0, high=100000.0, shape=(3,), dtype=np.float32
            ),
            "pose_gt_at_t": spaces.Box(
                low=-100000.0, high=100000.0, shape=(3,), dtype=np.float32
            ),
            "ego_map_gt_anticipated_at_t": self.envs.observation_spaces[0].spaces[
                "ego_map_gt_anticipated"
            ],
        }

        mapper_observation_space = spaces.Dict(mapper_observation_space)

        # Multiprocessing manager
        mapper_manager = mp.Manager()
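        # Manager passed into MapLargeRolloutStorageMP below so its rollout storage can be shared across processes.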
        mapper_device = self.device
        if ans_cfg.MAPPER.use_data_parallel and len(ans_cfg.MAPPER.gpu_ids) > 0:
            mapper_device = ans_cfg.MAPPER.gpu_ids[0]
            
        mapper_rollouts = MapLargeRolloutStorageMP(
            ans_cfg.MAPPER.replay_size,
            mapper_observation_space,
            mapper_device,
            mapper_manager,
        )

        return mapper_rollouts
Example #13
def run_job(logger, opt, output_dir, train):
    device_id = allocate_device()
    opt_override = {'device': device_id}

    def merge(a, b):
        d = {}
        d.update(a)
        d.update(b)
        return d

    # opt = {**opt, **opt_override}
    opt = merge(opt, opt_override)
    logger.info('new job: job_id={}, device_id={}'.format(
        opt['job_id'], opt['device']))
    try:
        logger.info("spawning process: job_id={}, device_id={}".format(
            opt['job_id'], opt['device']))

        try:
            output_dir_thread = os.path.join(output_dir, str(opt['job_id']))
            os.makedirs(output_dir_thread, exist_ok=True)

            # logger_thread = setup_logging('job{}'.format(opt['job_id']), output_dir_thread, console=True)

            run_job_lock.acquire()
            manager = multiprocessing.Manager()
            return_dict = manager.dict()
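            # Shared dict lets the child training process return its 'stats' to this parent process.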
            p = multiprocessing.Process(target=train,
                                        args=(opt, output_dir,
                                              output_dir_thread, return_dict))
            p.start()

        finally:
            run_job_lock.release()

        p.join()

        logger.info('finished process: job_id={}, device_id={}'.format(
            opt['job_id'], opt['device']))

        if 'stats' not in return_dict:
            raise ValueError('train() did not populate return_dict with stats')
        return return_dict['stats']
    finally:
        free_device(device_id)
Example #14
    def __init__(self, game, nnet, args, multiprocessing=False):
        self.game = game
        self.nnet = nnet  # queue/pipe connection, not nnet itself
        self.args = args
        self.multiprocessing = multiprocessing
        if multiprocessing:
            self.queue = mp.Manager().Queue()
            # pipeSend, pipeRecv = mp.Pipe()
            # self.pipeSend = pipeSend
            # self.pipeRecv = pipeRecv

        self.Qsa = {}  # stores Q values for s,a (as defined in the paper)
        self.Nsa = {}  # stores #times edge s,a was visited
        self.Ns = {}  # stores #times board s was visited
        self.Ps = {}  # stores initial policy (returned by neural net)

        self.Es = {}  # stores game.getGameEnded ended for board s
        self.Vs = {}  # stores game.getValidMoves for board s
Example #15
def run(C=0.1,
        d_startRound_arr=[(0, 0), (25, 0)],
        rounds=250,
        client_epochs=5,
        batch_size=64,
        num_proc=2):
    m = int(max(round(C * K), 1))

    # SAVE_TRAIN = True
    # EMBED_DIMS = 50

    M = mp.Manager()
    dl = M.list()
    lock = M.Lock()
    s_lock = M.Lock()
    s_idx = M.Value("i", 0, lock=True)
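    # Shared list, locks, and counter are handed to every worker process through run_kws below.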

    run_kws = dict(dl=dl,
                   lock=lock,
                   s_idx=s_idx,
                   s_lock=s_lock,
                   D_r=d_startRound_arr,
                   R=rounds,
                   E=client_epochs,
                   B=batch_size,
                   C=C)
    # m=m,)

    print("{}|processes: {}, C: {}, m: {}, dir: {}".format(
        time.ctime(), num_proc, C, m, result_dir))
    print(run_kws)

    for k in range(len(nodes_df)):
        dl.append(None)

    processes = []
    for rank in range(num_proc):
        p = mp.Process(target=init_processes,
                       args=(rank, num_proc, process, run_kws))
        p.start()
        processes.append(p)

    for p in processes:
        p.join()
Example #16
 def train(self):
     for rnd in range(self.rounds):
         np.random.shuffle(self.nets_pool)
         pool = mp.Pool(self.num_per_rnd)
         self.q = mp.Manager().Queue()
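         # Manager queue is required because a plain mp.Queue cannot be passed to pool.apply_async.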
         dict_new = self.global_agent.model.state_dict()
         if self.estimate_weights_in_center and rnd % self.interval == 0:
             w_d = self.global_agent.estimate_weights(self.policy)
         else:
             w_d = None
         for net in self.nets_pool[:self.num_per_rnd]:
             net.model.load_state_dict(dict_new)
             net.set_lr(self.global_agent.lr)
             pool.apply_async(
                 train_local_mp,
                 (net, self.local_epochs, rnd, self.q, self.policy, w_d))
         pool.close()
         pool.join()
         self.update_global(rnd)
Example #17
def parallel_test(nets, test_data, configs):
    processes = []
    manager = mp.Manager()
    pred_probs = manager.dict()
    gt_y = test_data[1]
    for view, net in enumerate(nets):
        p = mp.Process(target=test_net,
                       args=(net, test_data, configs[view], view, pred_probs))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()
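    # Note: DictProxy.values() follows insertion order in the manager process, so view order is not guaranteed here.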
    pred_probs = pred_probs.values()
    pred_y1 = np.argmax(pred_probs[0], axis=1)
    pred_y2 = np.argmax(pred_probs[1], axis=1)
    pred_y = np.argmax(sum(pred_probs), axis=1)
    print('view 1: %0.4f;   view 2: %0.4f;   fuse: %0.4f\n' %
          (np.mean(pred_y1 == gt_y), np.mean(pred_y2 == gt_y),
           np.mean(pred_y == gt_y)))
Example #18
    def __init__(self, n_readers=1, read_fn=_default_read_fn, n_retries=3):
        self._read_queue = mp.Queue()
        self._out_queue = mp.Queue()
        self._manager = mp.Manager()
        self._buf = self._manager.dict()

        self._read_fn = read_fn
        self._read_workers = [
            mp.Process(
                target=self.__read_worker,
                args=(self._read_queue, self._out_queue),
                daemon=True,
            ) for _ in range(n_readers)
        ]

        if platform.system() != "Windows":
            for w in self._read_workers:
                w.start()
        self.n_retries = n_retries
Example #19
def evaluation(model,
               name,
               adjacency,
               neproc,
               vectors=None,
               cuda=False,
               verbose=False):
    t_start = timeit.default_timer()
    adjacency = list(adjacency.items())
    chunk = int(len(adjacency) / neproc + 1)
    if vectors is not None:
        with torch.no_grad():
            vectors = Variable(torch.from_numpy(vectors).float())
            if cuda:
                vectors = vectors.cuda()
            embeds = model.module.embed(vectors)
    else:
        embeds = model.module.embed()

    queue = mp.Manager().Queue()
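    # Manager queue gathers the (ranks, AP scores) pair produced by each evaluation worker.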
    processes = []
    for rank in range(neproc):
        if "sips" in name:
            p = mp.Process(target=eval_sips_thread,
                           args=(adjacency[rank * chunk:(rank + 1) * chunk],
                                 model, embeds, queue, rank == 0 and verbose))
        else:
            p = mp.Process(target=eval_thread,
                           args=(adjacency[rank * chunk:(rank + 1) * chunk],
                                 model, embeds, queue, rank == 0 and verbose))
        p.start()
        processes.append(p)

    ranks = list()
    ap_scores = list()

    for i in range(neproc):
        msg = queue.get()
        _ranks, _ap_scores = msg
        ranks += _ranks
        ap_scores += _ap_scores

    return np.mean(ranks), np.mean(ap_scores), timeit.default_timer() - t_start
Example #20
def main(args):
    assert args.render or not args.gif, 'If you want to display a gif, you must set render to true'
    if args.load is False and os.path.isfile('./model/breakout.pt'):
        while True:
            load = input(
                'Are you sure you want to erase the previous training? (y/n) ')
            if load.lower() in ('y', 'yes', '1'):
                break
            elif load.lower() in ('n', 'no', '0'):
                import sys
                sys.exit()

    # create shared variables between all the processes
    manager = mp.Manager()
    # used to send the results of the net
    common_dict = manager.dict()
    # a queue of batches to be fed to the training net
    mem_queue = manager.Queue(1500 * mp.cpu_count())
    # a queue of operations pending
    process_queue = manager.Queue(mp.cpu_count() - 1)

    with mp.Pool() as pool:
        try:
            workers: int = pool._processes
            print(f"Running pool with {workers//2} workers")
            pool.apply_async(
                gpu_thread,
                (args.load, mem_queue, process_queue, common_dict, [0, 1]))
            if args.render:
                pool.apply_async(cpu_thread,
                                 (2 if args.gif else 1, mem_queue,
                                  process_queue, common_dict, [2, 3]))
            for i in range(2 * (1 + args.render), workers, 2):
                pool.apply_async(
                    cpu_thread,
                    (0, mem_queue, process_queue, common_dict, [i, i + 1]))

            # Wait for children to finish
            pool.close()
            pool.join()
        except KeyboardInterrupt:
            pool.join()
Example #21
    def __init__(self, config):
        self.seed = config.seed
        self.config = config
        self.num_workers = config.num_workers

        if config.agent['name'] == 'ppo2':
            Transition = PPO_Transition
        elif (config.agent['name'] == 'cppo') or (config.agent['name'] == 'cppo2'):
            Transition = CPPO_Transition
        elif (config.agent['name'] == 'safe_sac'):
            Transition = Safe_TD_Transition
        else:
            Transition = TD_Transition

        if config.sampler_gpu_index == -1:
            self.device = torch.device('cpu')
        else:
            self.device = (torch.device('cuda', index=config.sampler_gpu_index)
                           if torch.cuda.is_available() else torch.device('cpu'))

        self.remotes, self.work_remotes = zip(*[mp.Pipe() for _ in range(self.num_workers)])

        self.manager = mp.Manager()
        # Queue on which the Sampler receives messages from the workers
        self.recv_queue = self.manager.Queue(self.num_workers)
        # Lock guarding the Sampler's receive queue
        self.recv_lock = self.manager.Lock()

        if is_on_policy(self.config.agent['name']):
            self.buffer = Memory(Transition=Transition)
        else:
            self.buffer = ReplayBuffer(size=config.agent['buffer_size'], Transition=Transition)

        self.workers = [EnvWorker(id, remote, self.recv_queue, self.recv_lock, self.config, Transition)
                        for (id, remote) in zip(range(self.num_workers), self.work_remotes)]

        for worker in self.workers:
            worker.start()

        self.sample_iter = 0
        self.result_dict = {}
Example #22
    def eval_strategy_network(self, steps):
        print("\nEvaluating strategy network after {} steps".format(steps))
        self.strategy_network._network = self.strategy_network._network.cpu()
        self.strategy_network._device = torch.device("cpu")

        for p in self.strategy_network._network.parameters():
            assert (p.device == torch.device("cpu"))

        manager = mp.Manager()
        save_lock = manager.Lock()

        t0 = time.time()
        exploits = []

        strategies = {0: self.strategy_network, 1: self.strategy_network}

        for k in range(self.opt.NUM_TRAVERSALS_EVAL):
            sb_player_idx = k % 2
            round_state = create_new_round(sb_player_idx)
            precomputed_ev = make_precomputed_ev(round_state)
            info = traverse(round_state, make_actions, make_infoset, 0,
                            sb_player_idx, strategies, None, None, 0,
                            precomputed_ev)
            exploits.append(info.exploitability.sum())

        elapsed = time.time() - t0
        print("Time for {} eval traversals {} sec".format(
            self.opt.NUM_TRAVERSALS_EVAL, elapsed))

        mbb_per_game = 1e3 * torch.Tensor(exploits) / (
            2.0 * Constants.SMALL_BLIND_AMOUNT)
        mean_mbb_per_game = mbb_per_game.mean()
        stdev_mbb_per_game = mbb_per_game.std()

        writer = self.writers["train"]
        writer.add_scalar("strt_exploit_mbbg_mean", mean_mbb_per_game, steps)
        writer.add_scalar("strt_exploit_mbbg_stdev", stdev_mbb_per_game, steps)
        writer.close()
        print(
            "===> [EVAL] [STRATEGY] Exploitability | mean={} mbb/g | stdev={} | (steps={})"
            .format(mean_mbb_per_game, stdev_mbb_per_game, steps))
Example #23
File: lpas.py Project: gauenk/cl_gen
def execute_split_frame_search(T, fixed_frames, parallel, *args):
    procs, proc_limit = [], 10
    if parallel: blocks_i = mp.Manager().dict()
    else: blocks_i = {}
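    # In the parallel path, each frame's search result lands in the shared dict keyed by str(t) (see the gather below).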
    for t in range(T):
        fixed_frames_t = copy.deepcopy(fixed_frames)
        if parallel:
            p = mp.Process(target=search_across_frame,
                           args=(t, blocks_i, fixed_frames_t, *args))
            p.start()
            procs.append(p)
            # -- wait and reset proc queue --
            if len(procs) == proc_limit:
                finish_procs(procs, proc_limit)
                procs = []
        else:
            search_across_frame(t, blocks_i, fixed_frames_t, *args)
    finish_procs(procs, proc_limit)
    blocks_i = [blocks_i[str(t)] for t in range(T)]
    # if parallel: blocks_i = copy.deepcopy(blocks_i)
    return blocks_i
Example #24
def train(solved_score, population_size, elite_size, num_proc, log_video_rate):
    setup_logger()
    manager = mp.Manager()
    work_queue = manager.Queue()
    results_queue = manager.Queue()
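    # Work items go onto work_queue and evaluated individuals come back via results_queue from the persistent workers.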

    # Random Search 1st generation
    start_time = time.time()
    env = create_environment()
    population = create_population(env, population_size)
    print(population[0])
    elite, top_scores = get_top_performers_from_random_population(
        env, population, elite_size)
    elapsed_time = time.time() - start_time
    log_generation_stats(1, top_scores, elapsed_time)

    # 2nd -> inf generation: Mutate Top Performers (classic GA)
    ma_reward = 0
    spawn_processes(num_proc,
                    work_fn=mutate_and_evaluate_task,
                    args=(elite, work_queue, results_queue))
    for generation in count(start=2, step=1):
        start_time = time.time()
        spawn_mutation_work(work_queue, elite_size, population_size)

        evaluated_population = collect_results(results_queue,
                                               size=population_size)
        top_scores = get_top_performers(evaluated_population, elite,
                                        elite_size)
        elapsed_time = time.time() - start_time
        if generation % log_video_rate == 0:
            record_evaluation_video(elite[0], env)
        log_generation_stats(generation, top_scores, elapsed_time)

        ma_reward = 0.7 * ma_reward + 0.3 * top_scores.mean()
        if ma_reward >= solved_score:
            print(f"Solved in {generation} generations")
            kill_processes(work_queue, num_proc)
            break
Example #25
def run_distributed(create_env_fn, log_dir, Actor, Learner, num_actors,
                    configs):
    mp.freeze_support()

    shared_kwargs = {
        'shared_memory': mp.Queue(100),
        'shared_weights': mp.Manager().dict()
    }
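    # The same queue and Manager dict are passed to the learner and every actor, giving them a common replay channel and weight store.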

    learner_kwargs = dict(
        env=create_env_fn(),
        log_dir=log_dir,
        Learner=Learner,
        **configs['common'],
        **configs['learner'],
        **shared_kwargs,
    )
    processes = [mp.Process(target=run_learner, kwargs=learner_kwargs)]

    for actor_id in range(num_actors):
        actor_kwargs = dict(
            env=create_env_fn(),
            log_dir=log_dir,
            Actor=Actor,
            actor_id=actor_id,
            num_actors=num_actors,
            **configs['common'],
            **configs['actor'],
            **shared_kwargs,
        )
        processes.append(
            mp.Process(target=run_actor, kwargs=actor_kwargs))

    for p in processes:
        p.start()

    for p in processes:
        p.join()
Example #26
    def run(self, num_neighbors, num_layers, num_workers):

        offset = self.start_idx
        batch_size = min(self.end_idx - self.start_idx + 1, self.batch_size)

        ###########################################
        # create a shared Manager dict (named "queue" here) to pass data between processes
        manager = mp.Manager()
        queue = manager.dict()

        ###########################################
        # build each batch's graph in a separate process
        process = GraphGenProcessor(queue, self.num_nodes, offset, batch_size, self.graph, self.interactions, self.neg_nodes, \
            self.times, num_neighbors, num_layers, num_workers)

        process.start()
        while True:

            process.join()
            data = queue['data']

            offset += batch_size
            if offset >= self.end_idx:
                offset = self.start_idx

            # next batch
            batch_size = min(self.end_idx - self.start_idx + 1,
                             self.batch_size)

            queue = manager.dict()
            process = GraphGenProcessor(queue, self.num_nodes, offset, batch_size, self.graph, self.interactions, self.neg_nodes, \
            self.times, num_neighbors, num_layers, num_workers)

            ###########################################
            # start running next timestep process
            process.start()

            yield data
Example #27
def collect_train_samples_parallel(epoch, max_steps, objects, num_workers=10):
    """
        Purpose: collect rollouts for max_steps steps using num_workers workers
        Return: stats_collector
    """
    num_steps_per_worker = max_steps // num_workers
    num_residual_steps = max_steps - num_steps_per_worker * num_workers

    queue = mp.Manager().Queue()
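    # Each worker puts (pid, stats) on the queue; the parent extends its master collector as results arrive.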
    workers = []
    for i in range(num_workers):
        worker_steps = num_steps_per_worker + num_residual_steps if i == 0 else num_steps_per_worker
        worker_kwargs = dict(epoch=epoch,
                             max_steps=worker_steps,
                             objects=objects,
                             pid=i + 1,
                             queue=queue)
        workers.append(
            mp.Process(target=collect_train_samples_serial,
                       kwargs=worker_kwargs))
    for j, worker in enumerate(workers):
        worker.start()

    start = time.time()
    master_stats_collector = objects['stats_collector_builder']()
    for j, worker in enumerate(workers):
        worker_pid, worker_stats_data = queue.get()
        master_stats_collector.extend(worker_stats_data)
    end = time.time()
    objects['printer'](
        'Time to extend master_stats_collector: {}'.format(end - start))

    for j, worker in enumerate(workers):
        worker.join()

    assert master_stats_collector.get_total_steps() == max_steps
    return master_stats_collector
Example #28
def main(episodes, agent, num_processes):
    running_reward_array = []
    # lowered = False
    mp.set_start_method('spawn')
    for episode in range(episodes):
        successful_runs = 0
        master_reward, reward, running_reward = 0, 0, 0
        processes = []
        queueue = mp.Manager().Queue()
        for proc in range(num_processes):
            p = mp.Process(target=run_episode, args=(queueue, agent))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        while not queueue.empty():
            try:
                fake_out = queueue.get()
            except MemoryError as e:
                print(e)
                fake_out = [-13, None]
            if fake_out[0] != -13:
                master_reward += fake_out[0]
                running_reward_array.append(fake_out[0])
                agent.replay_buffer.extend(fake_out[1])
                successful_runs += 1

        if successful_runs > 0:
            reward = master_reward / float(successful_runs)
            agent.end_episode(reward, num_processes)
            running_reward = sum(running_reward_array[-100:]) / float(min(100.0, len(running_reward_array)))
        if episode % 50 == 0:
            print(f'Episode {episode}  Last Reward: {reward}  Average Reward: {running_reward}')
            print(f"Running {num_processes} concurrent simulations per episode")
        if episode % 500 == 0:
            agent.save('../models/' + str(episode) + 'th')
    return running_reward_array
Example #29
    def __init__(self, q, batch_size: int, num_workers: int, transform):
        """
        Args:
            q: A thread-safe queue. It should be multiprocessing.Manager().Queue or torch.multiprocessing.Manager().Queue.
            batch_size (int): the maximum size of batch.
            num_workers (int): the number of processes.
            transform: a function that receives a string (msg) and returns any object.
        """

        assert isinstance(q, mp.managers.BaseProxy) or isinstance(
            q, tmp.managers.BaseProxy)
        assert batch_size > 0
        assert num_workers > 0

        self.batch_size = batch_size
        self.num_workers = num_workers
        self.m = tmp.Manager()
        self.source = q
        self.sink = self.m.Queue(maxsize=batch_size * 3)

        self.pool = tmp.Pool(num_workers)
        for i in range(num_workers):
            r = self.pool.apply_async(self._worker_loop,
                                      (self.source, self.sink, transform))
Example #30
def main(episodes, agent, num_processes, ENV_NAME):
    running_reward_array = []
    for episode in range(episodes):
        master_reward = 0
        reward, running_reward = 0, 0
        processes = []
        q = mp.Manager().Queue()
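        # Fresh Manager queue per episode; workers push (reward, transitions) tuples that are drained below.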
        for proc in range(num_processes):
            p = mp.Process(target=run_episode, args=(q, agent, ENV_NAME))
            p.start()
            processes.append(p)
        for p in processes:
            p.join()
        while not q.empty():
            fake_out = q.get()
            master_reward += fake_out[0]
            running_reward_array.append(fake_out[0])
            agent.replay_buffer.extend(fake_out[1])

        tuple_out = run_episode(None, agent, ENV_NAME)
        master_reward += tuple_out[0]
        running_reward_array.append(tuple_out[0])
        agent.replay_buffer.extend(tuple_out[1])
        reward = master_reward / float(num_processes + 1)
        agent.end_episode(reward, num_processes)

        running_reward = sum(running_reward_array[-100:]) / float(
            min(100.0, len(running_reward_array)))
        print(episode)
        if episode % 50 == 0:
            print(
                f'Episode {episode}  Last Reward: {reward}  Average Reward: {running_reward}'
            )
        if episode % 500 == 0:
            agent.save('../models/' + str(episode) + 'th')
    return running_reward_array