Example No. 1
def test_ray_init(shutdown_only):
    def getpid(args):
        return os.getpid()

    def check_pool_size(pool, size):
        args = [tuple() for _ in range(size)]
        assert len(set(pool.map(getpid, args))) == size

    # Check that starting a pool starts ray if not initialized.
    pool = Pool(processes=2)
    assert ray.is_initialized()
    assert int(ray.state.cluster_resources()["CPU"]) == 2
    check_pool_size(pool, 2)
    ray.shutdown()

    # Check that starting a pool doesn't affect ray if there is a local
    # ray cluster running.
    ray.init(num_cpus=3)
    assert ray.is_initialized()
    pool = Pool(processes=2)
    assert int(ray.state.cluster_resources()["CPU"]) == 3
    check_pool_size(pool, 2)
    ray.shutdown()

    # Check that trying to start a pool on an existing ray cluster throws an
    # error if there aren't enough CPUs for the number of processes.
    ray.init(num_cpus=1)
    assert ray.is_initialized()
    with pytest.raises(ValueError):
        Pool(processes=2)
    assert int(ray.state.cluster_resources()["CPU"]) == 1
    ray.shutdown()
Example No. 2
def launch_long_running_tasks(num_pool=5):
    # `task` does the work: collecting data and updating the database.
    # Create an actor pool of num_pool workers.
    pool = Pool(num_pool)
    results = []
    for result in pool.map(task, range(1, 500, 10)):
        results.append(result)
    pool.terminate()
    return results
Example No. 3
def test_initializer(shutdown_only):
    def init(dirname):
        with open(os.path.join(dirname, str(os.getpid())), "w") as f:
            print("hello", file=f)

    with tempfile.TemporaryDirectory() as dirname:
        num_processes = 4
        pool = Pool(processes=num_processes,
                    initializer=init,
                    initargs=(dirname, ))

        assert len(os.listdir(dirname)) == 4
        pool.terminate()
Example No. 4
def dist_mat(objs: t.Union[t.Sequence[nx.Graph], t.Sequence[t.Collection[SeqGene]]], n_jobs: int = 10) -> np.ndarray:
    """Compute distance matrix using `Dist(obj1, obj2)` <-
    sum of the number of different types at each position.
    """
    if isinstance(objs[0], nx.Graph):
        dist = graph_dist
    else:
        dist = seq_dist
    size = len(objs)
    base = np.zeros(shape=(size, size))
    staged_data = []
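    # Stage only the upper triangle (i <= j): the matrix is symmetric, so each pair is computed once.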
    for i in range(size):
        for j in range(size):
            if i <= j:
                staged_data.append((i, objs[i], j, objs[j]))
    staged_data = tqdm(
        staged_data,
        desc='Distance matrix')

    with Pool(n_jobs) as workers:
        distances = workers.starmap(dist, staged_data)
    for i, j, d in distances:
        base[i][j] = d
        base[j][i] = d
    return base
Example No. 5
    def run(self) -> Any:
        """
        Runs the pipeline with the aforementioned parallelization strategy if parallel is set to True. Otherwise, the
        pipeline is executed sequentially.
        :return:
        """

        assert not self.done
        if self.par_logger:
            pass
        if self.parallel:
            if self.par_logger:
                self.par_logger.logger.info(f'{self.__class__.__name__}: Initializing mappers')

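            # Choose between the stdlib multiprocessing backend and the Ray multiprocessing backend.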
            if self.backend == 'mp':
                with multiprocessing.Pool(initializer=self._initialize_mappers,
                                          initargs=(self.mappers_factory,)) as pool:
                    res = pool.map(self._map_f, self.streams)
            else:
                work_dir = os.getcwd()
                ray.init(address='auto', redis_password='******')
                with Pool(initializer=self._initialize_mappers, initargs=(self.mappers_factory, work_dir)) as pool:
                    res = pool.map(self._map_f, self.streams)
        else:
            self._initialize_mappers(self.mappers_factory)
            res = []
            for e in self.streams:
                res.append(self._map_f(e))

        if self.par_logger:
            self.par_logger.logger.info(f'{self.__class__.__name__}: Mapping pipeline executed')
        self.done = True
        return res
Example No. 6
def splitDataset(dataset: str,
                 outputPath: str,
                 time: int,
                 inputPath: str = 'D://Datasets//Tsinghua'):
    if dataset == 'benchmark':
        assert 1500 % (time * 250) == 0
        num_blocks = 6
        n_segments = int(1500 / (time * 250))
    else:
        assert 750 % (time * 250) == 0
        num_blocks = 4
        n_segments = int(750 / (time * 250))

    datasetPath = Path(inputPath + '//{}//separated'.format(dataset))
    outputPath = Path(outputPath + '//{}//{}'.format(dataset, str(time * 250)))
    outputPath.mkdir(parents=True, exist_ok=True)
    sublists = [
        list(range(x, y)) for x, y in [(1, 11), (11, 21), (21, 31), (31, 41)]
    ]

    f = partial(_splitDataset,
                inputPath=datasetPath,
                outputPath=outputPath,
                n_segments=n_segments)
    with Pool(processes=4) as pool:
        pool.map(f, sublists)
Example No. 7
def run(seed, checkpoint_path, samples, workers, generated_path, termination_mode, frame_take_prob=0.2, disable_adv_comm=False, t_fac=1.5):
    results = []
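    # Split the requested number of samples roughly evenly across the workers, one generate() call per worker.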
    with Pool(workers) as p:
        for res in p.starmap(generate, [(seed+i, checkpoint_path, int(samples/workers), termination_mode, frame_take_prob, disable_adv_comm, t_fac) for i in range(workers)]):
            results += res
    print("DONE", len(results))
    pickle.dump(results, open(generated_path, "wb"))
Example No. 8
def extractSeparatedFiles(dataset: str,
                          raw_data_directory: str = 'D:\\Datasets\\Tsinghua'):
    '''
    Extract data from each trial (40, one for each class) and blocks (one for each trial) and save it separately,
    divided by class/trial and identified by individual and block: S{individual}_{block}.csv
    '''
    raw_data_directory = Path(raw_data_directory + '\\' + dataset)
    files = [file for file in os.listdir(raw_data_directory) if "mat" in file]
    files = segmentList(files, 4)

    for trial in range(40):
        (raw_data_directory / 'separated' / str(trial + 1)).mkdir(
            parents=True, exist_ok=True)

    if dataset == 'benchmark':
        B = 6
    else:
        B = 4

    f = partial(_extractSeparatedFiles,
                dataset=dataset,
                raw_data_directory=raw_data_directory,
                num_blocks=B)

    with Pool(processes=4) as pool:
        pool.map(f, files)
Example No. 9
def test_connect_to_ray(ray_start_cluster):
    def getpid(args):
        return os.getpid()

    def check_pool_size(pool, size):
        args = [tuple() for _ in range(size)]
        assert len(set(pool.map(getpid, args))) == size

    address = ray_start_cluster.address
    # Use different numbers of CPUs to distinguish between starting a local
    # ray cluster and connecting to an existing one.
    start_cpus = 1  # Set in fixture.
    init_cpus = 2

    # Check that starting a pool still starts ray if RAY_ADDRESS not set.
    pool = Pool(processes=init_cpus)
    assert ray.is_initialized()
    assert int(ray.state.cluster_resources()["CPU"]) == init_cpus
    check_pool_size(pool, init_cpus)
    ray.shutdown()

    # Check that starting a pool connects to a running ray cluster if
    # ray_address is passed in.
    pool = Pool(ray_address=address)
    assert ray.is_initialized()
    assert int(ray.state.cluster_resources()["CPU"]) == start_cpus
    check_pool_size(pool, start_cpus)
    ray.shutdown()

    # Set RAY_ADDRESS, so pools should connect to the running ray cluster.
    os.environ["RAY_ADDRESS"] = address

    # Check that starting a pool connects to a running ray cluster if
    # RAY_ADDRESS is set.
    pool = Pool()
    assert ray.is_initialized()
    assert int(ray.state.cluster_resources()["CPU"]) == start_cpus
    check_pool_size(pool, start_cpus)
    ray.shutdown()

    # Check that trying to start a pool on an existing ray cluster throws an
    # error if there aren't enough CPUs for the number of processes.
    with pytest.raises(Exception):
        Pool(processes=start_cpus + 1)
    assert int(ray.state.cluster_resources()["CPU"]) == start_cpus
    ray.shutdown()
Example No. 10
def test_maxtasksperchild(shutdown_only):
    def f(args):
        return os.getpid()

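    # With maxtasksperchild=1, each task runs in a fresh worker process, so all 20 mapped calls should report distinct PIDs.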
    pool = Pool(5, maxtasksperchild=1)
    assert len(set(pool.map(f, range(20)))) == 20
    pool.terminate()
    pool.join()
Example No. 11
def fitness_function_3(x, np_ext, np_int, m_l, m_i, m_ext, m_int):

    m_ext_tp = [i[0]
                for i in m_ext]  # assign the external mesh coordinate value

    m_ext_a_sh = [i[1] for i in m_ext]

    p = Pool()

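    # Pack each candidate in x together with the shared mesh data so that map() passes a single argument per call.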
    args = [[i, m_int, m_i, m_ext_tp, m_ext_a_sh] for i in x]

    total = p.map(iterateArrays, args)

    p.close()

    p.join()

    return np.array(total)
Example No. 12
def pool_4_processes():
    pool = Pool(processes=4)
    yield pool
    pool.terminate()
    pool.join()
    ray.shutdown()
Example No. 13
import time

from ray.util.multiprocessing import Pool


def f(index):
    time.sleep(5)
    return index


if __name__ == "__main__":
    pool = Pool(5)
    for result in pool.map(f, range(10)):
        print(result)
Example No. 14
def pool():
    pool = Pool(processes=1)
    yield pool
    pool.terminate()
    ray.shutdown()
Example No. 15
def test_ray_init(monkeypatch, shutdown_only):
    def getpid(args):
        return os.getpid()

    def check_pool_size(pool, size):
        args = [tuple() for _ in range(size)]
        assert len(set(pool.map(getpid, args))) == size

    # Check that starting a pool starts ray if not initialized.
    pool = Pool(processes=2)
    assert ray.is_initialized()
    assert int(ray.cluster_resources()["CPU"]) == 2
    check_pool_size(pool, 2)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Set up the cluster id so that gcs is talking with a different
    # storage prefix
    monkeypatch.setenv("RAY_external_storage_namespace", "new_cluster")
    ray._raylet.Config.initialize("")

    # Check that starting a pool doesn't affect ray if there is a local
    # ray cluster running.
    ray.init(num_cpus=3)
    assert ray.is_initialized()
    pool = Pool(processes=2)
    assert int(ray.cluster_resources()["CPU"]) == 3
    check_pool_size(pool, 2)
    pool.terminate()
    pool.join()
    ray.shutdown()

    # Check that trying to start a pool on an existing ray cluster throws an
    # error if there aren't enough CPUs for the number of processes.
    ray.init(num_cpus=1)
    assert ray.is_initialized()
    with pytest.raises(ValueError):
        Pool(processes=2)
    assert int(ray.cluster_resources()["CPU"]) == 1
    ray.shutdown()
Example No. 16
    # Let's try multiprocessing with one process per core.
    # Since this is a CPU-bound task, we should get better performance
    # than the serial and threaded versions.
    #
    start = time.time()
    mp_pool = mp.Pool(get_cpu_count())
    with mp_pool as p:
        prime_numbers = p.map(is_prime, list(range(num)))
    end = time.time()
    mp_pool.terminate()

    print(
        f"Multi Process access: Time elapsed: {end - start:4.2f} sec to compute all primes up to {num}: {sum(prime_numbers)}"
    )

    # Let's try that with Ray multiprocessing pool
    ray.init()
    ray_pool = Pool(get_cpu_count())
    lst = list(range(num))
    results = []
    start = time.time()
    for result in ray_pool.map(is_prime, lst):
        results.append(result)
    end = time.time()
    ray_pool.terminate()
    print(
        f"Ray Distributed Multi Process access: Time elapsed: {end - start:4.2f} sec to compute all primes up to {num}: {sum(results)}"
    )
    ray.shutdown()
Example No. 17
def poolit_b():
    with Pool(ray_address="auto") as pool:
        return list(pool.map(poolit_a, range(2, 4, 1)))
Example No. 18
def poolit_a(_):
    with Pool(ray_address="auto") as pool:
        return list(pool.map(math.sqrt, range(0, 2, 1)))
Example No. 19
def _parallel_lambda_ray(function, inputs, jobs):
    from ray.util.multiprocessing import Pool
    with Pool(jobs, lambda args: pt.init(**args), pt.init_args) as pool:
        return pool.map(function, inputs)
Example No. 20
        page += processes
    return curr_buf


if __name__ == '__main__':
    start = time.time()
    ray.init()
    processes = os.cpu_count()
    csv_data = [['標題', '評分', '價格', '上市日期', '標籤', '評論']]
    result_ids = []
    reviews_buffer = []
    for i in range(1, processes + 1):
        result_ids.append(crawler.remote(i, processes))
    results = ray.get(result_ids)

    for res in results:
        reviews_buffer += res

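    # Post-process the collected reviews in parallel with a Ray multiprocessing pool.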
    pool = Pool(processes=processes)
    data = pool.map(process_review, [[reviews_buf[1], reviews_buf[0]]
                                     for reviews_buf in reviews_buffer])
    pool.close()
    for d in data:
        csv_data += d

    with open('output.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(csv_data)

    end = time.time()
    print(f'執行時間 {end - start} 秒')
Example No. 21
    def _transform_ray(self, splits):
        from ray.util.multiprocessing import Pool
        with Pool(self.n_jobs, _pt_init, pt.init_args) as pool:
            results = pool.map(lambda topics: self.parent(topics), splits)
            return pd.concat(results)