Example #1
def test_pagerank():
    gc.collect()
    input_data_path = r"../datasets/karate.csv"
    # Networkx Call
    pd_df = pd.read_csv(input_data_path,
                        delimiter=' ',
                        names=['src', 'dst', 'value'])
    G = nx.Graph()
    for i in range(0, len(pd_df)):
        G.add_edge(pd_df['src'][i], pd_df['dst'][i])
    nx_pr = nx.pagerank(G, alpha=0.85)
    nx_pr = sorted(nx_pr.items(), key=lambda x: x[0])
    # Cugraph snmg pagerank Call
    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)
    chunksize = dcg.get_chunksize(input_data_path)
    ddf = dask_cudf.read_csv(input_data_path,
                             chunksize=chunksize,
                             delimiter=' ',
                             names=['src', 'dst', 'value'],
                             dtype=['int32', 'int32', 'float32'])

    pr = dcg.pagerank(ddf, alpha=0.85, max_iter=50)
    res_df = pr.compute()

    err = 0
    tol = 1.0e-05
    for i in range(len(res_df)):
        if (abs(res_df['pagerank'][i] - nx_pr[i][1]) > tol * 1.1):
            err = err + 1
    print("Mismatches:", err)
    assert err < (0.01 * len(res_df))

    client.close()
    cluster.close()
Example #2
def test_send_recv(n_trials):

    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    cb = CommsContext(comms_p2p=True)
    cb.init()

    cb = default_comms()

    start = time.time()
    dfs = [client.submit(func_test_send_recv,
                         cb.sessionId,
                         n_trials,
                         random.random(),
                         workers=[w])
           for wid, w in zip(range(len(cb.worker_addresses)),
                             cb.worker_addresses)]

    wait(dfs)
    print("Time: " + str(time.time() - start))

    result = list(map(lambda x: x.result(), dfs))

    print(str(result))

    assert(result)

    cb.destroy()
    client.close()
    cluster.close()
Example #3
    def fit(self, data, args):
        params = self.configure(data, args)
        cluster = LocalCUDACluster(
            n_workers=None if args.gpus < 0 else args.gpus,
            local_directory=args.root,
            threads_per_worker=1)
        client = Client(cluster)
        partition_size = 10000
        if isinstance(data.X_train, np.ndarray):
            X = da.from_array(data.X_train,
                              (partition_size, data.X_train.shape[1]))
            y = da.from_array(data.y_train, partition_size)
        else:

            X = dd.from_pandas(data.X_train, chunksize=partition_size)
            y = dd.from_pandas(data.y_train, chunksize=partition_size)
        dtrain = xgb.dask.DaskDMatrix(client, X, y)
        with Timer() as t:
            output = xgb.dask.train(client,
                                    params,
                                    dtrain,
                                    num_boost_round=args.ntrees)
        self.model = output['booster']
        client.close()
        cluster.close()
        return t.interval
Example #4
def create_cuml_distributed(X_train, y_train):
    start_time = datetime.now()
    print('init dask cluster')

    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)
    workers = client.has_what().keys()

    n_workers = len(workers)
    X_train_cudf = cudf.DataFrame.from_pandas(pd.DataFrame(X_train))
    y_train_cudf = cudf.Series(y_train)

    X_train_dask = dask_cudf.from_cudf(X_train_cudf, npartitions=n_workers)
    y_train_dask = dask_cudf.from_cudf(y_train_cudf, npartitions=n_workers)

    X_train_ddask, y_train_ddask = dask_utils.persist_across_workers(
        client, [X_train_dask, y_train_dask], workers=workers)
    print('cuml distributed initialized', datetime.now() - start_time)
    model = distributed_cuml_Rf(n_estimators=500, n_streams=64)
    # Fit on the persisted dask inputs rather than the original host arrays
    model.fit(X_train_ddask, y_train_ddask)

    wait(model.rfs)
    print('cuml distributed finished', datetime.now() - start_time)
    client.close()
    cluster.close()
    return model
Example #5
def test_consolidation(graph_file):
    gc.collect()

    cluster = LocalCUDACluster()
    client = Client(cluster)
    chunksize = dcg.get_chunksize(graph_file)

    M = utils.read_csv_for_nx(graph_file)

    df = pd.DataFrame()
    df['source'] = pd.Series(M['0'])
    df['target'] = pd.Series(M['1'])

    ddf = dask_cudf.read_csv(graph_file,
                             chunksize=chunksize,
                             delimiter=' ',
                             names=['source', 'target', 'weight'],
                             dtype=['int32', 'int32', 'float32'],
                             header=None)

    Gnx = nx.from_pandas_edgelist(df,
                                  source='source',
                                  target='target',
                                  create_using=nx.DiGraph)
    G = cugraph.from_cudf_edgelist(ddf,
                                   source='source',
                                   destination='target',
                                   create_using=cugraph.DiGraph)

    assert compare_graphs(Gnx, G)
    Gnx.clear()
    G.clear()
    client.close()
    cluster.close()
Example #6
def test_pca_fit_transform_fp32(nrows, ncols, n_parts, client=None):

    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    from cuml.dask.decomposition import PCA as daskPCA
    from cuml.dask.datasets import make_blobs

    X_cudf, _ = make_blobs(nrows,
                           ncols,
                           1,
                           n_parts,
                           cluster_std=1.5,
                           verbose=False,
                           random_state=10,
                           dtype=np.float32)

    wait(X_cudf)

    cupca = daskPCA(n_components=20, whiten=True)
    cupca.fit_transform(X_cudf)

    if owns_cluster:
        client.close()
        cluster.close()
Example #7
    def fit(self, data, args):
        params = self.configure(data, args)
        n_workers = None if args.gpus < 0 else args.gpus
        cluster = LocalCUDACluster(n_workers=n_workers,
                                   local_directory=args.root)
        client = Client(cluster)
        n_partitions = len(client.scheduler_info()['workers'])
        X_sliced, y_sliced = self.get_slices(n_partitions, data.X_train,
                                             data.y_train)
        X = da.concatenate(
            [da.from_array(sub_array) for sub_array in X_sliced])
        X = X.rechunk((X_sliced[0].shape[0], data.X_train.shape[1]))
        y = da.concatenate(
            [da.from_array(sub_array) for sub_array in y_sliced])
        y = y.rechunk(X.chunksize[0])
        dtrain = xgb.dask.DaskDMatrix(client, X, y)
        with Timer() as t:
            output = xgb.dask.train(client,
                                    params,
                                    dtrain,
                                    num_boost_round=args.ntrees)
        self.model = output['booster']
        client.close()
        cluster.close()
        return t.interval
Example #8
File: conftest.py  Project: teju85/cuml
def cluster():

    print("Starting cluster")
    cluster = LocalCUDACluster(protocol="tcp", scheduler_port=0)
    yield cluster
    print("Closing cluster")
    cluster.close()
    print("Closed cluster")
Example #9
def get_cuda_cluster():
    from dask_cuda import LocalCUDACluster

    CUDA_VISIBLE_DEVICES = os.environ.get("CUDA_VISIBLE_DEVICES", "0")
    n_workers = min(2, len(CUDA_VISIBLE_DEVICES.split(",")))
    cluster = LocalCUDACluster(n_workers=n_workers)
    yield cluster
    cluster.close()
Example #10
class MGContext:
    """Utility Context Manager to start a multi GPU context using dask_cuda
    Parameters:
    -----------
    number_of_devices : int
        Number of devices to use, verification must be done prior to call to
        ensure that there are enough devices available. If not specified, the
        cluster will be initialized to use all visible devices.
    rmm_managed_memory : bool
        True to enable managed memory (UVM) in RMM as part of the
        cluster. Default is False.
    p2p : bool
        Initialize UCX endpoints if True. Default is False.
    """
    def __init__(self,
                 number_of_devices=None,
                 rmm_managed_memory=False,
                 p2p=False):
        self._number_of_devices = number_of_devices
        self._rmm_managed_memory = rmm_managed_memory
        self._client = None
        self._p2p = p2p
        self._cluster = CUDACluster(
            n_workers=self._number_of_devices,
            rmm_managed_memory=self._rmm_managed_memory)

    @property
    def client(self):
        return self._client

    @property
    def cluster(self):
        return self._cluster

    def __enter__(self):
        self._prepare_mg()
        return self

    def _prepare_mg(self):
        self._prepare_client()
        self._prepare_comms()

    def _prepare_client(self):
        self._client = Client(self._cluster)
        self._client.wait_for_workers(self._number_of_devices)

    def _prepare_comms(self):
        Comms.initialize(p2p=self._p2p)

    def _close(self):
        Comms.destroy()
        if self._client is not None:
            self._client.close()
        if self._cluster is not None:
            self._cluster.close()

    def __exit__(self, type, value, traceback):
        self._close()
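A minimal usage sketch for the MGContext above (my own illustration, not part of the project: it assumes at least two visible GPUs and the same cugraph/cuml imports used by the class):

# Usage sketch: entering the block creates the CUDA cluster, Client, and Comms;
# exiting destroys the Comms and closes the client and cluster.
with MGContext(number_of_devices=2, p2p=True) as mg:
    client = mg.client
    print("workers:", list(client.has_what().keys()))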
Example #11
def predict_xgboost_gpu(xgb_model,
                        X,
                        data_chunksize=None,
                        n_gpus=None,
                        n_threads_per_gpu=1,
                        gpu_cluster=None,
                        client=None):
    '''
    Predicts the output for the input features X using the given 'xgb_model'
    running on the GPU.

    :param xgb_model: a dask XGBoost model to use for predictions
    :param X: the input features to use for predictions; must be either a
        numpy ndarray or a pandas DataFrame
    :param data_chunksize: chunk size to use for the dask dataframe; leave the
        default value None to decide automatically
    :param n_gpus: number of GPUs to use; the default value None selects all
        available devices
    :param n_threads_per_gpu: number of threads per GPU
    :param gpu_cluster: an existing dask cluster object to reuse. Pass this if
        you call this method many times in quick succession. Note that this
        function does not close an externally created cluster.
    :param client: an existing dask client object to reuse. Pass this if you
        call this method many times in quick succession. Note that this
        function does not close an externally created client.
    :return:
        If the input features X is a pandas DataFrame, returns a single-column
        DataFrame containing the predictions;

        otherwise, if the input features X is a numpy ndarray, returns a 1D
        ndarray containing the predictions.
    '''
    if gpu_cluster is None:
        local_gpus = LocalCUDACluster(n_workers=n_gpus,
                                      threads_per_worker=n_threads_per_gpu)
    else:
        local_gpus = gpu_cluster
    if client is None:
        local_dask_client = Client(local_gpus)
    else:
        local_dask_client = client

    if data_chunksize is None:
        data_chunksize = X.shape[0] // len(local_gpus.cuda_visible_devices)

    if isinstance(X, pd.DataFrame):
        ndarray = False
        X = from_pandas(X, chunksize=data_chunksize)
    else:
        ndarray = True
        X = from_array(X, chunksize=data_chunksize)

    y_predicted = dask_xgboost_predict(local_dask_client, xgb_model, X)
    y_predicted = pd.DataFrame(y_predicted)

    if client is None:
        local_dask_client.close()
    if gpu_cluster is None:
        local_gpus.close()

    if ndarray:
        return y_predicted.to_numpy()
    return y_predicted
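A hypothetical usage sketch for predict_xgboost_gpu above (my own illustration; it assumes the module defining the helper is importable and at least one GPU is available, and it trains a throwaway model purely so there is a booster to predict with; all names and parameter values here are placeholders):

# Train a small dask-XGBoost model, then call the helper while reusing
# the same cluster/client so it does not tear them down.
import numpy as np
import xgboost as xgb
import dask.array as da
from dask.distributed import Client
from dask_cuda import LocalCUDACluster

cluster = LocalCUDACluster(threads_per_worker=1)
client = Client(cluster)

X = da.random.random((1000, 20), chunks=(250, 20))
y = da.random.random(1000, chunks=250)
dtrain = xgb.dask.DaskDMatrix(client, X, y)
booster = xgb.dask.train(client,
                         {"tree_method": "gpu_hist",
                          "objective": "reg:squarederror"},
                         dtrain,
                         num_boost_round=10)["booster"]

# The helper leaves an externally supplied cluster/client open.
preds = predict_xgboost_gpu(booster,
                            np.random.rand(100, 20),
                            gpu_cluster=cluster,
                            client=client)
print(preds.shape)  # a 1D ndarray, per the docstring, since the input was an ndarray

client.close()
cluster.close()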
Example #12
def test_pagerank():
    gc.collect()
    input_data_path = r"../datasets/hibench_small/1/part-00000.csv"

    # Networkx Call
    pd_df = pd.read_csv(input_data_path, delimiter='\t', names=['src', 'dst'])
    G = nx.DiGraph()
    for i in range(0, len(pd_df)):
        G.add_edge(pd_df['src'][i], pd_df['dst'][i])
    nx_pr = nx.pagerank(G, alpha=0.85)
    nx_pr = sorted(nx_pr.items(), key=lambda x: x[0])

    # Cugraph snmg pagerank Call
    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    t0 = time.time()
    chunksize = dcg.get_chunksize(input_data_path)
    ddf = dask_cudf.read_csv(input_data_path,
                             chunksize=chunksize,
                             delimiter='\t',
                             names=['src', 'dst'],
                             dtype=['int32', 'int32'])
    y = ddf.to_delayed()
    x = client.compute(y)
    wait(x)
    t1 = time.time()
    print("Reading Csv time: ", t1 - t0)
    new_ddf = dcg.drop_duplicates(x)
    t2 = time.time()
    pr = dcg.pagerank(new_ddf, alpha=0.85, max_iter=50)
    wait(pr)
    t3 = time.time()
    print("Running PR algo time: ", t3 - t2)
    t4 = time.time()
    res_df = pr.compute()
    t5 = time.time()
    print("Compute time: ", t5 - t4)
    print(res_df)
    t6 = time.time()
    # For bigdatax4, use chunksize=100000000 to avoid OOM when writing the csv
    res_df.to_csv('~/pagerank.csv', header=False, index=False)
    t7 = time.time()
    print("Write csv time: ", t7 - t6)

    # Comparison
    err = 0
    tol = 1.0e-05
    for i in range(len(res_df)):
        if (abs(res_df['pagerank'][i] - nx_pr[i][1]) > tol * 1.1):
            err = err + 1
    print("Mismatches:", err)
    assert err < (0.02 * len(res_df))

    client.close()
    cluster.close()
Example #13
def test_default_comms_no_exist():
    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)
    cb = default_comms()
    assert cb is not None

    cb2 = default_comms()
    assert cb.sessionId == cb2.sessionId
    client.close()
    cluster.close()
Example #14
def client_connection():
    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize(p2p=True)

    yield client

    Comms.destroy()
    client.close()
    cluster.close()
Example #15
File: conftest.py  Project: rapidsai/cuml
def ucx_cluster():
    initialize.initialize(create_cuda_context=True,
                          enable_tcp_over_ucx=enable_tcp_over_ucx,
                          enable_nvlink=enable_nvlink,
                          enable_infiniband=enable_infiniband)
    cluster = LocalCUDACluster(protocol="ucx",
                               enable_tcp_over_ucx=enable_tcp_over_ucx,
                               enable_nvlink=enable_nvlink,
                               enable_infiniband=enable_infiniband)
    yield cluster
    cluster.close()
Example #16
class MGContext:
    """Utility Context Manager to start a multi GPU context using dask_cuda

    Parameters:
    -----------

    number_of_devices : int
        Number of devices to use, verification must be done prior to call
        to ensure that there are enough devices available.
    """
    def __init__(self, number_of_devices=None, rmm_managed_memory=False):
        self._number_of_devices = number_of_devices
        self._rmm_managed_memory = rmm_managed_memory
        self._cluster = None
        self._client = None

    @property
    def client(self):
        return self._client

    @property
    def cluster(self):
        return self._cluster

    def __enter__(self):
        self._prepare_mg()
        return self

    def _prepare_mg(self):
        self._prepare_cluster()
        self._prepare_client()
        self._prepare_comms()

    def _prepare_cluster(self):
        self._cluster = CUDACluster(
            n_workers=self._number_of_devices,
            rmm_managed_memory=self._rmm_managed_memory)

    def _prepare_client(self):
        self._client = Client(self._cluster)
        self._client.wait_for_workers(self._number_of_devices)

    def _prepare_comms(self):
        Comms.initialize()

    def _close(self):
        Comms.destroy()
        if self._client is not None:
            self._client.close()
        if self._cluster is not None:
            self._cluster.close()

    def __exit__(self, type, value, traceback):
        self._close()
Example #17
def client_connection():
    # setup
    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize()

    yield client

    # teardown
    Comms.destroy()
    client.close()
    cluster.close()
Example #18
def test_pca_fit(nrows, ncols, n_parts, client=None):

    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    from cuml.dask.decomposition import TruncatedSVD as daskTPCA
    from sklearn.decomposition import TruncatedSVD

    from cuml.dask.datasets import make_blobs

    X_cudf, _ = make_blobs(nrows,
                           ncols,
                           1,
                           n_parts,
                           cluster_std=0.5,
                           verbose=False,
                           random_state=10,
                           dtype=np.float32)

    wait(X_cudf)

    X = X_cudf.compute().to_pandas().values

    cutsvd = daskTPCA(n_components=5)
    cutsvd.fit(X_cudf)

    sktsvd = TruncatedSVD(n_components=5, algorithm="arpack")
    sktsvd.fit(X)

    all_attr = [
        'singular_values_', 'components_', 'explained_variance_',
        'explained_variance_ratio_'
    ]

    if owns_cluster:
        client.close()
        cluster.close()

    for attr in all_attr:
        with_sign = False if attr in ['components_'] else True
        cuml_res = (getattr(cutsvd, attr))
        if type(cuml_res) == np.ndarray:
            cuml_res = cuml_res.as_matrix()
        skl_res = getattr(sktsvd, attr)
        if attr == 'singular_values_':
            assert array_equal(cuml_res, skl_res, 1, with_sign=with_sign)
        else:
            assert array_equal(cuml_res, skl_res, 1e-1, with_sign=with_sign)
Example #19
def test_default_comms():

    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    cb = CommsContext(comms_p2p=True, client=client)
    cb.init()

    comms = default_comms()
    assert(cb.sessionId == comms.sessionId)

    comms.destroy()
    client.close()
    cluster.close()
Example #20
def test_pca_fit(nrows, ncols, n_parts, client=None):

    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    from cuml.dask.decomposition import PCA as daskPCA
    from sklearn.decomposition import PCA

    from cuml.dask.datasets import make_blobs

    X_cudf, _ = make_blobs(nrows,
                           ncols,
                           1,
                           n_parts,
                           cluster_std=0.5,
                           verbose=False,
                           random_state=10,
                           dtype=np.float32)

    wait(X_cudf)

    X = X_cudf.compute().to_pandas().values

    cupca = daskPCA(n_components=5, whiten=True)
    cupca.fit(X_cudf)

    skpca = PCA(n_components=5, whiten=True, svd_solver="full")
    skpca.fit(X)

    from cuml.test.utils import array_equal

    all_attr = [
        'singular_values_', 'components_', 'explained_variance_',
        'explained_variance_ratio_'
    ]

    if owns_cluster:
        client.close()
        cluster.close()

    for attr in all_attr:
        with_sign = False if attr in ['components_'] else True
        cuml_res = (getattr(cupca, attr))
        if type(cuml_res) == np.ndarray:
            cuml_res = cuml_res.as_matrix()
        skl_res = getattr(skpca, attr)
        assert array_equal(cuml_res, skl_res, 1e-3, with_sign=with_sign)
Example #21
def test_splitting():
    gc.collect()

    # This is an experimental setup for the 300GB bigdatax8 dataset.
    # The test can be run on 16 32GB GPUs; the dataset is split into 32 files.
    input_data_path = r"/datasets/pagerank_demo/1/Input-bigdatax8/edges/"
    input_files = [
        'file-00000.csv', 'file-00001.csv', 'file-00002.csv', 'file-00003.csv',
        'file-00004.csv', 'file-00005.csv', 'file-00006.csv', 'file-00007.csv',
        'file-00008.csv', 'file-00009.csv', 'file-00010.csv', 'file-00011.csv',
        'file-00012.csv', 'file-00013.csv', 'file-00014.csv', 'file-00015.csv',
        'file-00016.csv', 'file-00017.csv', 'file-00018.csv', 'file-00019.csv',
        'file-00020.csv', 'file-00021.csv', 'file-00022.csv', 'file-00023.csv',
        'file-00024.csv', 'file-00025.csv', 'file-00026.csv', 'file-00027.csv',
        'file-00028.csv', 'file-00029.csv', 'file-00030.csv', 'file-00031.csv'
    ]

    # Cugraph snmg pagerank Call
    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    files = [input_data_path + f for f in input_files]

    # Read 2 files per gpu/worker and concatenate the dataframes.
    # This is a workaround so that large files fit within the memory
    # requirements of cudf.read_csv
    t0 = time.time()
    new_ddf = dcg.read_split_csv(files)
    t1 = time.time()
    print("Reading Csv time: ", t1 - t0)
    t2 = time.time()
    pr = dcg.pagerank(new_ddf, alpha=0.85, max_iter=3)
    wait(pr)
    t3 = time.time()
    print("Pagerank (Dask) time: ", t3 - t2)
    t4 = time.time()
    res_df = pr.compute()
    t5 = time.time()
    print("Compute time: ", t5 - t4)
    print(res_df)
    t6 = time.time()
    res_df.to_csv('~/pagerank.csv',
                  chunksize=40000000,
                  header=False,
                  index=False)
    t7 = time.time()
    print("Write csv time: ", t7 - t6)

    client.close()
    cluster.close()
Example #22
def test_end_to_end(nrows, ncols, nclusters, n_parts, client=None):

    owns_cluster = False
    if client is None:
        owns_cluster = True
        cluster = LocalCUDACluster(threads_per_worker=1)
        client = Client(cluster)

    from cuml.dask.cluster import KMeans as cumlKMeans
    from dask_ml.cluster import KMeans as dmlKMeans

    from cuml.test.dask.utils import dask_make_blobs

    X_df, X_cudf = dask_make_blobs(nrows,
                                   ncols,
                                   nclusters,
                                   n_parts,
                                   cluster_std=0.1,
                                   verbose=True,
                                   random_state=10)

    wait(X_cudf)

    cumlModel = cumlKMeans(verbose=0,
                           init="k-means||",
                           n_clusters=nclusters,
                           random_state=10)
    daskmlModel1 = dmlKMeans(init="k-means||",
                             n_clusters=nclusters,
                             random_state=10)

    cumlModel.fit(X_cudf)
    daskmlModel1.fit(X_df)

    cumlLabels = cumlModel.predict(X_cudf)
    daskmlLabels1 = daskmlModel1.predict(X_df)

    from sklearn.metrics import adjusted_rand_score

    cumlPred = cumlLabels.compute().to_pandas().values
    daskmlPred1 = daskmlLabels1.compute()

    score = adjusted_rand_score(cumlPred, daskmlPred1)

    if owns_cluster:
        client.close()
        cluster.close()

    assert 1.0 == score
Example #23
def test_rf_regression(n_workers, partitions_per_worker):
    if dask_cuda.utils.get_n_gpus() < n_workers:
        pytest.skip("too few GPUs")

    cluster = LocalCUDACluster(threads_per_worker=1, n_workers=n_workers)
    c = Client(cluster)

    X, y = make_regression(n_samples=40000,
                           n_features=20,
                           n_informative=10,
                           random_state=123)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1000)

    cu_rf_params = {
        'n_estimators': 25,
        'max_depth': 13,
    }

    workers = c.has_what().keys()
    n_partitions = partitions_per_worker * len(workers)

    X_cudf = cudf.DataFrame.from_pandas(pd.DataFrame(X_train))
    X_train_df = \
        dask_cudf.from_cudf(X_cudf, npartitions=n_partitions)

    y_cudf = np.array(pd.DataFrame(y_train).values)
    y_cudf = y_cudf[:, 0]
    y_cudf = cudf.Series(y_cudf)
    y_train_df = \
        dask_cudf.from_cudf(y_cudf, npartitions=n_partitions)

    X_train_df, y_train_df = dask_utils.persist_across_workers(
        c, [X_train_df, y_train_df], workers=workers)

    cu_rf_mg = cuRFR_mg(**cu_rf_params)
    cu_rf_mg.fit(X_train_df, y_train_df)
    cu_rf_mg_predict = cu_rf_mg.predict(X_test)

    acc_score = r2_score(cu_rf_mg_predict, y_test)

    print(str(acc_score))

    assert acc_score >= 0.70

    c.close()
    cluster.close()
Example #24
def test_dask_bfs():
    gc.collect()
    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize()

    input_data_path = r"../datasets/netscience.csv"
    chunksize = dcg.get_chunksize(input_data_path)

    ddf = dask_cudf.read_csv(input_data_path,
                             chunksize=chunksize,
                             delimiter=' ',
                             names=['src', 'dst', 'value'],
                             dtype=['int32', 'int32', 'float32'])

    df = cudf.read_csv(input_data_path,
                       delimiter=' ',
                       names=['src', 'dst', 'value'],
                       dtype=['int32', 'int32', 'float32'])

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, 'src', 'dst', renumber=True)

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, renumber=True)

    expected_dist = cugraph.bfs(g, 0)
    result_dist = dcg.bfs(dg, 0, True)

    compare_dist = expected_dist.merge(result_dist,
                                       on="vertex",
                                       suffixes=['_local', '_dask'])

    err = 0

    for i in range(len(compare_dist)):
        if (compare_dist['distance_local'].iloc[i] !=
                compare_dist['distance_dask'].iloc[i]):
            err = err + 1
    assert err == 0

    Comms.destroy()
    client.close()
    cluster.close()
Example #25
def local_cuda_cluster(request, pytestconfig):
    kwargs = {}
    if hasattr(request, 'param'):
        kwargs.update(request.param)
    if pytestconfig.getoption('--use-rmm-pool'):
        if not has_rmm():
            raise ImportError(
                'The --use-rmm-pool option requires the RMM package')
        import rmm
        from dask_cuda.utils import get_n_gpus
        rmm.reinitialize()
        kwargs['rmm_pool_size'] = '2GB'
    if tm.no_dask_cuda()['condition']:
        raise ImportError(
            'The local_cuda_cluster fixture requires dask_cuda package')
    from dask_cuda import LocalCUDACluster
    cluster = LocalCUDACluster(**kwargs)
    yield cluster
    cluster.close()
Example #26
def dask_client():
    dask_scheduler_file = os.environ.get("SCHEDULER_FILE")
    cluster = None
    client = None
    tempdir_object = None

    if dask_scheduler_file:
        # Env var UCX_MAX_RNDV_RAILS=1 must be set too.
        initialize(
            enable_tcp_over_ucx=True,
            enable_nvlink=True,
            enable_infiniband=True,
            enable_rdmacm=True,
            # net_devices="mlx5_0:1",
        )
        client = Client(scheduler_file=dask_scheduler_file)
        print("\ndask_client fixture: client created using "
              f"{dask_scheduler_file}")
    else:
        # The tempdir created by tempdir_object should be cleaned up once
        # tempdir_object goes out-of-scope and is deleted.
        tempdir_object = tempfile.TemporaryDirectory()
        cluster = LocalCUDACluster(local_directory=tempdir_object.name)
        client = Client(cluster)
        client.wait_for_workers(len(get_visible_devices()))
        print("\ndask_client fixture: client created using LocalCUDACluster")

    Comms.initialize(p2p=True)

    yield client

    Comms.destroy()
    # Shut down the connected scheduler and workers so that MNMG runs
    # no longer rely on killing the dask cluster by ID
    client.shutdown()
    if cluster:
        cluster.close()
    print("\ndask_client fixture: client.close() called")
Example #27
def test_ols(nrows,
             ncols,
             n_parts,
             fit_intercept,
             normalize,
             datatype,
             client=None):

    cluster = None
    if client is None:
        cluster = LocalCUDACluster()
        client = Client(cluster)

    try:
        from cuml.dask.linear_model import LinearRegression as cumlOLS_dask

        n_info = 5
        nrows = np.int(nrows)
        ncols = np.int(ncols)
        X, y = make_regression_dataset(datatype, nrows, ncols, n_info)

        X_df, y_df = _prep_training_data(client, X, y, n_parts)

        lr = cumlOLS_dask(fit_intercept=fit_intercept, normalize=normalize)

        if n_parts > 2:
            lr.fit(X_df, y_df, force_colocality=True)
        else:
            lr.fit(X_df, y_df)

        ret = lr.predict(X_df)

        error_cuml = mean_squared_error(y, ret.compute().to_pandas().values)

        assert (error_cuml < 1e-6)

    finally:
        # Only tear down the cluster/client if this test created them
        if cluster is not None:
            client.close()
            cluster.close()
Example #28
def test_allreduce():

    cluster = LocalCUDACluster(threads_per_worker=1)
    client = Client(cluster)

    cb = CommsContext()
    cb.init()

    start = time.time()
    dfs = [client.submit(func_test_allreduce, cb.sessionId,
                         random.random(), workers=[w])
           for wid, w in zip(range(len(cb.worker_addresses)),
                             cb.worker_addresses)]
    wait(dfs)

    print("Time: " + str(time.time() - start))

    print(str(list(map(lambda x: x.result(), dfs))))

    assert all(list(map(lambda x: x.result(), dfs)))

    cb.destroy()
    client.close()
    cluster.close()
Example #29
File: conftest.py  Project: trxcllnt/cuml
def cluster():
    cluster = LocalCUDACluster()
    yield cluster
    cluster.close()
Example #30
File: conftest.py  Project: rapidsai/cuml
def cluster():

    cluster = LocalCUDACluster(protocol="tcp", scheduler_port=0)
    yield cluster
    cluster.close()