Exemplo n.º 1
0
    def test_data_initialization(self, local_cuda_cluster: LocalCUDACluster) -> None:
        with Client(local_cuda_cluster) as client:
            X, y, _ = generate_array()
            fw = da.random.random((random_cols, ))
            fw = fw - fw.min()
            m = dxgb.DaskDMatrix(client, X, y, feature_weights=fw)

            workers = _get_client_workers(client)
            rabit_args = client.sync(dxgb._get_rabit_args, len(workers), client)

            def worker_fn(worker_addr: str, data_ref: Dict) -> None:
                with dxgb.RabitContext(rabit_args):
                    local_dtrain = dxgb._dmatrix_from_list_of_parts(**data_ref)
                    fw_rows = local_dtrain.get_float_info("feature_weights").shape[0]
                    assert fw_rows == local_dtrain.num_col()

            futures = []
            for i in range(len(workers)):
                futures.append(
                    client.submit(
                        worker_fn,
                        workers[i],
                        m._create_fn_args(workers[i]),
                        pure=False,
                        workers=[workers[i]]
                    )
                )
            client.gather(futures)
Exemplo n.º 2
0
def run_rabit_ops(client, n_workers):
    from test_with_dask import _get_client_workers
    from xgboost.dask import RabitContext, _get_rabit_args
    from xgboost import rabit

    workers = _get_client_workers(client)
    rabit_args = client.sync(_get_rabit_args, len(workers), None, client)
    assert not rabit.is_distributed()
    n_workers_from_dask = len(workers)
    assert n_workers == n_workers_from_dask

    def local_test(worker_id):
        with RabitContext(rabit_args):
            a = 1
            assert rabit.is_distributed()
            a = np.array([a])
            reduced = rabit.allreduce(a, rabit.Op.SUM)
            assert reduced[0] == n_workers

            worker_id = np.array([worker_id])
            reduced = rabit.allreduce(worker_id, rabit.Op.MAX)
            assert reduced == n_workers - 1

            return 1

    futures = client.map(local_test, range(len(workers)), workers=workers)
    results = client.gather(futures)
    assert sum(results) == n_workers
Exemplo n.º 3
0
    def run_quantile(self, name: str,
                     local_cuda_cluster: LocalCUDACluster) -> None:
        if sys.platform.startswith("win"):
            pytest.skip("Skipping dask tests on Windows")

        exe = None
        for possible_path in {
                './testxgboost', './build/testxgboost', '../build/testxgboost',
                '../gpu-build/testxgboost'
        }:
            if os.path.exists(possible_path):
                exe = possible_path
        assert exe, 'No testxgboost executable found.'
        test = "--gtest_filter=GPUQuantile." + name

        def runit(worker_addr: str,
                  rabit_args: List[bytes]) -> subprocess.CompletedProcess:
            port_env = ''
            # setup environment for running the c++ part.
            for arg in rabit_args:
                if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'):
                    port_env = arg.decode('utf-8')
                    port_env = arg.decode('utf-8')
                if arg.decode("utf-8").startswith("DMLC_TRACKER_URI"):
                    uri_env = arg.decode("utf-8")
            port = port_env.split('=')
            env = os.environ.copy()
            env[port[0]] = port[1]
            uri = uri_env.split("=")
            env[uri[0]] = uri[1]
            return subprocess.run([str(exe), test],
                                  env=env,
                                  stdout=subprocess.PIPE)

        with Client(local_cuda_cluster) as client:
            workers = _get_client_workers(client)
            rabit_args = client.sync(dxgb._get_rabit_args, len(workers), None,
                                     client)
            futures = client.map(runit,
                                 workers,
                                 pure=False,
                                 workers=workers,
                                 rabit_args=rabit_args)
            results = client.gather(futures)
            for ret in results:
                msg = ret.stdout.decode('utf-8')
                assert msg.find('1 test from GPUQuantile') != -1, msg
                assert ret.returncode == 0, msg
Exemplo n.º 4
0
def make_categorical(
    client: Client,
    n_samples: int,
    n_features: int,
    n_categories: int,
    onehot: bool = False,
) -> Tuple[dd.DataFrame, dd.Series]:
    workers = _get_client_workers(client)
    n_workers = len(workers)
    dfs = []

    def pack(**kwargs: Any) -> dd.DataFrame:
        X, y = tm.make_categorical(**kwargs)
        X["label"] = y
        return X

    meta = pack(n_samples=1,
                n_features=n_features,
                n_categories=n_categories,
                onehot=False)

    for i, worker in enumerate(workers):
        l_n_samples = min(n_samples // n_workers,
                          n_samples - i * (n_samples // n_workers))
        future = client.submit(
            pack,
            n_samples=l_n_samples,
            n_features=n_features,
            n_categories=n_categories,
            onehot=False,
            workers=[worker],
        )
        dfs.append(future)

    df = dd.from_delayed(dfs, meta=meta)
    y = df["label"]
    X = df[df.columns.difference(["label"])]

    if onehot:
        return dd.get_dummies(X), y
    return X, y
Exemplo n.º 5
0
    def run_quantile(self, name, local_cuda_cluster):
        if sys.platform.startswith("win"):
            pytest.skip("Skipping dask tests on Windows")

        exe = None
        for possible_path in {
                './testxgboost', './build/testxgboost', '../build/testxgboost',
                '../gpu-build/testxgboost'
        }:
            if os.path.exists(possible_path):
                exe = possible_path
        assert exe, 'No testxgboost executable found.'
        test = "--gtest_filter=GPUQuantile." + name

        def runit(worker_addr, rabit_args):
            port = None
            # setup environment for running the c++ part.
            for arg in rabit_args:
                if arg.decode('utf-8').startswith('DMLC_TRACKER_PORT'):
                    port = arg.decode('utf-8')
            port = port.split('=')
            env = os.environ.copy()
            env[port[0]] = port[1]
            return subprocess.run([exe, test], env=env, stdout=subprocess.PIPE)

        with Client(local_cuda_cluster) as client:
            workers = list(_get_client_workers(client).keys())
            rabit_args = client.sync(dxgb._get_rabit_args, workers, client)
            futures = client.map(runit,
                                 workers,
                                 pure=False,
                                 workers=workers,
                                 rabit_args=rabit_args)
            results = client.gather(futures)
            for ret in results:
                msg = ret.stdout.decode('utf-8')
                assert msg.find('1 test from GPUQuantile') != -1, msg
                assert ret.returncode == 0, msg
Exemplo n.º 6
0
 def test_empty_dmatrix_auc(self, local_cuda_cluster: LocalCUDACluster) -> None:
     with Client(local_cuda_cluster) as client:
         n_workers = len(_get_client_workers(client))
         run_empty_dmatrix_auc(client, "gpu_hist", n_workers)