Пример #1
0
def test_dataframe_set_index_sync(wait, client):
    """Re-index a persisted demo timeseries on ``name`` and check it is non-empty.

    The demo frame is persisted through ``client`` and waited on before the
    ``set_index`` call, so the re-indexing starts from already-persisted data.
    """
    column_types = {'value': float, 'name': str, 'id': int}
    timeseries = dd.demo.make_timeseries(
        '2000', '2001', column_types,
        freq='2H', partition_freq='1M', seed=1,
    )
    timeseries = client.persist(timeseries)
    wait(timeseries)

    # Re-index with the task-based shuffle, then persist the result as well.
    by_name = client.persist(timeseries.set_index('name', shuffle='tasks'))

    assert len(by_name)
Пример #2
0
def test_dataframe_set_index_sync(wait, client):
    """Set ``name`` as the index of a persisted demo timeseries.

    The test passes when the re-indexed, persisted frame has a non-zero length.
    """
    schema = {"value": float, "name": str, "id": int}
    source = client.persist(
        dd.demo.make_timeseries(
            "2000", "2001", schema, freq="2H", partition_freq="1M", seed=1
        )
    )
    # Block until the source frame is done before re-indexing it.
    wait(source)

    reindexed = source.set_index("name", shuffle="tasks")
    reindexed = client.persist(reindexed)

    assert len(reindexed)
Пример #3
0
def test_dataframe_set_index_sync(wait, client):
    """Exercise ``set_index`` with ``shuffle='tasks'`` on a persisted frame.

    Steps: build the demo timeseries, persist it and wait, re-index on the
    ``name`` column, persist again, and assert the result is not empty.
    """
    make_kwargs = dict(freq='2H', partition_freq='1M', seed=1)
    dtypes = {'value': float, 'name': str, 'id': int}

    frame = dd.demo.make_timeseries('2000', '2001', dtypes, **make_kwargs)
    frame = client.persist(frame)
    wait(frame)

    indexed = frame.set_index('name', shuffle='tasks')
    indexed = client.persist(indexed)

    assert len(indexed)
Пример #4
0
    def test_n_workers(self) -> None:
        """Check worker discovery across a training and a validation matrix.

        The training arrays are created by tasks submitted to the first
        worker; the validation frames are persisted with the second worker
        requested. ``_get_workers_from_data`` is then expected to return two
        entries for the train matrix plus the ``'Valid'`` eval matrix.
        """
        with LocalCluster(n_workers=2) as cluster, Client(cluster) as client:
            worker_keys = list(_get_client_workers(client).keys())
            from sklearn.datasets import load_breast_cancer
            features, labels = load_breast_cancer(return_X_y=True)

            # Training data: dask arrays built by tasks pinned to worker 0.
            train_X = client.submit(
                da.from_array, features, workers=[worker_keys[0]]
            ).result()
            train_y = client.submit(
                da.from_array, labels, workers=[worker_keys[0]]
            ).result()
            train = xgb.dask.DaskDMatrix(client, train_X, train_y)

            # Validation data: dask dataframes persisted with worker 1 requested.
            lazy_X = dd.from_array(features)
            valid_X = client.persist(lazy_X, workers={lazy_X: worker_keys[1]})
            lazy_y = dd.from_array(labels)
            valid_y = client.persist(lazy_y, workers={lazy_y: worker_keys[1]})
            valid = xgb.dask.DaskDMatrix(client, valid_X, valid_y)

            merged = xgb.dask._get_workers_from_data(
                train, evals=[(valid, 'Valid')]
            )
            assert len(merged) == 2
Пример #5
0
def test_futures_in_graph(c):
    """Build on top of a persisted delayed value and compute the result.

    A small delayed graph summing ``(1 + 1) + (2 + 2)`` is persisted through
    ``c``; a further ``add`` of 10 is layered on the persisted object and
    computed with the ``"dask.distributed"`` scheduler.
    """
    one, two = delayed(1), delayed(2)
    one_doubled = delayed(add)(one, one)
    two_doubled = delayed(add)(two, two)
    subtotal = delayed(add)(one_doubled, two_doubled)

    persisted_subtotal = c.persist(subtotal)
    total = delayed(add)(persisted_subtotal, 10)

    expected = ((1 + 1) + (2 + 2)) + 10
    assert total.compute(scheduler="dask.distributed") == expected