Exemplo n.º 1
0
def test_eventstream(c, s, *workers):
    """Replay EventStream messages through ``task_stream_append``.

    Registers an ``EventStream`` plugin on ``s``, runs ten ``div`` tasks plus
    a ``sum`` over all but the first of them, and expects eleven buffered
    messages.  The messages are then appended into a copy of the bokeh
    'task-events' rectangles template and the names/colors are checked.
    """
    pytest.importorskip('bokeh')

    stream = EventStream()
    s.add_plugin(stream)
    assert stream.buffer == []

    futures = c.map(div, [1] * 10, range(10))
    total = c.submit(sum, futures[1:])
    yield wait(total)
    yield wait(futures)

    assert len(stream.buffer) == 11

    from distributed.bokeh import messages
    from distributed.diagnostics.progress_stream import task_stream_append
    rectangles = deepcopy(messages['task-events']['rectangles'])
    # Scratch mapping handed to task_stream_append; kept separate from the
    # ``workers`` parameter so the parameter is not shadowed.
    worker_rows = {}
    for message in stream.buffer:
        task_stream_append(rectangles, message, worker_rows)

    transfers = [name for name in rectangles['name']
                 if name.startswith('transfer')]
    assert len(transfers) == 2
    # Any entry named exactly 'transfer' must be colored red.
    assert all(color == 'red'
               for name, color in zip(rectangles['name'], rectangles['color'])
               if name == 'transfer')

    assert any(color == 'black' for color in rectangles['color'])
Exemplo n.º 2
0
def test_client_sync(client):
    """Run ten ``inc`` tasks inside ``get_task_stream`` and expect ten records."""
    with get_task_stream(client=client) as stream:
        # Short pause to smooth over time differences on the scheduler.
        sleep(0.1)
        submitted = client.map(inc, range(10))
        wait(submitted)

    assert len(stream.data) == 10
Exemplo n.º 3
0
def test_progressbar_cancel(client):
    """Cancel the last of five submitted tasks and expect the widget to report an error.

    NOTE(review): the submitted lambda takes no parameters but is submitted
    together with ``i`` -- confirm this is intentional.
    """
    import time
    futures = [client.submit(lambda: time.sleep(0.3), i) for i in range(5)]
    widget = ProgressWidget(futures)
    client.sync(widget.listen)
    futures[-1].cancel()
    wait(futures[:-1])
    assert widget.status == 'error'
    # no tasks finish before cancel is called
    assert widget.bar.value == 0
Exemplo n.º 4
0
def test_dataframe_set_index_sync(wait, client):
    """Persist a demo timeseries, re-index it by 'name' with a task shuffle, and check it is non-empty."""
    dtypes = {'value': float, 'name': str, 'id': int}
    frame = dd.demo.make_timeseries('2000', '2001', dtypes,
                                    freq='2H', partition_freq='1M', seed=1)
    frame = client.persist(frame)
    wait(frame)

    indexed = frame.set_index('name', shuffle='tasks')
    indexed = client.persist(indexed)

    assert len(indexed)
Exemplo n.º 5
0
def test_progressbar_cancel(loop):
    """Cancel the last of five submitted tasks and expect the widget to report an error.

    Uses a ``cluster()`` context and a ``Client`` connected to its port.
    """
    with cluster() as (s, [a, b]):
        with Client(('127.0.0.1', s['port']), loop=loop) as c:
            import time
            futures = [c.submit(lambda: time.sleep(0.3), i)
                       for i in range(5)]
            widget = ProgressWidget(futures)
            sync(loop, widget.listen)
            futures[-1].cancel()
            wait(futures[:-1])
            assert widget.status == 'error'
            # no tasks finish before cancel is called
            assert widget.bar.value == 0
Exemplo n.º 6
0
def test_persist(c, s, a, b):
    """``persist`` on delayed values yields objects whose key lands in ``a`` or ``b``.

    The second call also passes a plain ``1`` alongside the delayed value.
    """
    lazy_one = delayed(inc)(1)
    [persisted_one] = persist(lazy_one)

    yield wait(persisted_one)
    assert any(persisted_one.key in worker.data for worker in (a, b))

    lazy_ten = delayed(inc)(10)
    persisted_ten, _ = persist(lazy_ten, 1)

    yield wait(persisted_ten)
    assert any(persisted_ten.key in worker.data for worker in (a, b))
Exemplo n.º 7
0
def test_get_task_stream_save(client, tmpdir):
    """``get_task_stream(plot='save')`` writes an HTML file and exposes a bokeh figure."""
    bokeh = pytest.importorskip('bokeh')
    fn = os.path.join(str(tmpdir), 'foo.html')

    with get_task_stream(plot='save', filename=fn) as stream:
        wait(client.map(inc, range(10)))
    with open(fn) as f:
        html = f.read()
    # The saved page should mention both the task name and bokeh itself.
    for needle in ('inc', 'bokeh'):
        assert needle in html

    assert isinstance(stream.figure, bokeh.plotting.Figure)
Exemplo n.º 8
0
def test_compute(c, s, a, b):
    """``Client.compute`` accepts both per-object and global ``resources`` mappings.

    After each computation the test checks that data has arrived on ``b``.
    """
    inner = delayed(inc)(1)
    outer = delayed(inc)(inner)

    outer_future = c.compute(outer,
                             resources={inner: {'A': 1}, outer: {'B': 1}})
    yield wait(outer_future)

    assert b.data

    lazies = [delayed(inc)(i) for i in range(10, 20)]
    lazy_futures = c.compute(lazies, resources={'B': 1})
    yield wait(lazy_futures)

    assert len(b.data) > 10
Exemplo n.º 9
0
def test_dataframe_set_index_sync(loop, wait):
    """Persist a demo timeseries, re-index it by "name" with a task shuffle, and check it is non-empty.

    The first value yielded by ``cluster()`` is bound to ``s`` (it is indexed
    by ``"port"``), so the ``Client`` bound to ``c`` no longer shadows it.
    """
    with cluster() as (s, [a, b]):
        with Client(("127.0.0.1", s["port"]), loop=loop) as c:
            with dask.set_options(get=c.get):
                df = dd.demo.make_timeseries(
                    "2000", "2001", {"value": float, "name": str, "id": int}, freq="2H", partition_freq="1M", seed=1
                )
                df = c.persist(df)
                wait(df)

                df2 = df.set_index("name", shuffle="tasks")
                df2 = c.persist(df2)

                assert len(df2)
Exemplo n.º 10
0
def test_values(client):
    """Check MultiProgressWidget state after finished tasks and after a failing task."""
    futures = [client.submit(inc, i) for i in range(5)]
    wait(futures)
    widget = MultiProgressWidget(futures)
    client.sync(widget.listen)
    assert set(widget.bars) == {'inc'}
    assert widget.status == 'finished'
    assert widget.comm.closed()
    assert '5 / 5' in widget.bar_texts['inc'].value
    assert widget.bars['inc'].value == 1.0

    # A task built from ``throws`` should drive the widget into 'error'.
    failing = client.submit(throws, 1)
    widget = MultiProgressWidget([failing])
    client.sync(widget.listen)
    assert widget.status == 'error'
Exemplo n.º 11
0
def test_move(c, s, a, b):
    """Data scattered to ``b`` is used by a task restricted by resource 'A'; the result must be on ``a``."""
    scattered = yield c._scatter([1], workers=b.address)
    [remote] = scattered

    result = c.submit(inc, remote, resources={'A': 1})

    yield wait(result)
    assert a.data[result.key] == 2
Exemplo n.º 12
0
def test_balance_resources(c, s, a, b):
    """Resource-constrained tasks end up on both workers while ``a`` is busy with slow work."""
    # Keep a reference so the background tasks stay alive during the test.
    background = c.map(slowinc, range(100), delay=0.1, workers=a.address)
    constrained = c.map(inc, range(2), resources={'A': 1})

    yield wait(constrained)
    # At least one constrained result on each worker (work is shared).
    for worker in (a, b):
        assert any(f.key in worker.data for f in constrained)
Exemplo n.º 13
0
def test_work_stealing(c, s, a, b):
    """Fifty ``slowadd`` tasks depending on data scattered to ``a`` end up spread over both workers."""
    scattered = yield c._scatter([1], workers=a.address)
    [x] = scattered
    futures = c.map(slowadd, range(50), [x] * 50, delay=0.1)
    yield gen.sleep(0.1)
    yield wait(futures)
    for worker in (a, b):
        assert len(worker.data) > 10
Exemplo n.º 14
0
def test_TaskStreamPlugin(c, s, *workers):
    """Check ``TaskStreamPlugin.buffer`` and the output of ``rectangles``.

    Ten ``div`` tasks (one dividing by zero) and a ``sum`` are submitted; the
    plugin is expected to buffer eleven entries.
    """
    plugin = TaskStreamPlugin(s)
    assert not plugin.buffer

    futures = c.map(div, [1] * 10, range(10))
    total = c.submit(sum, futures[1:])
    yield wait(total)

    assert len(plugin.buffer) == 11

    # Scratch mapping filled in by ``rectangles``; kept separate from the
    # ``workers`` parameter so the parameter is not shadowed.
    worker_rows = {}

    rects = plugin.rectangles(0, 10, worker_rows)
    assert worker_rows
    assert all(name == 'div' for name in rects['name'])
    assert all(duration > 0 for duration in rects['duration'])
    color_counts = frequencies(rects['color'])
    assert color_counts['black'] == 1
    assert set(color_counts.values()) == {9, 1}
    assert len(set(rects['y'])) == 3

    window = plugin.rectangles(2, 5, worker_rows)
    assert all(len(column) == 3 for column in window.values())

    starts = sorted(window['start'])
    boundary = (starts[0] + starts[1]) / 2000
    bounded = plugin.rectangles(2, 5, workers=worker_rows,
                                start_boundary=boundary)
    assert set(bounded['start']).issubset(set(starts[1:]))
Exemplo n.º 15
0
def test_collect(c, s, a, b):
    """Exercise ``TaskStreamPlugin.collect`` with ``start``, ``stop`` and ``count``.

    Ten ``slowinc`` tasks (delay 0.1 each) are run, then ``collect`` is
    called with absolute timestamps, time strings and a count limit, and the
    number of returned entries is checked against the number of futures.
    """
    tasks = TaskStreamPlugin(s)
    start = time()  # timestamp taken before any task is submitted
    futures = c.map(slowinc, range(10), delay=0.1)
    yield wait(futures)

    # No filter and a start before submission: every task is returned.
    L = tasks.collect()
    assert len(L) == len(futures)
    L = tasks.collect(start=start)
    assert len(L) == len(futures)

    # A later start may drop some entries; the bounds are deliberately loose.
    L = tasks.collect(start=start + 0.2)
    assert 4 <= len(L) <= len(futures)

    # String values for ``start`` -- presumably relative durations; confirm
    # against TaskStreamPlugin.collect.
    L = tasks.collect(start='20 s')
    assert len(L) == len(futures)

    L = tasks.collect(start='500ms')
    assert 0 < len(L) <= len(futures)

    # ``count`` alone returns the last entries of the buffer.
    L = tasks.collect(count=3)
    assert len(L) == 3
    assert L == list(tasks.buffer)[-3:]

    # With ``stop`` + ``count`` the result matches the count-only call;
    # with ``start`` + ``count`` the first entries of the buffer are returned.
    assert tasks.collect(stop=start + 100, count=3) == tasks.collect(count=3)
    assert tasks.collect(start=start, count=3) == list(tasks.buffer)[:3]
Exemplo n.º 16
0
def test_values(loop):
    """Check MultiProgressWidget state after finished tasks and after a failing task.

    Uses a ``cluster()`` context and a ``Client`` connected to its port.
    """
    with cluster() as (s, [a, b]):
        with Client(('127.0.0.1', s['port']), loop=loop) as c:
            futures = [c.submit(inc, i) for i in range(5)]
            wait(futures)
            widget = MultiProgressWidget(futures)
            sync(loop, widget.listen)
            assert set(widget.bars) == {'inc'}
            assert widget.status == 'finished'
            assert widget.stream.closed()
            assert '5 / 5' in widget.bar_texts['inc'].value
            assert widget.bars['inc'].value == 1.0

            # A task built from ``throws`` should end in 'error'.
            failing = c.submit(throws, 1)
            widget = MultiProgressWidget([failing])
            sync(loop, widget.listen)
            assert widget.status == 'error'
Exemplo n.º 17
0
def test_dont_work_steal(c, s, a, b):
    """Tasks restricted by resource 'A' all keep their results on ``a``."""
    scattered = yield c._scatter([1], workers=a.address)
    [x] = scattered

    futures = [c.submit(slowadd, x, i, resources={'A': 1}, delay=0.05)
               for i in range(10)]

    yield wait(futures)
    for future in futures:
        assert future.key in a.data
Exemplo n.º 18
0
def test_minimum_resource(c, s, a):
    """Tasks needing resources 'A' and 'B' never run more than one at a time on ``a``.

    Thirty ``slowinc`` tasks are submitted; while results accumulate the test
    polls ``a.executing``, and afterwards checks that all resources have been
    returned.
    """
    futures = c.map(slowinc, range(30), resources={'A': 1, 'B': 1}, delay=0.02)

    # Poll until every result is stored on the worker.
    while len(a.data) < 30:
        yield gen.sleep(0.01)
        assert len(a.executing) <= 1

    yield wait(futures)
    # Nothing is held back once all tasks are done.
    assert a.total_resources == a.available_resources
Exemplo n.º 19
0
def test_get_task_stream_plot(c, s, a, b):
    """``Client.get_task_stream(plot=True)`` yields a ``(data, figure)`` pair with a bokeh figure."""
    bokeh = pytest.importorskip('bokeh')
    # First call is made before any work is submitted; its result is unused.
    yield c.get_task_stream()

    futures = c.map(slowinc, range(10), delay=0.1)
    yield wait(futures)

    result = yield c.get_task_stream(plot=True)
    data, figure = result
    assert isinstance(figure, bokeh.plotting.Figure)
Exemplo n.º 20
0
def test_persist(c, s, a, b):
    """``Client.persist`` with per-object resources places ``x`` on ``a`` and ``y`` on ``b``."""
    x = delayed(inc)(1)
    y = delayed(inc)(x)

    restrictions = {x: {'A': 1}, y: {'B': 1}}
    persisted = c.persist([x, y], resources=restrictions)
    px, py = persisted

    yield wait([px, py])

    assert x.key in a.data
    assert y.key in b.data
Exemplo n.º 21
0
def test_progressbar_done(loop):
    """ProgressWidget reports success for finished tasks and danger for a failed one."""
    with cluster() as (s, [a, b]):
        with Client(('127.0.0.1', s['port']), loop=loop) as c:
            futures = [c.submit(inc, i) for i in range(5)]
            wait(futures)
            widget = ProgressWidget(futures)
            sync(loop, widget.listen)
            assert widget.status == 'finished'
            assert widget.bar.value == 1.0
            assert widget.bar.bar_style == 'success'

            # A task built from ``throws`` should flip the bar to 'danger'.
            failing = c.submit(throws, futures)
            wait([failing])

            widget = ProgressWidget([failing])
            sync(loop, widget.listen)
            assert widget.status == 'error'
            assert widget.bar.value == 0.0
            assert widget.bar.bar_style == 'danger'
Exemplo n.º 22
0
def test_client(c, s, a, b):
    """``Client.get_task_stream`` returns ``()`` first, then the plugin's buffer as a tuple."""
    before = yield c.get_task_stream()
    assert before == ()

    futures = c.map(slowinc, range(10), delay=0.1)
    yield wait(futures)

    stream_plugins = [plugin for plugin in s.plugins
                      if isinstance(plugin, TaskStreamPlugin)]
    tasks = stream_plugins[0]
    after = yield c.get_task_stream()
    assert after == tuple(tasks.buffer)
Exemplo n.º 23
0
def test_persist_tuple(c, s, a, b):
    """A tuple key in ``resources`` applies the restriction to both delayed objects."""
    x = delayed(inc)(1)
    y = delayed(inc)(x)

    persisted = c.persist([x, y], resources={(x, y): {'A': 1}})
    px, py = persisted

    yield wait([px, py])

    # Both results on ``a``, nothing on ``b``.
    for lazy in (x, y):
        assert lazy.key in a.data
    assert not b.data
Exemplo n.º 24
0
def test_submit_many_non_overlapping(c, s, a, b):
    """Resource-restricted tasks never exceed a per-worker concurrency bound.

    One hundred ``slowinc`` tasks requiring ``{'A': 1}`` are submitted.  While
    results accumulate the test polls the number of executing tasks on each
    worker, and afterwards checks that all resources were returned.
    """
    futures = c.map(slowinc, range(100), resources={'A': 1}, delay=0.02)

    # Poll until all hundred results are stored across the two workers.
    while len(a.data) + len(b.data) < 100:
        yield gen.sleep(0.01)
        # Bounds presumably mirror each worker's configured 'A' capacity
        # (set outside this function) -- confirm.
        assert len(a.executing) <= 2
        assert len(b.executing) <= 1

    yield wait(futures)
    assert a.total_resources == a.available_resources
    assert b.total_resources == b.available_resources
Exemplo n.º 25
0
def test_dataframe_set_index_sync(loop, wait):
    """Persist a demo timeseries, re-index it by 'name' with a task shuffle, and check it is non-empty.

    Runs against a ``cluster()`` with ``dask.set_options(get=c.get)`` active.
    """
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            with dask.set_options(get=c.get):
                dtypes = {'value': float, 'name': str, 'id': int}
                frame = dd.demo.make_timeseries('2000', '2001', dtypes,
                                                freq='2H',
                                                partition_freq='1M',
                                                seed=1)
                frame = c.persist(frame)
                wait(frame)

                indexed = frame.set_index('name', shuffle='tasks')
                indexed = c.persist(indexed)

                assert len(indexed)
Exemplo n.º 26
0
def test_resource_submit(c, s, a, b):
    """Tasks run where their resource is offered; a 'C' task waits for a new worker."""
    needs_a = c.submit(inc, 1, resources={'A': 3})
    needs_b = c.submit(inc, 2, resources={'B': 1})
    needs_c = c.submit(inc, 3, resources={'C': 2})

    yield wait(needs_a)
    assert needs_a.key in a.data

    yield wait(needs_b)
    assert needs_b.key in b.data

    # The 'C' task is reported as 'no-worker' until a suitable worker joins.
    status = s.get_task_status(keys=[needs_c.key])
    assert status == {needs_c.key: 'no-worker'}

    extra = Worker(s.ip, s.port, loop=s.loop, resources={'C': 10})
    yield extra._start()

    yield wait(needs_c)
    assert needs_c.key in extra.data

    yield extra._close()
Exemplo n.º 27
0
def test_resource_submit(c, s, a, b):
    """Tasks with resource requirements run only where the resource is offered.

    ``x`` and ``y`` are expected on ``a`` and ``b`` respectively, while ``z``
    (needing 'C') stays in state 'no-worker' until a worker created with
    ``resources={'C': 10}`` is started.
    """
    x = c.submit(inc, 1, resources={'A': 3})
    y = c.submit(inc, 2, resources={'B': 1})
    z = c.submit(inc, 3, resources={'C': 2})

    yield wait(x)
    assert x.key in a.data

    yield wait(y)
    assert y.key in b.data

    # z cannot be scheduled yet.
    assert s.get_task_status(keys=[z.key]) == {z.key: 'no-worker'}

    # Start a third worker that offers resource 'C'.
    d = Worker(s.ip, s.port, loop=s.loop, resources={'C': 10})
    yield d._start()

    yield wait(z)
    assert z.key in d.data

    # Shut the extra worker down again.
    yield d._close()
Exemplo n.º 28
0
def test_dataframe_set_index_sync(wait, client):
    """Persist a demo timeseries, re-index it by "name" with a task shuffle, and check it is non-empty."""
    dtypes = {"value": float, "name": str, "id": int}
    frame = dd.demo.make_timeseries(
        "2000", "2001", dtypes, freq="2H", partition_freq="1M", seed=1
    )
    frame = client.persist(frame)
    wait(frame)

    indexed = frame.set_index("name", shuffle="tasks")
    indexed = client.persist(indexed)

    assert len(indexed)
Exemplo n.º 29
0
def test_resource_submit(c, s, a, b):
    """Tasks run where their resource is offered; a 'C' task is unrunnable until a new worker joins."""
    needs_a = c.submit(inc, 1, resources={'A': 3})
    needs_b = c.submit(inc, 2, resources={'B': 1})
    needs_c = c.submit(inc, 3, resources={'C': 2})

    for future, worker in ((needs_a, a), (needs_b, b)):
        yield wait(future)
        assert future.key in worker.data

    # The 'C' task is listed as unrunnable on the scheduler for now.
    assert needs_c.key in s.unrunnable

    extra = Worker(s.ip, s.port, loop=s.loop, resources={'C': 10})
    yield extra._start()

    yield wait(needs_c)
    assert needs_c.key in extra.data

    yield extra._close()
Exemplo n.º 30
0
def test_dataframe_set_index_sync(wait, client):
    """Persist ``dask.datasets.timeseries``, re-index it by "name" with a task shuffle, and check it is non-empty."""
    column_types = {"value": float, "name": str, "id": int}
    frame = dask.datasets.timeseries(
        start="2000", end="2001", dtypes=column_types,
        freq="2H", partition_freq="1M", seed=1,
    )
    frame = frame.persist()
    wait(frame)

    indexed = frame.set_index("name", shuffle="tasks")
    indexed = indexed.persist()

    assert len(indexed)
Exemplo n.º 31
0
def test_task_stream_clear_interval(c, s, a, b):
    """``TaskStream`` with ``clear_interval=200`` drops old entries after a pause.

    Two batches (``inc`` then ``dec``) submitted close together are both
    present in the source; after a longer sleep a new ``inc`` batch leaves
    only ten ``inc`` entries and no ``dec`` entries.
    """
    ts = TaskStream(s, clear_interval=200)

    yield wait(c.map(inc, range(10)))
    ts.update()
    yield gen.sleep(0.010)  # short gap between the first two batches
    yield wait(c.map(dec, range(10)))
    ts.update()

    # Every column of the data source has the same length.
    assert len(set(map(len, ts.source.data.values()))) == 1
    assert ts.source.data['name'].count('inc') == 10
    assert ts.source.data['name'].count('dec') == 10

    # Longer pause (0.3) -- presumably exceeds the clear interval if that is
    # expressed in milliseconds; confirm the unit.
    yield gen.sleep(0.300)
    yield wait(c.map(inc, range(10, 20)))
    ts.update()

    # Only the newest batch remains.
    assert len(set(map(len, ts.source.data.values()))) == 1
    assert ts.source.data['name'].count('inc') == 10
    assert ts.source.data['name'].count('dec') == 0
Exemplo n.º 32
0
def test_submit_many_non_overlapping(c, s, a, b):
    """Resource-restricted tasks never exceed a per-worker concurrency bound.

    One hundred ``slowinc`` tasks requiring ``{"A": 1}`` are submitted.  While
    results accumulate the test polls the number of executing tasks on each
    worker, and afterwards checks that all resources were returned.
    """
    futures = c.map(slowinc, range(100), resources={"A": 1}, delay=0.02)

    # Poll until all hundred results are stored across the two workers.
    while len(a.data) + len(b.data) < 100:
        yield gen.sleep(0.01)
        # Bounds presumably mirror each worker's configured "A" capacity
        # (set outside this function) -- confirm.
        assert len(a.executing) <= 2
        assert len(b.executing) <= 1

    yield wait(futures)
    assert a.total_resources == a.available_resources
    assert b.total_resources == b.available_resources
Exemplo n.º 33
0
def test_persist_tuple(c, s, a, b):
    """A tuple key in ``resources`` applies the restriction to both delayed objects."""
    x = delayed(inc)(1)
    y = delayed(inc)(x)

    persisted = c.persist([x, y], resources={(x, y): {"A": 1}})
    px, py = persisted

    yield wait([px, py])

    # Both results on ``a``, nothing on ``b``.
    for lazy in (x, y):
        assert lazy.key in a.data
    assert not b.data
Exemplo n.º 34
0
def test_CurrentLoad(c, s, a, b):
    """After some work, ``CurrentLoad`` has two entries per column and non-zero 'nbytes'."""
    load = CurrentLoad(s)

    futures = c.map(slowinc, range(10), delay=0.001)
    yield wait(futures)

    load.update()
    columns = dict(load.source.data)

    for values in columns.values():
        assert len(values) == 2
    assert all(columns['nbytes'])
Exemplo n.º 35
0
def test_tls(c, s, a, b):
    """A ProgressWidget built with ``complete=True`` reports all three chained tasks."""
    first = c.submit(inc, 1)
    second = c.submit(inc, first)
    third = c.submit(inc, second)
    yield wait(third)

    widget = ProgressWidget([third], scheduler=s.address, complete=True)
    yield widget.listen()

    assert widget.bar.value == 1.0
    assert '3 / 3' in widget.bar_text.value
Exemplo n.º 36
0
 def test_dataset_grid_results(self):
     """The 'grid_results' view returns one entry per stored result of a dataset-backed grid search."""
     examples, labels = _create_dataset()
     dataset, _ = DataSet.objects.get_or_create(
         name='TEST',
         examples=SimpleUploadedFile(examples.name, examples.read()),
         labels=SimpleUploadedFile(labels.name, labels.read()))
     estimator = tree.DecisionTreeClassifier()
     param_grid = {
         'criterion': ['gini', 'entropy'],
         'max_depth': range(1, 21),
         'max_features': ['auto', 'log2', 'sqrt', None]
     }
     search = ATGridSearchCV(estimator, param_grid,
                             dataset=dataset.pk,
                             webserver_url=self.live_server_url)
     wait(search.fit())
     url = reverse('grid_results', kwargs={'uuid': search._uuid})
     response = DjangoClient().get(url)
     self.assertEqual(200, response.status_code)
     # Number of stored results must match the number of returned entries.
     stored = GridSearch.objects.get(uuid=search._uuid).results.all().count()
     self.assertEqual(stored, len(response.data))
Exemplo n.º 37
0
def test_worksteal_many_thieves(c, s, *workers):
    """One hundred tasks depending on a single result get spread over all workers."""
    seed = c.submit(slowinc, -1, delay=0.1)
    yield seed

    dependents = c.map(slowinc, [seed] * 100, pure=False, delay=0.1)

    yield wait(dependents)

    # Each worker holds a moderate share of the keys ...
    for held in s.has_what.values():
        assert 2 < len(held) < 30

    # ... and the total number of held keys stays bounded.
    assert sum(len(held) for held in s.has_what.values()) < 150
Exemplo n.º 38
0
def test_prefer_constrained(c, s, a):
    """Resource-constrained tasks finish quickly even with a thousand slow tasks queued."""
    # Keep a reference so the slow tasks stay alive during the test.
    futures = c.map(slowinc, range(1000), delay=0.1)
    constrained = c.map(inc, range(10), resources={'A': 1})

    began = time()
    yield wait(constrained)
    finished = time()
    assert finished - began < 4
    has_what = dict(s.has_what)
    processing = dict(s.processing)
    # at most two slowinc's finished
    assert len(has_what) < len(constrained) + 2
    assert s.processing[a.address]
Exemplo n.º 39
0
def test_persist_collections(c, s, a, b):
    """Resource restrictions keyed by a collection's dask keys place those keys on ``a``."""
    da = pytest.importorskip('dask.array')
    base = da.arange(10, chunks=(5,))
    y = base.map_blocks(lambda x: x + 1)
    doubled = y.map_blocks(lambda x: 2 * x)
    total = doubled.sum()

    restrictions = {tuple(y.__dask_keys__()): {'A': 1}}
    persisted = c.persist([total, y], resources=restrictions)
    p_total, p_y = persisted

    yield wait([p_total, p_y])

    for key in y.__dask_keys__():
        assert tokey(key) in a.data
Exemplo n.º 40
0
def test_persist_collections(c, s, a, b):
    """Persisting a dask array with key-tuple resources stores every key of ``y`` on ``a``."""
    da = pytest.importorskip('dask.array')
    x = da.arange(10, chunks=(5,))
    y = x.map_blocks(lambda x: x + 1)
    w = y.map_blocks(lambda x: 2 * x).sum()

    y_keys = tuple(y.__dask_keys__())
    ww, yy = c.persist([w, y], resources={y_keys: {'A': 1}})

    yield wait([ww, yy])

    missing = [key for key in y.__dask_keys__() if tokey(key) not in a.data]
    assert not missing
Exemplo n.º 41
0
def test_prefer_constrained(c, s, a):
    """Resource-constrained tasks finish quickly even with many slow tasks queued.

    A thousand ``slowinc`` tasks are submitted first, then ten ``inc`` tasks
    requiring ``{'A': 1}``; waiting on the latter must take under 4 seconds.
    """
    # ``futures`` is not read again; the reference presumably keeps the slow
    # tasks alive for the duration of the test.
    futures = c.map(slowinc, range(1000), delay=0.1)
    constrained = c.map(inc, range(10), resources={'A': 1})

    start = time()
    yield wait(constrained)
    end = time()
    assert end - start < 4
    # Snapshots of scheduler state; ``processing`` is not used afterwards.
    has_what = dict(s.has_what)
    processing = dict(s.processing)
    assert len(has_what) < len(constrained) + 2  # at most two slowinc's finished
    # The worker still has tasks assigned to it.
    assert s.processing[a.address]
Exemplo n.º 42
0
def test_progressbar_done(loop):
    """ProgressWidget shows success/'Finished' for done tasks and danger/'Exception' for a failed one."""
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            futures = [c.submit(inc, i) for i in range(5)]
            wait(futures)
            widget = ProgressWidget(futures)
            sync(loop, widget.listen)
            assert widget.status == 'finished'
            assert widget.bar.value == 1.0
            assert widget.bar.bar_style == 'success'
            assert 'Finished' in widget.elapsed_time.value

            # A task built from ``throws`` should flip the bar to 'danger'.
            failing = c.submit(throws, futures)
            wait([failing])

            widget = ProgressWidget([failing])
            sync(loop, widget.listen)
            assert widget.status == 'error'
            assert widget.bar.value == 0.0
            assert widget.bar.bar_style == 'danger'
            assert 'Exception' in widget.elapsed_time.value
Exemplo n.º 43
0
def test_write_bytes(c, s, a, b):
    """``write_bytes`` writes one HDFS file per scattered chunk, for a glob path and for a directory path."""
    with make_hdfs() as (hdfs, basedir):
        hdfs.mkdir('%s/data/' % basedir)
        chunks = [b'123', b'456', b'789']
        remote_chunks = yield c._scatter(chunks)

        glob_target = 'hdfs://%s/data/file.*.dat' % basedir
        futures = c.compute(write_bytes(remote_chunks, glob_target))
        yield wait(futures)

        yield futures[0]

        # Three files written; the one numbered 1 holds the second chunk.
        assert len(hdfs.ls('%s/data/' % basedir)) == 3
        with hdfs.open('%s/data/file.1.dat' % basedir) as f:
            assert f.read() == b'456'

        hdfs.mkdir('%s/data2/' % basedir)
        dir_target = 'hdfs://%s/data2/' % basedir
        futures = c.compute(write_bytes(remote_chunks, dir_target))
        yield wait(futures)

        assert len(hdfs.ls('%s/data2/' % basedir)) == 3
Exemplo n.º 44
0
def test_AllProgress_lost_key(c, s, a, b, timeout=None):
    """``AllProgress`` forgets in-memory 'inc' keys within 2 seconds of both workers closing."""
    progress = AllProgress(s)
    futures = c.map(inc, range(5))
    yield wait(futures)
    assert len(progress.state['memory']['inc']) == 5

    for worker in (a, b):
        yield worker._close()

    began = time()
    while len(progress.state['memory']['inc']) > 0:
        yield gen.sleep(0.1)
        assert time() < began + 2
Exemplo n.º 45
0
def test_TaskGraph(c, s, a, b):
    """Exercise ``TaskGraph.update`` across simple futures, dask arrays and released tasks.

    Checks node/edge source sizes after a five-task map plus a sum, that both
    sources are JSON-serializable, and that repeated updates keep working
    while tasks are added and forgotten.
    """
    gp = TaskGraph(s)
    futures = c.map(inc, range(5))
    total = c.submit(sum, futures)
    yield total

    gp.update()
    # Six nodes (five inc + one sum) and five edges, in every column.
    assert set(map(len, gp.node_source.data.values())) == {6}
    assert set(map(len, gp.edge_source.data.values())) == {5}
    # Both sources must be JSON-serializable.
    json.dumps(gp.edge_source.data)
    json.dumps(gp.node_source.data)

    da = pytest.importorskip("dask.array")
    x = da.random.random((20, 20), chunks=(10, 10)).persist()
    y = (x + x.T) - x.mean(axis=0)
    y = y.persist()
    yield wait(y)

    # Repeated updates with a larger graph must not raise.
    gp.update()
    gp.update()

    yield c.compute((x + y).sum())

    gp.update()

    future = c.submit(inc, 10)
    future2 = c.submit(inc, future)
    yield wait(future2)
    key = future.key
    # Drop the references, then wait until the scheduler no longer tracks
    # the task.
    del future, future2
    while key in s.tasks:
        yield gen.sleep(0.01)

    assert "memory" in gp.node_source.data["state"]

    gp.update()
    gp.update()

    # At least one edge is still marked visible.
    assert not all(x == "False" for x in gp.edge_source.data["visible"])
Exemplo n.º 46
0
def test_AllProgress_lost_key(c, s, a, b, timeout=None):
    """``AllProgress`` drops in-memory keys once both workers are closed.

    Five ``inc`` results are first recorded under ``state["memory"]["inc"]``;
    after closing ``a`` and ``b`` that set must become empty within 5 seconds.
    """
    p = AllProgress(s)
    futures = c.map(inc, range(5))
    yield wait(futures)
    assert len(p.state["memory"]["inc"]) == 5

    yield a._close()
    yield b._close()

    # Poll until the keys are gone, failing if it takes longer than 5 seconds.
    start = time()
    while len(p.state["memory"]["inc"]) > 0:
        yield gen.sleep(0.1)
        assert time() < start + 5
Exemplo n.º 47
0
def test_progressbar_widget(c, s, a, b):
    """ProgressWidget built from a key reports '3 / 3' with ``complete=True``; also listens without it."""
    first = c.submit(inc, 1)
    second = c.submit(inc, first)
    third = c.submit(inc, second)
    yield wait(third)

    widget = ProgressWidget([third.key], scheduler=(s.ip, s.port),
                            complete=True)
    yield widget.listen()

    assert widget.bar.value == 1.0
    assert '3 / 3' in widget.bar_text.value

    # Smoke test: listening without ``complete`` must also work.
    widget = ProgressWidget([third.key], scheduler=(s.ip, s.port))
    yield widget.listen()
Exemplo n.º 48
0
def test_task_stream(c, s, a, b):
    """``TaskStream.update`` fills ten rows for ten tasks and stays consistent on later updates."""
    stream = TaskStream(s)

    futures = c.map(slowinc, range(10), delay=0.001)

    yield wait(futures)

    stream.update()
    columns = dict(stream.source.data)

    for values in columns.values():
        assert len(values) == 10
    # zero based
    assert min(columns["start"]) == 0

    # A second update without new work leaves ten rows per column.
    stream.update()
    columns = dict(stream.source.data)
    for values in columns.values():
        assert len(values) == 10

    total = c.submit(sum, futures)
    yield wait(total)

    # After more work every column still has one common length.
    stream.update()
    columns = dict(stream.source.data)
    lengths = set(len(values) for values in columns.values())
    assert len(lengths) == 1
Exemplo n.º 49
0
    def test_pandas_input(self):
        """Grid search keeps the input container types (mock frames, and pandas if installed)."""
        # check cross_val_score doesn't destroy pandas dataframe
        type_pairs = [(MockDataFrame, MockDataFrame)]
        try:
            from pandas import Series, DataFrame
        except ImportError:
            # pandas is optional; only the mock types are exercised then
            pass
        else:
            type_pairs.append((DataFrame, Series))

        X = np.arange(100).reshape(10, 10)
        y = np.array([0] * 5 + [1] * 5)

        for InputFeatureType, TargetType in type_pairs:
            # X dataframe, y series
            X_df = InputFeatureType(X)
            y_ser = TargetType(y)
            check_df = lambda x: isinstance(x, InputFeatureType)
            check_series = lambda x: isinstance(x, TargetType)
            clf = CheckingClassifier(check_X=check_df, check_y=check_series)

            search = ATGridSearchCV(clf, {'foo_param': [1, 2, 3]},
                                    webserver_url=self.live_server_url)
            wait(search.fit(X_df, y_ser))
            assert_true(hasattr(search, "grid_scores_"))
Exemplo n.º 50
0
    def test_grid_search_sparse(self):
        """Dense and sparse inputs give the same best ``C`` and at least 90% matching predictions."""
        # Test that grid search works with both dense and sparse matrices
        X_dense, y_all = make_classification(n_samples=200,
                                             n_features=100,
                                             random_state=0)

        dense_search = ATGridSearchCV(LinearSVC(), {'C': [0.1, 1.0]},
                                      webserver_url=self.live_server_url)
        wait(dense_search.fit(X_dense[:180], y_all[:180]))
        dense_pred = dense_search.best_estimator_.predict(X_dense[180:])
        dense_C = dense_search.best_estimator_.C

        X_sparse = sp.csr_matrix(X_dense)
        sparse_search = ATGridSearchCV(LinearSVC(), {'C': [0.1, 1.0]},
                                       webserver_url=self.live_server_url)
        wait(sparse_search.fit(X_sparse[:180].tocoo(), y_all[:180]))
        sparse_pred = sparse_search.best_estimator_.predict(X_sparse[180:])
        sparse_C = sparse_search.best_estimator_.C

        agreement = np.mean(dense_pred == sparse_pred)
        assert_true(agreement >= .9)
        assert_equal(dense_C, sparse_C)
Exemplo n.º 51
0
def test_TaskProgress_empty(c, s, a, b):
    """``TaskProgress`` ends up with empty columns once all tasks are forgotten.

    ``update`` is called before any work, after twenty tasks finish, and
    again after the scheduler's task table has emptied.
    """
    tp = TaskProgress(s)
    tp.update()

    # Keys are "f-" followed by 0..19 repetitions of "a".
    futures = [c.submit(inc, i, key="f-" + "a" * i) for i in range(20)]
    yield wait(futures)
    tp.update()

    # Drop the only references, then wait for the scheduler to forget the tasks.
    del futures
    while s.tasks:
        yield gen.sleep(0.01)
    tp.update()

    assert not any(len(v) for v in tp.source.data.values())
Exemplo n.º 52
0
def test_progressbar_widget(c, s, a, b):
    """ProgressWidget built from a key reports "3 / 3" with ``complete=True``; also listens without it."""
    first = c.submit(inc, 1)
    second = c.submit(inc, first)
    third = c.submit(inc, second)
    yield wait(third)

    widget = ProgressWidget([third.key], scheduler=s.address, complete=True)
    yield widget.listen()

    assert widget.bar.value == 1.0
    assert "3 / 3" in widget.bar_text.value

    # Smoke test: listening without ``complete`` must also work.
    widget = ProgressWidget([third.key], scheduler=s.address)
    yield widget.listen()
Exemplo n.º 53
0
    def test_grid_search_no_score(self):
        """Grid search with explicit scoring works for an estimator lacking ``score``.

        Compares a regular ``LinearSVC`` search against a ``LinearSVCNoScore``
        search (both with ``scoring='accuracy'``), then checks that omitting
        the scoring for the score-less estimator raises ``TypeError``.
        """
        # Test grid-search on classifier that has no score function.
        clf = LinearSVC(random_state=0)
        X, y = make_blobs(random_state=0, centers=2)
        Cs = [.1, 1, 10]
        clf_no_score = LinearSVCNoScore(random_state=0)
        grid_search = ATGridSearchCV(clf, {'C': Cs},
                                     scoring='accuracy',
                                     webserver_url=self.live_server_url)
        wait(grid_search.fit(X, y))

        grid_search_no_score = ATGridSearchCV(
            clf_no_score, {'C': Cs},
            scoring='accuracy',
            webserver_url=self.live_server_url)
        # smoketest grid search
        wait(grid_search_no_score.fit(X, y))

        # check that best params are equal
        try:
            assert_equal(grid_search_no_score.best_params_,
                         grid_search.best_params_)
        except AssertionError:
            # Fallback when the two searches disagree: accept the specific
            # pairing C=1 vs C=10 (presumably tied scores -- confirm).
            if grid_search.best_params_ == {'C': 1}:
                assert_equal(grid_search_no_score.best_params_, {'C': 10})
            else:
                assert_equal(grid_search_no_score.best_params_, {'C': 1})
        # check that we can call score and that it gives the correct result
        assert_equal(grid_search.score(X, y), grid_search_no_score.score(X, y))

        # giving no scoring function raises an error
        grid_search_no_score = ATGridSearchCV(
            clf_no_score, {'C': Cs}, webserver_url=self.live_server_url)
        # NOTE(review): ``webserver_url`` is passed here as a keyword to
        # assert_raise_message, which presumably forwards it to ``fit`` --
        # confirm that is intended.
        assert_raise_message(TypeError,
                             "no scoring",
                             grid_search_no_score.fit, [[1]],
                             webserver_url=self.live_server_url)
Exemplo n.º 54
0
def test_GraphPlot(c, s, a, b):
    """Exercise ``GraphPlot.update`` across simple futures, dask arrays and released tasks.

    Checks node/edge source sizes after a five-task map plus a sum and that
    repeated updates keep working while tasks are added and forgotten.
    """
    gp = GraphPlot(s)
    futures = c.map(inc, range(5))
    total = c.submit(sum, futures)
    yield total

    gp.update()
    # Six nodes (five inc + one sum) and five edges, in every column.
    assert set(map(len, gp.node_source.data.values())) == {6}
    assert set(map(len, gp.edge_source.data.values())) == {5}

    da = pytest.importorskip('dask.array')
    x = da.random.random((20, 20), chunks=(10, 10)).persist()
    y = (x + x.T) - x.mean(axis=0)
    y = y.persist()
    yield wait(y)

    # Repeated updates with a larger graph must not raise.
    gp.update()
    gp.update()

    yield c.compute((x + y).sum())

    gp.update()

    future = c.submit(inc, 10)
    future2 = c.submit(inc, future)
    yield wait(future2)
    key = future.key
    # Drop the references, then wait until the scheduler no longer tracks
    # the task.
    del future, future2
    while key in s.tasks:
        yield gen.sleep(0.01)

    assert 'memory' in gp.node_source.data['state']

    gp.update()
    gp.update()

    # At least one edge is still marked visible.
    assert not all(x == 'False' for x in gp.edge_source.data['visible'])
Exemplo n.º 55
0
def test_TaskGraph_limit(c, s, a, b):
    """``TaskGraph`` stops growing its node source beyond two entries.

    Three tasks are submitted one by one; the node count goes 1, 2, 2.  The
    cap of two is presumably configured outside this function -- confirm.
    """
    gp = TaskGraph(s)

    def func(x):
        return x

    f1 = c.submit(func, 1)
    yield wait(f1)
    gp.update()
    assert len(gp.node_source.data["x"]) == 1
    f2 = c.submit(func, 2)
    yield wait(f2)
    gp.update()
    assert len(gp.node_source.data["x"]) == 2
    f3 = c.submit(func, 3)
    yield wait(f3)
    gp.update()
    # Third task does not add a third node.
    assert len(gp.node_source.data["x"]) == 2
    del f1
    del f2
    del f3
    _ = c.submit(func, 1)

    # NOTE(review): the value returned by async_wait_for is discarded rather
    # than yielded.  If it returns a coroutine/future, this final condition is
    # never actually awaited or checked -- confirm.  Also note gp.update() is
    # not called after the last submit.
    async_wait_for(lambda: len(gp.node_source.data["x"]) == 1, timeout=1)
Exemplo n.º 56
0
def test_resources_str(c, s, a, b):
    """``persist(resources=...)`` on a dataframe result restricts its first and last task."""
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")

    yield a.set_resources(MyRes=1)

    source = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
    x = dd.from_pandas(source, npartitions=1)
    y = x.apply(lambda row: row.sum(), axis=1, meta=(None, "int64"))
    yy = y.persist(resources={"MyRes": 1})
    yield wait(yy)

    # Check the scheduler's record for the first and the last key of ``y``.
    for position in (0, -1):
        task_state = s.tasks[tokey(y.__dask_keys__()[position])]
        assert task_state.resource_restrictions == {"MyRes": 1}
Exemplo n.º 57
0
def test_progressbar_done(client):
    """ProgressWidget shows success for done tasks and the exception repr for a failed one."""
    futures = [client.submit(inc, i) for i in range(5)]
    wait(futures)
    widget = ProgressWidget(futures)
    client.sync(widget.listen)
    assert widget.status == 'finished'
    assert widget.bar.value == 1.0
    assert widget.bar.bar_style == 'success'
    assert 'Finished' in widget.elapsed_time.value

    # A task built from ``throws`` should flip the bar to 'danger'.
    failing = client.submit(throws, futures)
    wait([failing])

    widget = ProgressWidget([failing])
    client.sync(widget.listen)
    assert widget.status == 'error'
    assert widget.bar.value == 0.0
    assert widget.bar.bar_style == 'danger'
    assert 'Exception' in widget.elapsed_time.value

    # Raise the same error locally and look for its repr in the widget text.
    try:
        throws(1)
    except Exception as exc:
        assert repr(exc) in widget.elapsed_time.value
Exemplo n.º 58
0
def test_progressbar_done(client):
    """ProgressWidget shows success for done tasks and the exception repr for a failed one."""
    futures = [client.submit(inc, i) for i in range(5)]
    wait(futures)
    widget = ProgressWidget(futures)
    client.sync(widget.listen)
    assert widget.status == "finished"
    assert widget.bar.value == 1.0
    assert widget.bar.bar_style == "success"
    assert "Finished" in widget.elapsed_time.value

    # A task built from ``throws`` should flip the bar to "danger".
    failing = client.submit(throws, futures)
    wait([failing])

    widget = ProgressWidget([failing])
    client.sync(widget.listen)
    assert widget.status == "error"
    assert widget.bar.value == 0.0
    assert widget.bar.bar_style == "danger"
    assert "Exception" in widget.elapsed_time.value

    # Raise the same error locally and look for its repr in the widget text.
    try:
        throws(1)
    except Exception as exc:
        assert repr(exc) in widget.elapsed_time.value
Exemplo n.º 59
0
    def test_grid_search_score_method(self):
        """``score`` follows the ``scoring`` argument, with or without an estimator ``score`` method.

        Four searches are fitted over the same single-point grid: no scoring,
        'accuracy', and 'roc_auc' on both a regular and a score-less
        estimator.  Their ``score`` results are then compared pairwise.
        """
        X, y = make_classification(n_samples=100,
                                   n_classes=2,
                                   flip_y=.2,
                                   random_state=0)
        clf = LinearSVC(random_state=0)
        grid = {'C': [.1]}

        search_no_scoring = ATGridSearchCV(clf,
                                           grid,
                                           scoring=None,
                                           webserver_url=self.live_server_url)
        wait(search_no_scoring.fit(X, y))
        search_accuracy = ATGridSearchCV(clf,
                                         grid,
                                         scoring='accuracy',
                                         webserver_url=self.live_server_url)
        wait(search_accuracy.fit(X, y))
        search_no_score_method_auc = ATGridSearchCV(
            LinearSVCNoScore(),
            grid,
            scoring='roc_auc',
            webserver_url=self.live_server_url)
        wait(search_no_score_method_auc.fit(X, y))
        search_auc = ATGridSearchCV(clf,
                                    grid,
                                    scoring='roc_auc',
                                    webserver_url=self.live_server_url)
        wait(search_auc.fit(X, y))

        # ChangedBehaviourWarning occurred previously (prior to #9005)
        score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y)
        score_accuracy = assert_no_warnings(search_accuracy.score, X, y)
        score_no_score_auc = assert_no_warnings(
            search_no_score_method_auc.score, X, y)
        score_auc = assert_no_warnings(search_auc.score, X, y)

        # ensure the test is sane
        assert_true(score_auc < 1.0)
        assert_true(score_accuracy < 1.0)
        assert_not_equal(score_auc, score_accuracy)

        # No scoring behaves like 'accuracy'; 'roc_auc' gives the same score
        # whether or not the estimator has its own score method.
        assert_almost_equal(score_accuracy, score_no_scoring)
        assert_almost_equal(score_auc, score_no_score_auc)
Exemplo n.º 60
0
def test_CommunicatingStream(c, s, a, b):
    """Both workers' ``CommunicatingStream`` record outgoing and incoming entries.

    Inputs are pinned to different workers and then combined on each, so
    data has to be exchanged in both directions.
    """
    stream_a = CommunicatingStream(a)
    stream_b = CommunicatingStream(b)

    xs = c.map(inc, range(10), workers=a.address)
    ys = c.map(dec, range(10), workers=b.address)
    adds = c.map(add, xs, ys, workers=a.address)
    subs = c.map(sub, xs, ys, workers=b.address)

    yield wait([adds, subs])

    stream_a.update()
    stream_b.update()

    # First column of each source must be non-empty, outgoing then incoming.
    for stream in (stream_a, stream_b):
        assert len(first(stream.outgoing.data.values()))
    for stream in (stream_a, stream_b):
        assert len(first(stream.incoming.data.values()))