示例#1
0
def test_cupy():
    """Check that the cupy runtime optimizer produces a fused chunk.

    Builds a small GPU tensor expression, tiles it into a chunk graph with
    fusing disabled in the builder, runs ``CupyRuntimeOptimizer`` on that
    graph and asserts that at least one node's op class is named
    ``TensorCpFuseChunk``.
    """
    matrix = mt.ones((100, 50), chunk_size=50, gpu=True)
    vector = mt.ones(50, chunk_size=50, gpu=True)
    numerator = matrix - vector
    denominator = mt.sqrt(vector * (1 - vector) * len(vector))
    expr = numerator / denominator

    tileable_graph = TileableGraph([expr.data])
    next(TileableGraphBuilder(tileable_graph).build())

    tile_context = {}
    builder = ChunkGraphBuilder(
        tileable_graph, fuse_enabled=False, tile_context=tile_context)
    chunk_graph = next(builder.build())

    CupyRuntimeOptimizer(chunk_graph).optimize()

    # Collect the op class names present after optimization.
    op_class_names = {node.op.__class__.__name__ for node in chunk_graph}
    assert 'TensorCpFuseChunk' in op_class_names
示例#2
0
def test_k_means_init_large_n_clusters():
    """Check chunk sizes produced by ``k-means||`` centroid initialisation.

    Initialises 2000 centroids over a 1,000,000 x 64 random tensor, tiles
    the result into a chunk graph and asserts that every chunk with a known
    (non-NaN) ``nbytes`` is at most twice ``options.chunk_store_limit``.
    """
    max_chunk_bytes = options.chunk_store_limit * 2
    cluster_count = 2000
    samples = mt.random.rand(1000_000, 64, chunk_size=250_000)

    centroids = _init_centroids(samples, cluster_count, init='k-means||')

    tileable_graph = TileableGraph([centroids])
    tiled_graph = next(TileableGraphBuilder(tileable_graph).build())
    chunk_graph = next(ChunkGraphBuilder(tiled_graph).build())

    for chunk in chunk_graph:
        size = chunk.nbytes
        if np.isnan(size):
            # NaN sizes are excluded from the limit check.
            continue
        assert size <= max_chunk_bytes
示例#3
0
文件: test_head.py 项目: haijohn/mars
def test_read_csv_head(gen_data1):
    """Check how ``optimize`` treats ``read_csv(...).head(5)``.

    Writes the fixture frame to ``test.csv``, builds the chunk graph for
    ``md.read_csv(path).head(5)`` and runs ``optimize`` on it. Expects no
    optimization result for the read chunk, an optimized head chunk whose op
    has ``nrows == 5``, a chunk graph of exactly one node, and the optimized
    chunk among the graph results.

    ``gen_data1`` is unpacked as ``(frame, directory)``.
    """
    source_frame, workdir = gen_data1
    csv_path = os.path.join(workdir, 'test.csv')
    source_frame.to_csv(csv_path)

    full_df = md.read_csv(csv_path)
    head_df = full_df.head(5)

    tileable_graph = TileableGraph([head_df.data])
    next(TileableGraphBuilder(tileable_graph).build())

    tile_context = {}
    builder = ChunkGraphBuilder(
        tileable_graph, fuse_enabled=False, tile_context=tile_context)
    chunk_graph = next(builder.build())

    # First chunk of each tileable, looked up through the tile context.
    read_chunk = tile_context[full_df.data].chunks[0].data
    head_chunk = tile_context[head_df.data].chunks[0].data

    records = optimize(chunk_graph)

    optimized_read = records.get_optimization_result(read_chunk)
    assert optimized_read is None

    optimized_head = records.get_optimization_result(head_chunk)
    assert optimized_head.op.nrows == 5
    assert len(chunk_graph) == 1
    assert optimized_head in chunk_graph.results
示例#4
0
def test_groupby_read_csv(gen_data1):
    """Check how ``optimize`` treats column selection on ``read_csv``.

    Despite the name, the body exercises ``df[['a', 'b']]`` on a frame read
    from CSV. After ``optimize`` it expects no optimization result for the
    read chunk, an optimized selection chunk whose op has
    ``usecols == ['a', 'b']``, and the original selection chunk still wired
    to the original read chunk.

    ``gen_data1`` is unpacked as ``(frame, directory)``.
    """
    source_frame, workdir = gen_data1
    csv_path = os.path.join(workdir, 'test.csv')
    source_frame.to_csv(csv_path)

    full_df = md.read_csv(csv_path)
    selected_df = full_df[['a', 'b']]

    tileable_graph = TileableGraph([selected_df.data])
    next(TileableGraphBuilder(tileable_graph).build())

    tile_context = {}
    builder = ChunkGraphBuilder(
        tileable_graph, fuse_enabled=False, tile_context=tile_context)
    chunk_graph = next(builder.build())

    read_chunk = tile_context[full_df.data].chunks[0].data
    selected_chunk = tile_context[selected_df.data].chunks[0].data

    records = optimize(chunk_graph)

    assert records.get_optimization_result(read_chunk) is None

    optimized_selected = records.get_optimization_result(selected_chunk)
    assert optimized_selected is not None
    assert optimized_selected.op.usecols == ['a', 'b']

    # The pre-optimization chunk must keep its original input.
    assert selected_chunk.inputs[0] is read_chunk
示例#5
0
async def test_execute_with_cancel(actor_pool, cancel_phase):
    """Cancel a long-running subtask and check that execution recovers.

    The test submits a subtask whose remote function sleeps for 100 seconds,
    cancels it after one second with ``kill_timeout=1``, and asserts that the
    pending task raises ``asyncio.CancelledError`` and that the whole
    cancellation takes less than 6 seconds. It then runs a second, short
    subtask on the same band to check that execution still works.

    Parameters
    ----------
    actor_pool
        Unpacked as ``(pool, session_id, meta_api, storage_api,
        execution_ref)``; presumably a pytest fixture -- confirm against the
        surrounding test module.
    cancel_phase
        Compared against ``'prepare'``, ``'quota'`` and ``'slot'`` to pick
        the actor whose fetch is delayed; any other value delays nothing.
    """
    pool, session_id, meta_api, storage_api, execution_ref = actor_pool

    # Pick the actor to delay for the requested phase (none for other values).
    ref_to_delay = None
    if cancel_phase == 'prepare':
        ref_to_delay = await mo.actor_ref(StorageManagerActor.default_uid(),
                                          address=pool.external_address)
    elif cancel_phase == 'quota':
        ref_to_delay = await mo.actor_ref(QuotaActor.gen_uid('numa-0'),
                                          address=pool.external_address)
    elif cancel_phase == 'slot':
        ref_to_delay = await mo.actor_ref(
            BandSlotManagerActor.gen_uid('numa-0'),
            address=pool.external_address)
    # NOTE(review): truthiness check here, but ``is not None`` further down --
    # presumably equivalent for actor refs; confirm and unify.
    if ref_to_delay:
        # Presumably stalls the actor so the cancel lands in that phase --
        # TODO confirm against the mock actor implementation.
        await ref_to_delay.set_delay_fetch_time(100)

    def delay_fun(delay, _inp1):
        # Blocks for ``delay`` seconds and returns it; ``_inp1`` is unused.
        time.sleep(delay)
        return delay

    # Input chunk plus a remote function chunk that sleeps for 100 seconds.
    input1 = TensorFetch(key='input1',
                         source_key='input1',
                         dtype=np.dtype(int)).new_chunk([])
    remote_result = RemoteFunction(function=delay_fun, function_args=[100, input1],
                                   function_kwargs={}, n_output=1) \
        .new_chunk([input1])

    # Register meta for the input chunk and store its data under its key.
    data1 = np.random.rand(10, 10)
    await meta_api.set_chunk_meta(input1,
                                  memory_size=data1.nbytes,
                                  store_size=data1.nbytes,
                                  bands=[(pool.external_address, 'numa-0')])
    await storage_api.put(input1.key, data1)

    # Two-node chunk graph: input1 -> remote_result (the result chunk).
    chunk_graph = ChunkGraph([remote_result])
    chunk_graph.add_node(input1)
    chunk_graph.add_node(remote_result)
    chunk_graph.add_edge(input1, remote_result)

    subtask = Subtask(f'test_task_{uuid.uuid4()}',
                      session_id=session_id,
                      chunk_graph=chunk_graph)
    # Start the subtask in the background, then wait a second before cancelling.
    aiotask = asyncio.create_task(
        execution_ref.run_subtask(subtask, 'numa-0', pool.external_address))
    await asyncio.sleep(1)

    # Time the cancel together with the wait for the task to fail.
    with Timer() as timer:
        await execution_ref.cancel_subtask(subtask.subtask_id, kill_timeout=1)
        with pytest.raises(asyncio.CancelledError):
            await asyncio.wait_for(aiotask, timeout=30)
    assert timer.duration < 6

    # The delayed actor must report the cancel; then remove the delay again.
    if ref_to_delay is not None:
        assert await ref_to_delay.get_is_cancelled()
        await ref_to_delay.set_delay_fetch_time(0)

    # Run a second, short (0.5 s) task on the same band to check that the
    # slot is usable again.
    remote_tileable = mr.spawn(delay_fun, args=(0.5, None))
    graph = TileableGraph([remote_tileable.data])
    next(TileableGraphBuilder(graph).build())

    chunk_graph = next(ChunkGraphBuilder(graph, fuse_enabled=False).build())

    subtask = Subtask(f'test_task2_{uuid.uuid4()}',
                      session_id=session_id,
                      chunk_graph=chunk_graph)
    await asyncio.wait_for(execution_ref.run_subtask(subtask, 'numa-0',
                                                     pool.external_address),
                           timeout=30)