Пример #1
0
def test_task_unlimited():
    """Check spill/restore/fallback accounting when a task reads a spilled
    object and then puts a new one.

    The expected megabyte figures are the ones passed to ``check_spilled_mb``
    at each step.
    """
    try:
        address = _init_ray()
        spilled_ref = ray.put(np.zeros(400 * MB, dtype=np.uint8))
        refs = [spilled_ref]
        # The first object gets spilled by the put below.
        pinned_ref = ray.put(np.zeros(400 * MB, dtype=np.uint8))
        pinned_view = ray.get(pinned_ref)
        sentinel_ref = ray.put(np.zeros(100 * MB, dtype=np.uint8))
        check_spilled_mb(address, spilled=400)

        @ray.remote
        def consume(task_refs):
            # Reading the first ref triggers a fallback allocation and the
            # spilling of the sentinel.
            ray.get(task_refs[0])
            check_spilled_mb(
                address, spilled=500, restored=400, fallback=400
            )
            # This put triggers a fallback allocation as well.
            return ray.put(np.zeros(400 * MB, dtype=np.uint8))

        # round 1: the task returns a ref, so unwrap twice.
        inner_ref = ray.get(consume.remote(refs))
        _ = ray.get(inner_ref)
        del inner_ref
        check_spilled_mb(address, spilled=500, restored=400, fallback=400)

        # These names are only dropped here, after the final check.
        del pinned_view, sentinel_ref
    finally:
        ray.shutdown()
Пример #2
0
def test_spilling_when_possible_on_put():
    """Put five 400 MB arrays and expect 1600 MB to be reported as spilled."""
    try:
        address = _init_ray()
        # Keep all returned refs in a list for the duration of the check.
        held_refs = [  # noqa: F841
            ray.put(np.zeros(400 * MB, dtype=np.uint8)) for _ in range(5)
        ]
        check_spilled_mb(address, spilled=1600)
    finally:
        ray.shutdown()
Пример #3
0
def test_fallback_when_spilling_impossible_on_put():
    """Put two 400 MB arrays while holding the fetched value of each, then
    expect 400 MB of fallback allocation and no spilling figure
    (``spilled=None``).
    """
    try:
        address = _init_ray()
        first_ref = ray.put(np.zeros(400 * MB, dtype=np.uint8))
        first_view = ray.get(first_ref)
        # The second object will be fallback allocated on the filesystem.
        second_ref = ray.put(np.zeros(400 * MB, dtype=np.uint8))
        second_view = ray.get(second_ref)
        # Drop the fetched values (in this order) before checking the stats.
        del first_view, second_view
        check_spilled_mb(address, spilled=None, fallback=400)
    finally:
        ray.shutdown()
Пример #4
0
def test_plasma_allocate(shutdown_only):
    """Put four 90 MiB random arrays into a 300 MiB object store with
    automatic spilling enabled and check the reported spilled amount.
    """
    system_config = {
        "max_io_workers": 4,
        "automatic_object_spilling_enabled": True,
    }
    address = ray.init(
        object_store_memory=300 * 1024 ** 2,
        _system_config=system_config,
        _temp_dir="/tmp/for_test_plasma_allocate",
    )
    payload = np.random.randint(
        low=0, high=256, size=(90 * 1024 ** 2,), dtype=np.uint8
    )
    put_refs = [ray.put(payload) for _ in range(3)]
    # Hold the fetched second and third objects; this forces the first
    # object to be evicted.
    fetched_tail = ray.get(put_refs[1:])  # noqa
    # Hold the ref of the fourth object so plasma GC does not release it.
    fourth_ref = ray.put(payload)  # noqa

    # Check that the fourth object was allocated in memory.
    check_spilled_mb(address, spilled=[90, 180])
Пример #5
0
def test_task_unlimited_multiget_args():
    """Run 1000 tasks that each ``ray.get`` ten 200 MB refs at once and check
    the spilled/restored/fallback figures afterwards.
    """
    try:
        address = _init_ray()
        # Too many refs to fit into memory.
        refs = [
            ray.put(np.zeros(200 * MB, dtype=np.uint8)) for _ in range(10)
        ]
        big_ref = ray.put(np.zeros(600 * MB, dtype=np.uint8))
        big_view = ray.get(big_ref)
        check_spilled_mb(address, spilled=2000)

        @ray.remote
        def consume(task_refs):
            # Fetching all refs together should work without thrashing.
            ray.get(task_refs)
            return os.getpid()

        # The list of task results is a temporary, dropped once fetched.
        ray.get(
            [consume.remote(refs) for _ in range(1000)]
        )
        check_spilled_mb(
            address, spilled=2000, restored=2000, fallback=2000
        )
        del big_view
    finally:
        ray.shutdown()
Пример #6
0
def test_spilling_when_possible_on_get():
    """Put two 400 MB arrays, then fetch each one without keeping the result,
    checking the spilled/restored figures after every step.
    """
    try:
        address = _init_ray()
        first_ref = ray.put(np.zeros(400 * MB, dtype=np.uint8))
        # The second put causes the first object to be spilled.
        second_ref = ray.put(np.zeros(400 * MB, dtype=np.uint8))
        check_spilled_mb(address, spilled=400)

        # Fetching the first object restores it and spills the second;
        # fetching the second then restores it, spilling the first.
        # The fetched values are discarded immediately.
        steps = ((first_ref, 400), (second_ref, 800))
        for ref, restored_mb in steps:
            ray.get(ref)
            check_spilled_mb(address, spilled=800, restored=restored_mb)
    finally:
        ray.shutdown()
Пример #7
0
def test_fallback_when_spilling_impossible_on_get():
    """Put two 400 MB arrays, then fetch both while holding the fetched
    values, and expect the second fetch to report 400 MB of fallback
    allocation.
    """
    try:
        address = _init_ray()
        first_ref = ray.put(np.zeros(400 * MB, dtype=np.uint8))
        # The second put causes the first object to be spilled.
        second_ref = ray.put(np.zeros(400 * MB, dtype=np.uint8))
        check_spilled_mb(address, spilled=400)

        # Fetching the first object restores it and spills the second.
        first_view = ray.get(first_ref)
        check_spilled_mb(address, spilled=800, restored=400)

        # Fetching the second object restores it, which triggers a
        # fallback allocation.
        second_view = ray.get(second_ref)
        check_spilled_mb(address, spilled=800, restored=800, fallback=400)

        # The fetched values are held until after the last check.
        del first_view, second_view
    finally:
        ray.shutdown()
Пример #8
0
def test_object_store_memory_metrics_reported_correctly(shutdown_only):
    """
    Verify when fallback allocation is used, prometheus stats report the correct
    used object store memory. https://github.com/ray-project/ray/issues/24624
    """
    obj_store_memory = 700e6
    address = ray.init(
        num_cpus=2,
        object_store_memory=obj_store_memory,
        _system_config={"metrics_report_interval_ms": 1000},
    )
    metrics_export_port = address["metrics_export_port"]
    addr = address["node_ip_address"]
    prom_addr = f"{addr}:{metrics_export_port}"

    x1 = ray.put(np.zeros(400 * MB, dtype=np.uint8))
    # x1 will be spilled.
    x2 = ray.put(np.zeros(400 * MB, dtype=np.uint8))
    check_spilled_mb(address, spilled=400)
    # x1 will be restored, x2 will be spilled.
    x1p = ray.get(x1)
    check_spilled_mb(address, spilled=800, restored=400)
    # x2 will be restored, triggering a fallback allocation.
    x2p = ray.get(x2)
    check_spilled_mb(address, spilled=800, restored=800, fallback=400)

    def verify_used_object_store_memory(expected_mb):
        """Compare the exported object store metrics with the expected usage.

        Args:
            expected_mb: Expected value of ``ray_object_store_used_memory``,
                in whole megabytes (bytes / 1024 / 1024, truncated).

        Returns:
            False when any of the three metrics has not been exported yet,
            True when all are present and the assertions below hold.

        Raises:
            AssertionError: when the metrics are present but do not match.
        """
        # Only the samples are needed; the other two returned values are
        # not used here.
        _, _, metric_samples = fetch_prometheus([prom_addr])

        def in_mb(num_bytes):
            return int(num_bytes / 1024 / 1024)

        total_memory = in_mb(obj_store_memory)
        available_memory_sample = None
        used_memory_sample = None
        fallback_memory_sample = None

        # When a name occurs more than once, the last sample seen is kept.
        for sample in metric_samples:
            if sample.name == "ray_object_store_available_memory":
                available_memory_sample = sample
            if sample.name == "ray_object_store_used_memory":
                used_memory_sample = sample
            if sample.name == "ray_object_store_fallback_memory":
                fallback_memory_sample = sample

        samples = (
            available_memory_sample,
            used_memory_sample,
            fallback_memory_sample,
        )
        if any(sample is None for sample in samples):
            return False

        avail_memory = in_mb(available_memory_sample.value)
        used_memory = in_mb(used_memory_sample.value)
        fallback_memory = in_mb(fallback_memory_sample.value)

        assert avail_memory == total_memory - used_memory
        # Previously this compared against a hard-coded 400 and silently
        # ignored ``expected_mb``.
        assert used_memory == expected_mb
        # Matches the fallback=400 figure checked above.
        assert fallback_memory == 400
        return True

    # 400 is the value this test has always asserted for used memory.
    wait_for_condition(lambda: verify_used_object_store_memory(expected_mb=400))
    # x1p and x2p are held until the metrics check has finished.
    del x1p
    del x2p
Пример #9
0
 def consume(refs):
     """Fetch the first ref, check the spill stats, then put a new 400 MB
     array and return its ref.

     ``address`` comes from an enclosing scope that is not part of this
     fragment.
     """
     # Fetching triggers a fallback allocation and spilling of the sentinel.
     first_ref = refs[0]
     ray.get(first_ref)
     check_spilled_mb(
         address, spilled=500, restored=400, fallback=400
     )
     # The put below triggers a fallback allocation too.
     payload = np.zeros(400 * MB, dtype=np.uint8)
     return ray.put(payload)