def test_sync_nodes_only_diff(ray_start_2_cpus, temp_data_dirs):
    """Verify that a repeated sync only packs files that differ.

    A first sync copies the source directory to the target. Afterwards a
    new file is added to the source and an existing one is rewritten. The
    second sync is started with ``return_futures=True`` so that the
    reported file stats and the packed tarball can be inspected directly.
    """
    src_dir, dst_dir = temp_data_dirs

    # Sanity check: source files exist, target does not have them yet
    for expected, root, rel_path in (
        (True, src_dir, "level0.txt"),
        (True, src_dir, "subdir/level1.txt"),
        (False, dst_dir, "level0.txt"),
        (False, dst_dir, "level0_new.txt"),
    ):
        assert_file(expected, root, rel_path)

    # Source and target live on the same node; only the paths differ
    local_ip = ray.util.get_node_ip_address()
    sync_kwargs = {
        "source_ip": local_ip,
        "source_path": src_dir,
        "target_ip": local_ip,
        "target_path": dst_dir,
    }
    _sync_dir_between_different_nodes(**sync_kwargs)

    # After the first sync the existing files are mirrored in the target
    for expected, root, rel_path in (
        (True, src_dir, "level0.txt"),
        (True, dst_dir, "level0.txt"),
        (True, dst_dir, "subdir/level1.txt"),
        (False, dst_dir, "level0_new.txt"),
    ):
        assert_file(expected, root, rel_path)

    # Add new file
    added_path = os.path.join(src_dir, "level0_new.txt")
    with open(added_path, "w") as fp:
        fp.write("Data\n")

    # Modify existing file
    modified_path = os.path.join(src_dir, "subdir", "level1.txt")
    with open(modified_path, "w") as fp:
        fp.write("New data\n")

    unpack_future, packer, stats_future = _sync_dir_between_different_nodes(
        return_futures=True,
        **sync_kwargs,
    )

    stats = ray.get(stats_future)
    packed_bytes = ray.get(packer.get_full_data.remote())

    assert "./level0.txt" in stats
    assert "./level0_new.txt" not in stats  # Was not in target dir
    assert "subdir/level1.txt" in stats

    # Only the member names are needed, so the archive can be closed
    # before the assertions run.
    with tarfile.open(fileobj=io.BytesIO(packed_bytes)) as archive:
        members = archive.getnames()

    assert "./level0.txt" not in members
    assert "./level0_new.txt" in members
    assert "subdir/level1.txt" in members
    assert len(members) == 7  # 3 files, 4 dirs (including root)

    ray.get(unpack_future)  # Wait until finished for teardown
def test_max_size_exceeded(ray_start_2_cpus, temp_data_dirs):
    """Check that syncing with ``max_size_bytes=2`` raises ``RayTaskError``."""
    src_dir, dst_dir = temp_data_dirs
    local_ip = ray.util.get_node_ip_address()

    sync_kwargs = {
        "source_ip": local_ip,
        "source_path": src_dir,
        "target_ip": local_ip,
        "target_path": dst_dir,
        "max_size_bytes": 2,
    }
    with pytest.raises(RayTaskError):
        _sync_dir_between_different_nodes(**sync_kwargs)
def test_sync_nodes(ray_start_2_cpus, temp_data_dirs):
    """Verify that a node-to-node sync places the source files in the target."""
    src_dir, dst_dir = temp_data_dirs
    synced_files = ("level0.txt", "subdir/level1.txt")

    # Before the sync: files exist in the source but not in the target
    for rel_path in synced_files:
        assert_file(True, src_dir, rel_path)
    for rel_path in synced_files:
        assert_file(False, dst_dir, rel_path)

    local_ip = ray.util.get_node_ip_address()
    sync_kwargs = {
        "source_ip": local_ip,
        "source_path": src_dir,
        "target_ip": local_ip,
        "target_path": dst_dir,
    }
    _sync_dir_between_different_nodes(**sync_kwargs)

    # After the sync: the same files are present in the target
    for rel_path in synced_files:
        assert_file(True, dst_dir, rel_path)
def test_multi_sync_different_node(ray_start_2_cpus, temp_data_dirs, num_workers):
    """Verify that concurrent syncs into one target dir do not clash.

    ``num_workers`` syncs are launched with ``return_futures=True`` and the
    first returned future of each is awaited before checking the target.
    """
    src_dir, dst_dir = temp_data_dirs
    expected_files = ("level0.txt", "subdir/level1.txt")

    for rel_path in expected_files:
        assert_file(True, src_dir, rel_path)

    local_ip = ray.util.get_node_ip_address()

    # Launch all syncs first, then block on them together
    pending = []
    for _ in range(num_workers):
        sync_result = _sync_dir_between_different_nodes(
            source_ip=local_ip,
            source_path=src_dir,
            target_ip=local_ip,
            target_path=dst_dir,
            return_futures=True,
        )
        pending.append(sync_result[0])
    ray.get(pending)

    for rel_path in expected_files:
        assert_file(True, dst_dir, rel_path)
示例#5
0
    def _testSyncBetweenNodesAndDelete(self, num_workers: int = 1):
        """Sync a directory up and back down between nodes, then delete it.

        Steps:
            1. Populate a temporary source directory.
            2. Sync source -> "up" target with ``num_workers`` parallel syncs.
            3. Check that a sync with ``max_size_bytes=2`` raises
               ``RayTaskError`` and leaves the "down" target empty.
            4. Sync "up" target -> "down" target with ``num_workers`` syncs.
            5. Delete the "up" target via ``delete_on_node``.

        Args:
            num_workers: Number of syncs launched concurrently per direction.
        """
        source_dir = tempfile.mkdtemp()
        up_dir = tempfile.mkdtemp()
        down_dir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, source_dir)
        # The "up" dir is removed at the end of this test, so cleanup must
        # tolerate it being gone already.
        self.addCleanup(shutil.rmtree, up_dir, ignore_errors=True)
        self.addCleanup(shutil.rmtree, down_dir)

        nested_dir = os.path.join(source_dir, "dir_level0", "dir_level1")
        os.makedirs(nested_dir)
        data_file = os.path.join(source_dir, "dir_level0", "file_level1.txt")
        with open(data_file, "w") as fp:
            fp.write("Data\n")

        # Sanity check
        self._check_dir_contents(source_dir)
        local_ip = ray.util.get_node_ip_address()

        def sync_with_all_workers(src, dst):
            """Start ``num_workers`` syncs from src to dst and wait for them."""
            pending = []
            for _ in range(num_workers):
                sync_result = _sync_dir_between_different_nodes(
                    source_ip=local_ip,
                    source_path=src,
                    target_ip=local_ip,
                    target_path=dst,
                    return_futures=True,
                )
                pending.append(sync_result[0])
            ray.get(pending)

        sync_with_all_workers(source_dir, up_dir)

        # Check sync up
        self._check_dir_contents(up_dir)

        # Max size exceeded
        with self.assertRaises(RayTaskError):
            _sync_dir_between_different_nodes(
                source_ip=local_ip,
                source_path=up_dir,
                target_ip=local_ip,
                target_path=down_dir,
                max_size_bytes=2,
            )

        # The failed sync must not have written anything
        assert not os.listdir(down_dir)

        sync_with_all_workers(up_dir, down_dir)

        # Check sync down
        self._check_dir_contents(down_dir)

        # Delete in some dir
        delete_on_node(node_ip=local_ip, path=up_dir)

        assert not os.path.exists(up_dir)
示例#6
0
 def _sync_function(self, *args, **kwargs):
     """Forward all arguments to ``_sync_dir_between_different_nodes``.

     Returns whatever that function returns, unchanged.
     """
     sync_result = _sync_dir_between_different_nodes(*args, **kwargs)
     return sync_result