示例#1
0
    def test_fs_checkpoint_additional_fields(self):
        """An extra dict key and an extra file both survive round trips.

        The checkpoint from ``_prepare_fs_checkpoint`` is converted to a dict,
        given a new key, written to a directory, given a new file, and then
        cycled through dict and directory form once more.
        """
        # Inject a new key through the dict representation.
        augmented = self._prepare_fs_checkpoint().to_dict()
        augmented["additional_field"] = "data"

        # Write the augmented dict checkpoint out as a directory.
        checkpoint_dir = Checkpoint.from_dict(augmented).to_directory()
        for filename in ("test_data.pkl", "additional_field.meta.pkl"):
            assert os.path.exists(os.path.join(checkpoint_dir, filename))

        # Drop an unrelated file next to the checkpoint data.
        extra_file = os.path.join(checkpoint_dir, "even_more.txt")
        with open(extra_file, "w") as handle:
            handle.write("More\n")

        # Directory -> dict: the injected key is still readable.
        new_dict = Checkpoint.from_directory(checkpoint_dir).to_dict()
        assert new_dict["additional_field"] == "data"

        # Dict -> directory: the data file, the ``.meta.pkl`` file and the
        # extra text file are all present.
        new_dir = Checkpoint.from_dict(new_dict).to_directory()
        for filename in (
                "test_data.pkl",
                "additional_field.meta.pkl",
                "even_more.txt",
        ):
            assert os.path.exists(os.path.join(new_dir, filename))
示例#2
0
def test_air_integrations_reconfigure(serve_instance):
    """Check that ``user_config`` overrides the checkpoint a deployment was bound with.

    The deployment is bound to a URI checkpoint with ``increment`` 2, while
    ``user_config`` carries a dict checkpoint with ``increment`` 5. The
    expected response for input 40 is 45, i.e. the ``user_config`` checkpoint
    is the one that ends up being used.

    Args:
        serve_instance: Fixture; not referenced directly in the test body.
    """
    # Persist the "original" checkpoint (increment 2) behind a file:// URI.
    path = tempfile.mkdtemp()
    uri = f"file://{path}/test_uri"
    Checkpoint.from_dict({"increment": 2}).to_uri(uri)

    predictor_cls = "ray.serve.tests.test_air_integrations.AdderPredictor"
    # The user_config carries a different checkpoint (increment 5).
    additional_config = {
        "checkpoint": {"increment": 5},
        "predictor_cls": predictor_cls,
    }

    with InputNode() as dag_input:
        m1 = PredictorDeployment.options(user_config=additional_config).bind(
            predictor_cls=predictor_cls,
            checkpoint=uri,
        )
        dag = m1.predict.bind(dag_input)
    deployments = build(Ingress.bind(dag))
    for d in deployments:
        d.deploy()

    resp = requests.post("http://127.0.0.1:8000/ingress", json={"array": [40]})
    print(resp.text)
    resp.raise_for_status()
    # Must be an assertion: a value returned from a test function is never
    # checked, so a wrong response would otherwise go unnoticed.
    assert resp.json() == {"value": [45], "batch_size": 1}
示例#3
0
def test_preprocessor_in_checkpoint(ray_start_4_cpus, tmpdir):
    """A preprocessor passed to the trainer can be loaded back from its checkpoint.

    The resulting checkpoint is cycled through dict and directory form
    (written into ``tmpdir``) before the model and preprocessor are loaded.
    """

    class DummyPreprocessor(Preprocessor):
        """Pass-through preprocessor that only records marker attributes."""

        def __init__(self):
            super().__init__()
            self.is_same = True

        def fit(self, dataset):
            # Only flag that fitting happened; nothing is learned.
            self.fitted_ = True

        def _transform_pandas(self, df: "pd.DataFrame") -> "pd.DataFrame":
            return df

    datasets = {
        TRAIN_DATASET_KEY: ray.data.from_pandas(train_df),
        "valid": ray.data.from_pandas(test_df),
    }
    result = XGBoostTrainer(
        scaling_config=scale_config,
        label_column="target",
        params=params,
        datasets=datasets,
        preprocessor=DummyPreprocessor(),
    ).fit()

    # Relocate the checkpoint: dict form -> directory under tmpdir -> reload.
    as_dict = result.checkpoint.to_dict()
    relocated_path = Checkpoint.from_dict(as_dict).to_directory(tmpdir)
    resume_from = Checkpoint.from_directory(relocated_path)

    model, preprocessor = load_checkpoint(resume_from)
    assert get_num_trees(model) == 10
    # Both marker attributes must have travelled with the checkpoint.
    assert preprocessor.is_same
    assert preprocessor.fitted_
示例#4
0
def test_resume_from_checkpoint(ray_start_4_cpus, tmpdir):
    """Resuming from a relocated checkpoint adds trees to the existing ones.

    A first run with 5 boosting rounds yields 5 trees; a second 5-round run
    resumed from the relocated checkpoint is expected to yield 10.
    """
    train_dataset = ray.data.from_pandas(train_df)
    valid_dataset = ray.data.from_pandas(test_df)

    def make_trainer(**extra_kwargs):
        # Both runs share every setting except ``resume_from_checkpoint``.
        return XGBoostTrainer(
            scaling_config=scale_config,
            label_column="target",
            params=params,
            num_boost_round=5,
            datasets={TRAIN_DATASET_KEY: train_dataset, "valid": valid_dataset},
            **extra_kwargs,
        )

    first_result = make_trainer().fit()
    xgb_model, _ = load_checkpoint(first_result.checkpoint)
    assert get_num_trees(xgb_model) == 5

    # Relocate the checkpoint: dict form -> directory under tmpdir -> reload.
    as_dict = first_result.checkpoint.to_dict()
    relocated_path = Checkpoint.from_dict(as_dict).to_directory(tmpdir)
    resume_from = Checkpoint.from_directory(relocated_path)

    second_result = make_trainer(resume_from_checkpoint=resume_from).fit()
    model, _ = load_checkpoint(second_result.checkpoint)
    assert get_num_trees(model) == 10
示例#5
0
    def test_metadata(self):
        """A marker added in dict form survives a trip through the file system.

        Steps:
          1. turn the checkpoint from ``_prepare_fs_checkpoint`` into a dict;
          2. add a marker key to that dict;
          3. rebuild a checkpoint from the dict and write it to a directory;
          4. load the directory and convert it to a dict again.

        The marker must still be present after step 4.
        """
        marked_dict = self._prepare_fs_checkpoint().to_dict()
        self.assertIsInstance(marked_dict, dict)
        marked_dict["my_marker"] = "marked"

        # Rebuild from the dict; the checkpoint should hold its data in dict form.
        checkpoint = Checkpoint.from_dict(marked_dict)
        self.assertTrue(checkpoint._data_dict)
        self._assert_fs_checkpoint(checkpoint)

        # Through a directory and back to a dict.
        directory = checkpoint.to_directory()
        data_dict_2 = Checkpoint.from_directory(directory).to_dict()
        assert data_dict_2["my_marker"] == "marked"
示例#6
0
    def _convert_directory_checkpoint_to_sync_if_needed(
            self, checkpoint: Checkpoint) -> Checkpoint:
        """Swap a directory checkpoint for a small node-IP/path dict checkpoint.

        The dict checkpoint is what gets used to sync the directory; passing
        the directory checkpoint around directly would deepcopy and serialize
        it unnecessarily.

        Args:
            checkpoint: Checkpoint to inspect.

        Returns:
            ``checkpoint`` unchanged if it has no ID file but already carries
            the node-IP and path keys; otherwise a new dict checkpoint holding
            this node's IP, the absolute checkpoint path and the ID read from
            the ID file.

        Raises:
            ValueError: If the ID file is missing and the checkpoint's dict
                form lacks the node-IP or path key.
        """
        with checkpoint.as_directory() as checkpoint_path:
            resolved_dir = Path(checkpoint_path).expanduser().absolute()

            if resolved_dir.joinpath(TUNE_CHECKPOINT_ID).exists():
                with open(resolved_dir.joinpath(TUNE_CHECKPOINT_ID),
                          "r") as id_file:
                    tune_checkpoint_id = int(id_file.read())

                sync_payload = {
                    NODE_IP_KEY: get_node_ip_address(),
                    CHECKPOINT_PATH_ON_NODE_KEY: str(resolved_dir),
                    TUNE_CHECKPOINT_ID: tune_checkpoint_id,
                }
                return Checkpoint.from_dict(sync_payload)

            # Without the ID file the checkpoint is assumed to already be a
            # sync checkpoint, so only validate that it has the sync keys.
            dict_checkpoint = checkpoint.to_dict()
            has_sync_keys = (NODE_IP_KEY in dict_checkpoint
                             and CHECKPOINT_PATH_ON_NODE_KEY in dict_checkpoint)
            if not has_sync_keys:
                raise ValueError(
                    "Wrong checkpoint format. Ensure the checkpoint is a "
                    "result of `HuggingFaceTrainer`.")
            return checkpoint
示例#7
0
    def testDataCheckpointSerde(self):
        """Dict checkpoints keep their internal representation and data."""
        data_checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})

        # The expected representation is the checkpoint's own.
        internal_repr = data_checkpoint.get_internal_representation()
        self._testCheckpointSerde(data_checkpoint, *internal_repr)
示例#8
0
def test_simple_adder(serve_instance):
    """Deploy ``AdderPredictor`` with increment 2 and expect 40 to map to 42."""
    adder_deployment = ModelWrapperDeployment.options(name="Adder")
    adder_deployment.deploy(
        predictor_cls=AdderPredictor,
        checkpoint=Checkpoint.from_dict({"increment": 2}),
    )

    request_ref = send_request.remote(json={"array": [40]})
    resp = ray.get(request_ref)
    assert resp == {"value": [42], "batch_size": 1}
示例#9
0
    def reconfigure(self, config):
        """Rebuild ``self.model`` from the settings in ``config``.

        Args:
            config: Mapping read for two keys: ``"predictor_cls"`` (passed to
                ``_load_predictor_cls``) and ``"checkpoint"`` (passed to
                ``Checkpoint.from_dict``).
        """
        from ray.air.checkpoint import Checkpoint

        predictor_cls = _load_predictor_cls(config["predictor_cls"])
        build_predictor = predictor_cls.from_checkpoint
        self.model = build_predictor(Checkpoint.from_dict(config["checkpoint"]))
示例#10
0
文件: keras.py 项目: ray-project/ray
    def _handle(self, logs: Dict, when: str = None):
        """Report metrics for event ``when``, attaching a checkpoint when due.

        Args:
            logs: Metric values for this event.
            when: Event name; used as the key into ``self._counter`` and, if
                ``self._frequency`` is a list, looked up in ``self._on``.
        """
        self._counter[when] += 1

        # A list of frequencies is indexed by the event's position in self._on.
        frequency = self._frequency
        if isinstance(frequency, list):
            frequency = frequency[self._on.index(when)]

        # Checkpoint the model weights on every ``frequency``-th occurrence.
        is_due = frequency > 0 and self._counter[when] % frequency == 0
        checkpoint = (Checkpoint.from_dict(
            {MODEL_KEY: self.model.get_weights()}) if is_due else None)

        if not self._metrics:
            # No metric selection configured: report the logs as they are.
            report_dict = logs
        elif isinstance(self._metrics, dict):
            # Dict form maps reported name -> key in ``logs``.
            report_dict = {
                name: logs[source]
                for name, source in self._metrics.items()
            }
        else:
            report_dict = {name: logs[name] for name in self._metrics}

        session.report(report_dict, checkpoint=checkpoint)
示例#11
0
def test_run(ray_start_4_cpus):
    """Tests that Train can be run without any specific backends.

    The training function reports the checkpoint it was resumed from, so the
    resulting checkpoint must hold the same value as the initial one.
    """
    key = "value"
    value = 1
    config = TestConfig()

    def train_func():
        resumed = session.get_checkpoint()
        # Echo the resumed checkpoint back as both metrics and checkpoint.
        session.report(metrics=resumed.to_dict(), checkpoint=resumed)
        return resumed.to_dict()[key]

    initial_state = {
        # this would be set during checkpoint saving
        "_current_checkpoint_id": 1,
        key: value,
    }
    checkpoint = Checkpoint.from_dict(initial_state)

    trainer = DataParallelTrainer(
        train_func,
        backend_config=config,
        resume_from_checkpoint=checkpoint,
        scaling_config=ScalingConfig(num_workers=2),
    )
    results = trainer.fit()

    reported_value = results.checkpoint.to_dict()[key]
    assert reported_value == checkpoint.to_dict()[key]
示例#12
0
def test_resume_from_checkpoint(ray_start_4_cpus, tmpdir):
    """A resumed run continues counting epochs from the relocated checkpoint.

    The first run ends with epoch 1; the run resumed from that checkpoint is
    expected to end with epoch 2.
    """

    def train_func():
        resumed = session.get_checkpoint()
        # Start from the stored epoch when resuming, otherwise from zero.
        start_epoch = resumed.to_dict()["epoch"] if resumed else 0
        for i in range(start_epoch, start_epoch + 2):
            session.report({"epoch": i}, checkpoint=Checkpoint.from_dict({"epoch": i}))

    first_result = DataParallelTrainer(
        train_loop_per_worker=train_func, scaling_config=scale_config
    ).fit()
    assert first_result.checkpoint.to_dict()["epoch"] == 1

    # Relocate the checkpoint: dict form -> directory under tmpdir -> reload.
    as_dict = first_result.checkpoint.to_dict()
    relocated_path = Checkpoint.from_dict(as_dict).to_directory(tmpdir)
    resume_from = Checkpoint.from_directory(relocated_path)

    second_result = DataParallelTrainer(
        train_loop_per_worker=train_func,
        scaling_config=scale_config,
        resume_from_checkpoint=resume_from,
    ).fit()
    assert second_result.checkpoint.to_dict()["epoch"] == 2
示例#13
0
def test_separate_gpu_stage(shutdown_only):
    """``separate_gpu_stage`` changes the stage layout, not the predictions.

    Prediction runs once with the flag on and once with it off; the name of
    the first stage in the dataset stats differs while the maximum predicted
    value stays 36.0.
    """
    ray.init(num_gpus=1)
    checkpoint = Checkpoint.from_dict({
        "factor": 2.0,
        PREPROCESSOR_KEY: DummyPreprocessor()
    })
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    # (separate_gpu_stage flag, expected description of stage 1)
    cases = (
        (True, "Stage 1 read->map_batches:"),
        (False, "Stage 1 read:"),
    )
    for separate, first_stage in cases:
        ds = batch_predictor.predict(
            ray.data.range_table(10),
            num_gpus_per_worker=1,
            separate_gpu_stage=separate,
            allow_gpu=True,
        )
        stats = ds.stats()
        assert first_stage in stats, stats
        assert "Stage 2 map_batches:" in stats, stats
        assert ds.max("value") == 36.0, ds
示例#14
0
def test_get_and_set_preprocessor():
    """Test preprocessor can be set and get.

    Swapping the preprocessor on the batch predictor must change both the
    value returned by the getter and the predictions.
    """

    def predict_as_list(predictor, dataset):
        # Flatten the predicted dataset into a plain Python list.
        return predictor.predict(dataset).to_pandas().to_numpy().squeeze().tolist()

    preprocessor = DummyPreprocessor(1)
    batch_predictor = BatchPredictor.from_checkpoint(
        Checkpoint.from_dict({
            "factor": 2.0,
            PREPROCESSOR_KEY: preprocessor
        }),
        DummyPredictor,
    )
    assert batch_predictor.get_preprocessor() == preprocessor

    test_dataset = ray.data.range(4)
    assert predict_as_list(batch_predictor, test_dataset) == [0.0, 2.0, 4.0, 6.0]

    # Replace the preprocessor and expect the outputs to change.
    preprocessor2 = DummyPreprocessor(2)
    batch_predictor.set_preprocessor(preprocessor2)
    assert batch_predictor.get_preprocessor() == preprocessor2

    assert predict_as_list(batch_predictor, test_dataset) == [0.0, 4.0, 8.0, 12.0]
示例#15
0
def test_batch_prediction():
    """Batch prediction works on a whole dataset and on a windowed pipeline."""
    checkpoint = Checkpoint.from_dict({
        "factor": 2.0,
        PREPROCESSOR_KEY: DummyPreprocessor()
    })
    batch_predictor = BatchPredictor.from_checkpoint(checkpoint, DummyPredictor)

    ds = batch_predictor.predict(ray.data.range(4))
    # Check fusion occurred.
    stats = ds.stats()
    assert "read->map_batches" in stats, stats
    predicted = ds.to_pandas().to_numpy().squeeze().tolist()
    assert predicted == [0.0, 4.0, 8.0, 12.0]

    # Pipelined prediction: only the first window is inspected.
    float_items = ray.data.from_items([1.0, 2.0, 3.0, 4.0])
    pipeline = batch_predictor.predict_pipelined(float_items, blocks_per_window=2)
    first_window = next(pipeline.iter_datasets())
    assert first_window.to_pandas().to_numpy().squeeze().tolist() == [4.0, 8.0]
示例#16
0
def test_get_and_set_preprocessor():
    """Test preprocessor can be set and get.

    Swapping the preprocessor on the predictor must change both the value
    returned by the getter and the predictions.
    """
    preprocessor = DummyPreprocessor(1)
    checkpoint = Checkpoint.from_dict({
        "factor": 2.0,
        PREPROCESSOR_KEY: preprocessor
    })
    predictor = DummyPredictor.from_checkpoint(checkpoint)
    assert predictor.get_preprocessor() == preprocessor

    test_dataset = pd.DataFrame(range(4))
    first_output = predictor.predict(test_dataset).to_numpy().squeeze().tolist()
    assert first_output == [0.0, 2.0, 4.0, 6.0]

    # Replace the preprocessor and expect the outputs to change.
    preprocessor2 = DummyPreprocessor(2)
    predictor.set_preprocessor(preprocessor2)
    assert predictor.get_preprocessor() == preprocessor2

    second_output = predictor.predict(test_dataset).to_numpy().squeeze().tolist()
    assert second_output == [0.0, 4.0, 8.0, 12.0]
示例#17
0
 def train_func():
     """Report two consecutive epochs, each with a dict checkpoint of the epoch.

     Counting starts at the epoch stored in the session checkpoint when one
     exists, otherwise at zero.
     """
     resumed = session.get_checkpoint()
     start_epoch = resumed.to_dict()["epoch"] if resumed else 0
     for i in range(start_epoch, start_epoch + 2):
         session.report({"epoch": i}, checkpoint=Checkpoint.from_dict({"epoch": i}))
示例#18
0
 def testDictCheckpointWithPreprocessorAsDir(self):
     """A preprocessor in a dict checkpoint is retrievable after a directory round trip."""
     data = {"metric": 5, PREPROCESSOR_KEY: DummyPreprocessor(1)}

     # Dict checkpoint -> directory -> directory checkpoint.
     checkpoint_path = Checkpoint.from_dict(data).to_directory()
     reloaded = Checkpoint.from_directory(checkpoint_path)

     restored_preprocessor = reloaded.get_preprocessor()
     assert restored_preprocessor.multiplier == 1
示例#19
0
 def testLocalCheckpointSerde(self):
     """Serde check for a checkpoint created from a local directory.

     Local checkpoints are converted to bytes on serialization. Currently
     this is a pickled dict, so the expected representation is taken from
     the source dict checkpoint.
     """
     source_checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})
     with source_checkpoint.as_directory() as tmpdir:
         local_checkpoint = Checkpoint.from_directory(tmpdir)
         expected_repr = source_checkpoint.get_internal_representation()
         self._testCheckpointSerde(local_checkpoint, *expected_repr)
示例#20
0
def test_predictor_from_checkpoint_kwargs(serve_instance):
    """Extra deploy kwargs affect the result: with ``do_double`` 40 maps to 84."""
    adder_deployment = PredictorDeployment.options(name="Adder")
    adder_deployment.deploy(
        predictor_cls=AdderPredictor,
        checkpoint=Checkpoint.from_dict({"increment": 2}),
        do_double=True,
    )

    request_ref = send_request.remote(json={"array": [40]})
    resp = ray.get(request_ref)
    assert resp == {"value": [84], "batch_size": 1}
示例#21
0
 def _prepare_dict_checkpoint(self) -> Checkpoint:
     """Build a checkpoint from ``self.checkpoint_dict_data`` and sanity-check it.

     Returns:
         The new checkpoint, after asserting its type and that its
         ``"metric"`` entry matches the source data.
     """
     source_data = self.checkpoint_dict_data
     dict_checkpoint = Checkpoint.from_dict(source_data)

     self.assertIsInstance(dict_checkpoint, Checkpoint)
     self.assertTrue(dict_checkpoint._data_dict)
     self.assertEqual(dict_checkpoint._data_dict["metric"],
                      source_data["metric"])
     return dict_checkpoint
示例#22
0
    def testObjRefCheckpointSerde(self):
        """Serde check for a checkpoint created from an object reference.

        Obj ref checkpoints are dict checkpoints put into the Ray object
        store, but they have their own data representation (the obj ref),
        so the expected representation is the obj ref checkpoint's own.
        """
        dict_checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})
        ref_checkpoint = Checkpoint.from_object_ref(
            dict_checkpoint.to_object_ref())

        expected_repr = ref_checkpoint.get_internal_representation()
        self._testCheckpointSerde(ref_checkpoint, *expected_repr)
示例#23
0
def test_init(model, preprocessor):
    """A predictor built from a checkpoint matches one built directly."""
    direct_predictor = TorchPredictor(model=model, preprocessor=preprocessor)

    checkpoint_data = {MODEL_KEY: model, PREPROCESSOR_KEY: preprocessor}
    checkpoint_predictor = TorchPredictor.from_checkpoint(
        Checkpoint.from_dict(checkpoint_data))

    assert checkpoint_predictor.model == direct_predictor.model
    assert checkpoint_predictor.preprocessor == direct_predictor.preprocessor
示例#24
0
    def testBytesCheckpointSerde(self):
        """Serde check for a checkpoint created from bytes.

        Bytes checkpoints are just dict checkpoints constructed from pickled
        data, so the expected representation is the source dict checkpoint's.
        """
        source_checkpoint = Checkpoint.from_dict({"checkpoint_data": 5})
        bytes_checkpoint = Checkpoint.from_bytes(source_checkpoint.to_bytes())

        expected_repr = source_checkpoint.get_internal_representation()
        self._testCheckpointSerde(bytes_checkpoint, *expected_repr)
示例#25
0
def test_mixed_input_output_type_with_batching(serve_instance):
    """Batched requests to ``TakeArrayReturnDataFramePredictor`` each get one record back.

    Two identical requests are sent with a maximum batch size of 2; every
    response must be a single-element list holding both incremented columns.
    """
    batching = dict(max_batch_size=2, batch_wait_timeout_s=1000)
    adder_deployment = ModelWrapperDeployment.options(name="Adder")
    adder_deployment.deploy(
        predictor_cls=TakeArrayReturnDataFramePredictor,
        checkpoint=Checkpoint.from_dict({"increment": 2}),
        batching_params=batching,
    )

    refs = []
    for _ in range(2):
        refs.append(send_request.remote(json={"array": [40, 45]}))

    responses = ray.get(refs)
    for resp in responses:
        assert resp == [{"col_a": 42.0, "col_b": 47.0}]
示例#26
0
def test_batching(serve_instance):
    """Two concurrent requests are expected to be served as one batch of size 2."""
    batching = dict(max_batch_size=2, batch_wait_timeout_s=1000)
    adder_deployment = ModelWrapperDeployment.options(name="Adder")
    adder_deployment.deploy(
        predictor_cls=AdderPredictor,
        checkpoint=Checkpoint.from_dict({"increment": 2}),
        batching_params=batching,
    )

    refs = []
    for _ in range(2):
        refs.append(send_request.remote(json={"array": [40]}))

    responses = ray.get(refs)
    for resp in responses:
        assert resp == {"value": [42], "batch_size": 2}
示例#27
0
def test_kwargs(predict_pandas_mock):
    """Extra keyword arguments to ``predict`` reach the mocked pandas predict call."""
    predictor = DummyPredictor.from_checkpoint(
        Checkpoint.from_dict({"factor": 2.0}))

    frame = pd.DataFrame({"x": [1, 2, 3]})
    predictor.predict(frame, extra_arg=1)

    # Second element in call_args is the kwargs.
    forwarded_kwargs = predict_pandas_mock.call_args[1]
    assert "extra_arg" in forwarded_kwargs
    assert predict_pandas_mock.call_args[1]["extra_arg"] == 1
示例#28
0
def test_predict_array():
    """Predicting on a 3x1 numpy batch yields three predictions equal to the inputs."""
    weights_checkpoint = Checkpoint.from_dict({MODEL_KEY: weights})
    predictor = TensorflowPredictor.from_checkpoint(weights_checkpoint,
                                                    build_model)

    predictions = predictor.predict(np.array([[1], [2], [3]]))

    assert len(predictions) == 3
    flattened = predictions.to_numpy().flatten().tolist()
    assert flattened == [1, 2, 3]
示例#29
0
def test_predict(convert_from_pandas_mock, convert_to_pandas_mock):
    """``predict`` returns the expected frame and calls each conversion mock once."""
    predictor = DummyPredictor.from_checkpoint(
        Checkpoint.from_dict({"factor": 2.0}))

    frame = pd.DataFrame({"x": [1, 2, 3]})
    expected_output = frame * 4.0
    actual_output = predictor.predict(frame)
    assert actual_output.equals(expected_output)

    # Ensure the proper conversion functions are called.
    for conversion_mock in (convert_to_pandas_mock, convert_from_pandas_mock):
        conversion_mock.assert_called_once()
示例#30
0
def test_model_wrappers_in_pipeline(serve_instance):
    """Serve a model wrapper inside a deployment graph and query it over HTTP.

    A checkpoint with ``increment`` 2 is stored behind a ``file://`` URI and
    bound to ``ModelWrapperDeployment``; posting 40 to the ingress route is
    expected to return 42.

    Args:
        serve_instance: Fixture; not referenced directly in the test body.
    """
    # Persist the checkpoint so the deployment can be bound to it by URI.
    path = tempfile.mkdtemp()
    uri = f"file://{path}/test_uri"
    Checkpoint.from_dict({"increment": 2}).to_uri(uri)

    predictor_cls = "ray.serve.tests.test_model_wrappers.AdderPredictor"

    with InputNode() as dag_input:
        m1 = ModelWrapperDeployment.bind(
            predictor_cls=predictor_cls,
            checkpoint=uri,
        )
        dag = m1.predict.bind(dag_input)
    deployments = build(Ingress.bind(dag))
    for d in deployments:
        d.deploy()

    resp = requests.post("http://127.0.0.1:8000/ingress", json={"array": [40]})
    print(resp.text)
    resp.raise_for_status()
    # Must be an assertion: a value returned from a test function is never
    # checked, so a wrong response would otherwise go unnoticed.
    assert resp.json() == {"value": [42], "batch_size": 1}