Пример #1
0
def test_batch_prediction_from_pandas_udf():
    """Check that a BatchPredictor built from a pandas UDF applies the UDF.

    The UDF compares columns "a" and "b" row by row; when called with
    ``all_true=True`` it reports ``True`` for every row instead. The test
    verifies both the default call and that extra keyword arguments given
    to ``predict`` reach the UDF.
    """

    def check_truth(df, all_true=False):
        # One "bool" entry per input row: constant True when forced,
        # otherwise the element-wise equality of the two columns.
        flags = [True] * len(df) if all_true else (df["a"] == df["b"])
        return pd.DataFrame({"bool": flags})

    predictor = BatchPredictor.from_pandas_udf(check_truth)

    frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 5, 6]})
    dataset = ray.data.from_pandas(frame)

    def predicted_flags(**predict_kwargs):
        # Run prediction and pull the "bool" value out of every taken row.
        rows = predictor.predict(dataset, **predict_kwargs).take()
        return [row["bool"] for row in rows]

    # Only the first row has a == b.
    assert predicted_flags() == [True, False, False]
    # The keyword argument is forwarded to the UDF and overrides the check.
    assert predicted_flags(all_true=True) == [True, True, True]
Пример #2
0
# {'predictions': array([-1.2789773], dtype=float32), 'label': 0}
# {'predictions': array([-2.5579545], dtype=float32), 'label': 1}
# {'predictions': array([-3.8369317], dtype=float32), 'label': 0}

# Run `calculate_accuracy` over the predictions batch by batch, then print the
# mean taken on the "correct" column of the result.
# NOTE(review): `predictions` and `calculate_accuracy` are defined outside this
# excerpt; presumably the latter emits a 0/1 "correct" column -- confirm.
correct = predictions.map_batches(calculate_accuracy)
print("Final accuracy: ", correct.mean(on="correct"))
# Final accuracy:  0.5
# __compute_accuracy_end__

# __pipelined_prediction_start__
import pandas as pd
import ray
from ray.air import Checkpoint
from ray.train.predictor import Predictor
from ray.train.batch_predictor import BatchPredictor

# UDF for the predictor: ignores the input values and emits the constant `42`
# in column "a", once per input row.
def _always_42(data):
    return pd.DataFrame({"a": [42] * len(data)})


# Wrap the UDF in a BatchPredictor.
batch_pred = BatchPredictor.from_pandas_udf(_always_42)

# Dummy input: a 200-row tensor dataset, created with parallelism=4.
ds = ray.data.range_tensor(200, parallelism=4)
# Build the prediction pipeline with one block per window.
pipeline = batch_pred.predict_pipelined(ds, blocks_per_window=1)
# Print each batch of results as the pipeline yields it.
for batch in pipeline.iter_batches():
    print("Pipeline result", batch)
    # 0    42
    # 1    42
    # ...
# __pipelined_prediction_end__