Example #1
# Module-level imports assumed from the surrounding test file (not shown in the excerpt):
import numpy as np
from tensorflow.keras import optimizers

from elephas.ml.adapter import to_data_frame
from elephas.ml_model import ElephasEstimator


def test_batch_predict_classes_probability(spark_context, classification_model,
                                           mnist_data):
    batch_size = 64
    nb_classes = 10
    epochs = 1

    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    df = to_data_frame(spark_context, x_train, y_train, categorical=True)
    test_df = to_data_frame(spark_context, x_test, y_test, categorical=True)

    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    sgd_conf = optimizers.serialize(sgd)

    # Initialize Spark ML Estimator
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.1)
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(nb_classes)

    # Fitting a model returns a Transformer
    fitted_pipeline = estimator.fit(df)

    results = fitted_pipeline.transform(test_df)

    # Set inference batch size and do transform again on the same test_df
    inference_batch_size = int(len(y_test) / 10)
    fitted_pipeline.set_params(inference_batch_size=inference_batch_size)
    fitted_pipeline.set_params(outputCol="prediction_via_batch_inference")
    results_with_batch_prediction = fitted_pipeline.transform(results)
    # we should have an array of 10 elements in the prediction column, since we have 10 classes
    # and therefore 10 probabilities
    results_np = results_with_batch_prediction.take(1)[0]
    assert len(results_np.prediction) == 10
    assert len(results_np.prediction_via_batch_inference) == 10
    assert np.array_equal(results_np.prediction,
                          results_np.prediction_via_batch_inference)
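
The test above is an excerpt from the elephas test suite and relies on pytest fixtures (spark_context, classification_model, mnist_data) defined elsewhere. The following is only a rough sketch of what such fixtures might provide, assuming a TensorFlow Keras MLP and the built-in MNIST loader; the fixture bodies, layer sizes, and app name are illustrative assumptions, not the project's actual conftest.py.

# Hypothetical conftest.py sketch; the real fixtures live in the elephas test suite.
import pytest
from pyspark import SparkConf, SparkContext
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical


@pytest.fixture(scope="module")
def spark_context():
    # Local SparkContext shared across the test module.
    sc = SparkContext(conf=SparkConf().setAppName("elephas-tests").setMaster("local[2]"))
    yield sc
    sc.stop()


@pytest.fixture
def classification_model():
    # Small MLP over flattened 28x28 images, 10 output classes.
    model = Sequential()
    model.add(Dense(128, activation="relu", input_shape=(784,)))
    model.add(Dense(10, activation="softmax"))
    return model


@pytest.fixture
def mnist_data():
    # Flattened, normalized images; labels are one-hot, matching categorical=True above.
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train = x_train.reshape(-1, 784).astype("float32") / 255.0
    x_test = x_test.reshape(-1, 784).astype("float32") / 255.0
    return x_train, to_categorical(y_train, 10), x_test, to_categorical(y_test, 10)
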
Example #2
File: ml_mlp.py  Project: nkhuyu/elephas
df = to_data_frame(sc, X_train, Y_train, categorical=True)
test_df = to_data_frame(sc, X_test, Y_test, categorical=True)

# Initialize Spark ML Estimator
adadelta = elephas_optimizers.Adadelta()
estimator = ElephasEstimator(sc,
                             model,
                             nb_epoch=nb_epoch,
                             batch_size=batch_size,
                             optimizer=adadelta,
                             frequency='batch',
                             mode='asynchronous',
                             num_workers=2,
                             verbose=0,
                             validation_split=0.1,
                             categorical=True,
                             nb_classes=nb_classes)

# Fitting a model returns a Transformer
fitted_model = estimator.fit(df)

# Evaluate Spark model by evaluating the underlying model
prediction = fitted_model.transform(test_df)
pnl = prediction.select("label", "prediction")
pnl.show(100)

# DataFrame.map() is not available in Spark 2.x+; go through the underlying RDD instead
prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction))
metrics = MulticlassMetrics(prediction_and_label)
print(metrics.precision())
print(metrics.recall())
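
Examples #2 and #3 come from a script where the SparkContext, the Keras model, and the MNIST arrays are set up earlier in the file. Below is a minimal sketch of that assumed preamble, written against the legacy Keras 1.x / pre-1.0 elephas APIs these snippets use; the variable values, layer sizes, and import paths are assumptions rather than lines copied from ml_mlp.py.

# Hypothetical setup for Examples #2 and #3; the real ml_mlp.py may differ.
from pyspark import SparkConf, SparkContext
from pyspark.mllib.evaluation import MulticlassMetrics

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.datasets import mnist
from keras.utils import np_utils

from elephas.ml.adapter import to_data_frame
from elephas.ml_model import ElephasEstimator
from elephas import optimizers as elephas_optimizers

batch_size = 128
nb_classes = 10
nb_epoch = 5

sc = SparkContext(conf=SparkConf().setAppName("mnist_mlp").setMaster("local[2]"))

# Flattened, normalized MNIST; labels are one-hot, matching categorical=True in the snippet above.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784).astype("float32") / 255
X_test = X_test.reshape(10000, 784).astype("float32") / 255
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

# Simple MLP classifier.
model = Sequential()
model.add(Dense(128, input_dim=784))
model.add(Activation("relu"))
model.add(Dropout(0.2))
model.add(Dense(nb_classes))
model.add(Activation("softmax"))
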
Example #3
df = to_data_frame(sc, x_train, y_train, categorical=True)
test_df = to_data_frame(sc, x_test, y_test, categorical=True)

# Initialize Spark ML Estimator
adadelta = elephas_optimizers.Adadelta()
estimator = ElephasEstimator(sc,
                             model,
                             nb_epoch=nb_epoch,
                             batch_size=batch_size,
                             optimizer=adadelta,
                             frequency='batch',
                             mode='asynchronous',
                             num_workers=2,
                             verbose=0,
                             validation_split=0.1,
                             categorical=True,
                             nb_classes=nb_classes)

# Fitting a model returns a Transformer
fitted_model = estimator.fit(df)

# Evaluate Spark model by evaluating the underlying model
prediction = fitted_model.transform(test_df)
pnl = prediction.select("label", "prediction")
pnl.show(100)

# DataFrame.map() is not available in Spark 2.x+; go through the underlying RDD instead
prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction))
metrics = MulticlassMetrics(prediction_and_label)
print(metrics.precision())
print(metrics.recall())
Example #4
estimator.setFeaturesCol("features")                   # These two come directly from pyspark,
estimator.setLabelCol("label")                         # hence the camel case. Sorry :)
estimator.set_keras_model_config(model.to_yaml())      # Provide serialized Keras model
estimator.set_optimizer_config(adadelta.get_config())  # Provide serialized Elephas optimizer
estimator.set_categorical_labels(True)
estimator.set_nb_classes(2)
estimator.set_num_workers(1)  # We just use one worker here. Feel free to adapt it.
estimator.set_nb_epoch(20)
estimator.set_batch_size(128)
estimator.set_verbosity(1)
estimator.set_validation_split(0.15)

fitted_model = estimator.fit(train_df)
prediction = fitted_model.transform(test_df)
pnl = prediction.select("label", "prediction")
pnl.show(100)

# from pyspark.ml import Pipeline
#
# pipeline = Pipeline(stages=[estimator])
# fitted_pipeline = pipeline.fit(train_df)  # Fit model to data
# prediction = fitted_pipeline.transform(test_df)
#
# pnl = prediction.select("index_category", "prediction")
# pnl.show(100)
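
The commented-out lines sketch an alternative route through a Spark ML Pipeline. A minimal runnable version of that route, assuming the estimator, train_df, and test_df from Example #4 and selecting the "label" column used by the active code (the "index_category" column in the comments is specific to the original script):

from pyspark.ml import Pipeline

# Wrap the ElephasEstimator in a single-stage pipeline.
pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(train_df)         # Fit model to data

prediction = fitted_pipeline.transform(test_df)  # Adds a "prediction" column
pnl = prediction.select("label", "prediction")
pnl.show(100)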