示例#1
0
def test_set_predict_classes_regression_warning(spark_context,
                                                regression_model):
    with pytest.warns(ElephasWarning):
        estimator = ElephasEstimator()
        estimator.set_loss("mae")
        estimator.set_metrics(['mae'])
        estimator.set_categorical_labels(False)
        estimator.set_predict_classes(True)
示例#2
0
def test_predict_classes_probability(spark_context, classification_model,
                                     mnist_data):
    batch_size = 64
    nb_classes = 10
    epochs = 1

    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    df = to_data_frame(spark_context, x_train, y_train, categorical=True)
    test_df = to_data_frame(spark_context, x_test, y_test, categorical=True)

    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    sgd_conf = optimizers.serialize(sgd)

    # Initialize Spark ML Estimator
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])
    estimator.set_predict_classes(False)
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.1)
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(nb_classes)

    # Fitting a model returns a Transformer
    pipeline = Pipeline(stages=[estimator])
    fitted_pipeline = pipeline.fit(df)

    results = fitted_pipeline.transform(test_df)
    # we should have an array of 10 elements in the prediction column, since we have 10 classes
    # and therefore 10 probabilities
    assert len(results.take(1)[0].prediction) == 10