示例#1
0
def test_set_predict_classes_regression_warning(spark_context,
                                                regression_model):
    with pytest.warns(ElephasWarning):
        estimator = ElephasEstimator()
        estimator.set_loss("mae")
        estimator.set_metrics(['mae'])
        estimator.set_categorical_labels(False)
        estimator.set_predict_classes(True)
def test_serialization_estimator():
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(model.to_yaml())
    estimator.set_loss("categorical_crossentropy")

    estimator.save("test.h5")
    load_ml_estimator("test.h5")
示例#3
0
def test_serialization_estimator(classification_model):
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model.to_yaml())
    estimator.set_loss("categorical_crossentropy")

    estimator.save("test.h5")
    loaded_model = load_ml_estimator("test.h5")
    assert loaded_model.get_model().to_yaml() == classification_model.to_yaml()
示例#4
0
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

sgd = optimizers.SGD(lr=0.01)
sgd_conf = optimizers.serialize(sgd)

# Initialize Elephas Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(sgd_conf)
estimator.set_mode("synchronous")
estimator.set_loss("categorical_crossentropy")
estimator.set_metrics(['acc'])
estimator.setFeaturesCol("scaled_features")
estimator.setLabelCol("index_category")
estimator.set_epochs(10)
estimator.set_batch_size(128)
estimator.set_num_workers(1)
estimator.set_verbosity(0)
estimator.set_validation_split(0.15)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)
示例#5
0
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')


# Initialize Elephas Spark ML Estimator
adagrad = elephas_optimizers.Adagrad()

estimator = ElephasEstimator()
estimator.setFeaturesCol("scaled_features")
estimator.setLabelCol("index_category")
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(adagrad.get_config())
estimator.set_nb_epoch(10)
estimator.set_batch_size(128)
estimator.set_num_workers(4)
estimator.set_verbosity(0)
estimator.set_validation_split(0.15)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[string_indexer, scaler, estimator])
fitted_pipeline = pipeline.fit(train_df)
示例#6
0
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

# Initialize Elephas Spark ML Estimator
adagrad = elephas_optimizers.Adagrad()

estimator = ElephasEstimator()
estimator.setFeaturesCol("scaled_features")
estimator.setLabelCol("index_category")
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(adagrad.get_config())
estimator.set_nb_epoch(10)
estimator.set_batch_size(128)
estimator.set_num_workers(4)
estimator.set_verbosity(0)
estimator.set_validation_split(0.15)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[string_indexer, scaler, estimator])
fitted_pipeline = pipeline.fit(train_df)
        elapsed_validation_VectorsCpu = time.clock() - startCpu

        start = time.time()
        startCpu = time.clock()

        input_dim = train_df.select("features").first()[0].shape
        logger.info(
            f"We have {num_classes} classes and {input_dim[0]} features")

        model = get_model(train_df, input_dim)
        model.compile(optimizer=optimizer, loss=mywloss, metrics=['accuracy'])
        adam = optimizers.nadam(lr=0.01)
        opt_conf = optimizers.serialize(adam)

        # Initialize SparkML Estimator and set all relevant properties
        estimator = ElephasEstimator()
        estimator.setFeaturesCol(
            "features")  # These two come directly from pyspark,
        estimator.setLabelCol("target")  # hence the camel case. Sorry :)
        estimator.set_keras_model_config(
            model.to_yaml())  # Provide serialized Keras model
        estimator.set_categorical_labels(True)
        estimator.set_nb_classes(num_classes)
        estimator.set_num_workers(
            10)  # We just use one worker here. Feel free to adapt it.
        estimator.set_epochs(2)  # was max-epochs
        estimator.set_batch_size(batch_size)  # was 128
        estimator.set_verbosity(2)  # was 1
        estimator.set_validation_split(0.15)
        estimator.set_optimizer_config(opt_conf)
        estimator.set_mode("synchronous")  # Was synchronous
示例#8
0
def test_spark_ml_model_classification(spark_context, classification_model,
                                       mnist_data):
    batch_size = 64
    nb_classes = 10
    epochs = 1

    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    df = to_data_frame(spark_context, x_train, y_train, categorical=True)
    test_df = to_data_frame(spark_context, x_test, y_test, categorical=True)

    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    sgd_conf = optimizers.serialize(sgd)

    # Initialize Spark ML Estimator
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.1)
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(nb_classes)

    # Fitting a model returns a Transformer
    pipeline = Pipeline(stages=[estimator])
    fitted_pipeline = pipeline.fit(df)

    # Evaluate Spark model by evaluating the underlying model
    prediction = fitted_pipeline.transform(test_df)
    pnl = prediction.select("label", "prediction")
    pnl.show(100)

    prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction))
    metrics = MulticlassMetrics(prediction_and_label)
    print(metrics.accuracy)
示例#9
0
def test_regression_model(spark_context, regression_model,
                          boston_housing_dataset):
    batch_size = 64
    epochs = 10

    x_train, y_train, x_test, y_test = boston_housing_dataset
    df = to_data_frame(spark_context, x_train, y_train)
    test_df = to_data_frame(spark_context, x_test, y_test)

    sgd = optimizers.SGD(lr=0.00001)
    sgd_conf = optimizers.serialize(sgd)
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(regression_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("mae")
    estimator.set_metrics(['mae'])
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.01)
    estimator.set_categorical_labels(False)

    pipeline = Pipeline(stages=[estimator])
    fitted_pipeline = pipeline.fit(df)
    prediction = fitted_pipeline.transform(test_df)
    pnl = prediction.select("label", "prediction")
    pnl.show(100)

    prediction_and_observations = pnl.rdd.map(lambda row:
                                              (row.label, row.prediction))
    metrics = RegressionMetrics(prediction_and_observations)
    print(metrics.r2)
示例#10
0
# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
df = to_data_frame(sc, x_train, y_train, categorical=True)
test_df = to_data_frame(sc, x_test, y_test, categorical=True)

# Initialize Spark ML Estimator
adadelta = elephas_optimizers.Adadelta()
estimator = ElephasEstimator(sc,
                             model,
                             nb_epoch=nb_epoch,
                             batch_size=batch_size,
                             optimizer=adadelta,
                             frequency='batch',
                             mode='asynchronous',
                             num_workers=2,
                             verbose=0,
                             validation_split=0.1,
                             categorical=True,
                             nb_classes=nb_classes)

# Fitting a model returns a Transformer
fitted_model = estimator.fit(df)

# Evaluate Spark model by evaluating the underlying model
prediction = fitted_model.transform(test_df)
pnl = prediction.select("label", "prediction")
pnl.show(100)

prediction_and_label = pnl.map(lambda row: (row.label, row.prediction))
示例#11
0
adam = Adam()
model.compile(loss='categorical_crossentropy', optimizer=adam)

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
df = to_data_frame(sc, x_train, y_train, categorical=True)
test_df = to_data_frame(sc, x_test, y_test, categorical=True)

# Define elephas optimizer
adadelta = elephas_optimizers.Adadelta()

# Initialize Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(adadelta.get_config())
estimator.set_nb_epoch(nb_epoch)
estimator.set_batch_size(batch_size)
estimator.set_num_workers(1)
estimator.set_verbosity(0)
estimator.set_validation_split(0.1)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(df)

# Evaluate Spark model by evaluating the underlying model
示例#12
0
def make_model(data):
    data.show()
    data = data.dropna()
    nb_classes = data.select("label").distinct().count()
    input_dim = len(data.select("features").first()[0])

    print(nb_classes, input_dim)

    model = Sequential()
    model.add(Embedding(input_dim=input_dim, output_dim=100))
    #model.add(LSTM(64,return_sequences=False,dropout=0.1,recurrent_dropout=0.1))
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes, activation='softmax'))
    #sgd = optimizers.SGD(lr=0.1)
    #model.compile(sgd, 'categorical_crossentropy', ['acc'])
    model.compile(loss='binary_crossentropy', optimizer='adam')

    #model.compile(loss='categorical_crossentropy', optimizer='adam')
    spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')

    adam = optimizers.Adam(lr=0.01)
    opt_conf = optimizers.serialize(adam)

    estimator = ElephasEstimator()
    estimator.setFeaturesCol("features")
    estimator.setLabelCol("label")
    estimator.set_keras_model_config(model.to_yaml())
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(nb_classes)
    estimator.set_num_workers(1)
    estimator.set_epochs(20)
    estimator.set_batch_size(128)
    estimator.set_verbosity(1)
    estimator.set_validation_split(0.15)
    estimator.set_optimizer_config(opt_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])

    #estimator = ElephasEstimator(model, epochs=20, batch_size=32, frequency='batch', mode='asynchronous', nb_classes=1)

    pipeline = Pipeline(stages=[estimator])
    #fitted_model = estimator.fit(data)
    #prediction = fitted_model.transform(data)

    fitted_pipeline = pipeline.fit(data)  # Fit model to data
    prediction = fitted_pipeline.transform(data)  # Evaluate on train data.
    # prediction = fitted_pipeline.transform(test_df) # <-- The same code evaluates test data.
    pnl = prediction.select("text", "prediction")
    pnl.show(100)

    prediction_and_label = pnl.map(lambda row: (row.text, row.prediction))
    metrics = MulticlassMetrics(prediction_and_label)
    print(metrics.precision())
    pnl = prediction.select("label", "prediction").show()
    pnl.show(100)
示例#13
0
def test_save_pipeline(spark_context, classification_model):
    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    sgd_conf = optimizers.serialize(sgd)

    # Initialize Spark ML Estimator
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])
    estimator.set_epochs(10)
    estimator.set_batch_size(10)
    estimator.set_validation_split(0.1)
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(10)

    # Fitting a model returns a Transformer
    pipeline = Pipeline(stages=[estimator])
    pipeline.save('tmp')
示例#14
0
def test_batch_predict_classes_probability(spark_context, classification_model,
                                           mnist_data):
    batch_size = 64
    nb_classes = 10
    epochs = 1

    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    df = to_data_frame(spark_context, x_train, y_train, categorical=True)
    test_df = to_data_frame(spark_context, x_test, y_test, categorical=True)

    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    sgd_conf = optimizers.serialize(sgd)

    # Initialize Spark ML Estimator
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.1)
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(nb_classes)

    # Fitting a model returns a Transformer
    fitted_pipeline = estimator.fit(df)

    results = fitted_pipeline.transform(test_df)

    # Set inference batch size and do transform again on the same test_df
    inference_batch_size = int(len(y_test) / 10)
    fitted_pipeline.set_params(inference_batch_size=inference_batch_size)
    fitted_pipeline.set_params(outputCol="prediction_via_batch_inference")
    results_with_batch_prediction = fitted_pipeline.transform(results)
    # we should have an array of 10 elements in the prediction column, since we have 10 classes
    # and therefore 10 probabilities
    results_np = results_with_batch_prediction.take(1)[0]
    assert len(results_np.prediction) == 10
    assert len(results_np.prediction_via_batch_inference) == 10
    assert np.array_equal(results_np.prediction,
                          results_np.prediction_via_batch_inference)
adam = Adam()
model.compile(loss='categorical_crossentropy', optimizer=adam)

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
df = to_data_frame(sc, x_train, y_train, categorical=True)
test_df = to_data_frame(sc, x_test, y_test, categorical=True)

# Define elephas optimizer
adadelta = elephas_optimizers.Adadelta()

# Initialize Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(adadelta.get_config())
estimator.set_nb_epoch(nb_epoch)
estimator.set_batch_size(batch_size)
estimator.set_num_workers(1)
estimator.set_verbosity(0)
estimator.set_validation_split(0.1)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(df)

# Evaluate Spark model by evaluating the underlying model
model.add(Dense(10))
model.add(Activation('softmax'))

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
df = to_data_frame(sc, x_train, y_train, categorical=True)
test_df = to_data_frame(sc, x_test, y_test, categorical=True)

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
sgd_conf = optimizers.serialize(sgd)

# Initialize Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(sgd_conf)
estimator.set_mode("synchronous")
estimator.set_loss("categorical_crossentropy")
estimator.set_metrics(['acc'])
estimator.set_epochs(epochs)
estimator.set_batch_size(batch_size)
estimator.set_validation_split(0.1)
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(df)
示例#17
0
文件: ml_mlp.py 项目: nkhuyu/elephas
# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
df = to_data_frame(sc, X_train, Y_train, categorical=True)
test_df = to_data_frame(sc, X_test, Y_test, categorical=True)

# Initialize Spark ML Estimator
adadelta = elephas_optimizers.Adadelta()
estimator = ElephasEstimator(sc,
                             model,
                             nb_epoch=nb_epoch,
                             batch_size=batch_size,
                             optimizer=adadelta,
                             frequency='batch',
                             mode='asynchronous',
                             num_workers=2,
                             verbose=0,
                             validation_split=0.1,
                             categorical=True,
                             nb_classes=nb_classes)

# Fitting a model returns a Transformer
fitted_model = estimator.fit(df)

# Evaluate Spark model by evaluating the underlying model
prediction = fitted_model.transform(test_df)
pnl = prediction.select("label", "prediction")
pnl.show(100)

prediction_and_label = pnl.map(lambda row: (row.label, row.prediction))
示例#18
0
def test_set_cols(spark_context, regression_model, boston_housing_dataset):
    batch_size = 64
    epochs = 10

    x_train, y_train, x_test, y_test = boston_housing_dataset
    df = to_data_frame(spark_context, x_train, y_train)
    df = df.withColumnRenamed('features', 'scaled_features')
    df = df.withColumnRenamed('label', 'ground_truth')
    test_df = to_data_frame(spark_context, x_test, y_test)
    test_df = test_df.withColumnRenamed('features', 'scaled_features')
    test_df = test_df.withColumnRenamed('label', 'ground_truth')

    sgd = optimizers.SGD(lr=0.00001)
    sgd_conf = optimizers.serialize(sgd)
    estimator = ElephasEstimator(labelCol='ground_truth',
                                 outputCol='output',
                                 featuresCol='scaled_features')
    estimator.set_keras_model_config(regression_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("mae")
    estimator.set_metrics(['mae'])
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.01)
    estimator.set_categorical_labels(False)

    pipeline = Pipeline(stages=[estimator])
    fitted_pipeline = pipeline.fit(df)
    prediction = fitted_pipeline.transform(test_df)
    pnl = prediction.select("ground_truth", "output")
    pnl.show(100)

    prediction_and_observations = pnl.rdd.map(
        lambda row: (row['ground_truth'], row['output']))
    metrics = RegressionMetrics(prediction_and_observations)
    print(metrics.r2)
示例#19
0
def test_predict_classes_probability(spark_context, classification_model,
                                     mnist_data):
    batch_size = 64
    nb_classes = 10
    epochs = 1

    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    df = to_data_frame(spark_context, x_train, y_train, categorical=True)
    test_df = to_data_frame(spark_context, x_test, y_test, categorical=True)

    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    sgd_conf = optimizers.serialize(sgd)

    # Initialize Spark ML Estimator
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])
    estimator.set_predict_classes(False)
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.1)
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(nb_classes)

    # Fitting a model returns a Transformer
    pipeline = Pipeline(stages=[estimator])
    fitted_pipeline = pipeline.fit(df)

    results = fitted_pipeline.transform(test_df)
    # we should have an array of 10 elements in the prediction column, since we have 10 classes
    # and therefore 10 probabilities
    assert len(results.take(1)[0].prediction) == 10
示例#20
0
def test_custom_objects(spark_context, boston_housing_dataset):
    def custom_activation(x):
        return 2 * relu(x)

    model = Sequential()
    model.add(Dense(64, input_shape=(13, )))
    model.add(Dense(64, activation=custom_activation))
    model.add(Dense(1, activation='linear'))
    x_train, y_train, x_test, y_test = boston_housing_dataset
    df = to_data_frame(spark_context, x_train, y_train)
    test_df = to_data_frame(spark_context, x_test, y_test)

    sgd = optimizers.SGD(lr=0.00001)
    sgd_conf = optimizers.serialize(sgd)
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(model.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("mae")
    estimator.set_metrics(['mae'])
    estimator.set_epochs(10)
    estimator.set_batch_size(32)
    estimator.set_validation_split(0.01)
    estimator.set_categorical_labels(False)
    estimator.set_custom_objects({'custom_activation': custom_activation})

    pipeline = Pipeline(stages=[estimator])
    fitted_pipeline = pipeline.fit(df)
    prediction = fitted_pipeline.transform(test_df)
示例#21
0
model.add(Dense(nb_classes))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')




from elephas.ml_model import ElephasEstimator
from elephas import optimizers as elephas_optimizers

# Define elephas optimizer (which tells the model how to aggregate updates on the Spark master)
adadelta = elephas_optimizers.Adadelta()

# Initialize SparkML Estimator and set all relevant properties
estimator = ElephasEstimator()
estimator.setFeaturesCol("scaled_features")             # These two come directly from pyspark,
estimator.setLabelCol("index_category")                 # hence the camel case. Sorry :)
estimator.set_keras_model_config(model.to_yaml())       # Provide serialized Keras model
estimator.set_optimizer_config(adadelta.get_config())   # Provide serialized Elephas optimizer
estimator.set_categorical_labels(True)
estimator.set_nb_classes(nb_classes)
estimator.set_num_workers(1)  # We just use one worker here. Feel free to adapt it.
estimator.set_nb_epoch(20) 
estimator.set_batch_size(128)
estimator.set_verbosity(1)
estimator.set_validation_split(0.15)



示例#22
0
def test_functional_model(spark_context, classification_model_functional,
                          mnist_data):
    batch_size = 64
    epochs = 1

    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    df = to_data_frame(spark_context, x_train, y_train, categorical=True)
    test_df = to_data_frame(spark_context, x_test, y_test, categorical=True)

    sgd = optimizers.SGD()
    sgd_conf = optimizers.serialize(sgd)
    estimator = ElephasEstimator()
    estimator.set_keras_model_config(classification_model_functional.to_yaml())
    estimator.set_optimizer_config(sgd_conf)
    estimator.set_mode("synchronous")
    estimator.set_loss("categorical_crossentropy")
    estimator.set_metrics(['acc'])
    estimator.set_epochs(epochs)
    estimator.set_batch_size(batch_size)
    estimator.set_validation_split(0.1)
    estimator.set_categorical_labels(True)
    estimator.set_nb_classes(10)
    pipeline = Pipeline(stages=[estimator])
    fitted_pipeline = pipeline.fit(df)
    prediction = fitted_pipeline.transform(test_df)
    pnl = prediction.select("label", "prediction")
    pnl.show(100)

    prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction))
    metrics = MulticlassMetrics(prediction_and_label)
    print(metrics.accuracy)
示例#23
0
model.add(Activation('relu'))
model.add(Dense(1))

# Create Spark context
conf = SparkConf().setAppName('BostonHousing_Spark_MLP').setMaster('local[*]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
df = to_data_frame(sc, x_train, y_train)
test_df = to_data_frame(sc, x_test, y_test)

sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
sgd_conf = optimizers.serialize(sgd)

# Initialize Spark ML Estimator
estimator = ElephasEstimator()
estimator.set_keras_model_config(model.to_yaml())
estimator.set_optimizer_config(sgd_conf)
estimator.set_mode("synchronous")
estimator.set_loss("mae")
estimator.set_metrics(['mse'])
estimator.set_epochs(epochs)
estimator.set_batch_size(batch_size)
estimator.set_validation_split(0.1)
estimator.set_categorical_labels(False)

# Fitting a model returns a Transformer
pipeline = Pipeline(stages=[estimator])
fitted_pipeline = pipeline.fit(df)

# Evaluate Spark model by evaluating the underlying model