Example #1
from elephas.spark_model import SparkMLlibModel, load_spark_model


def test_serialization():
    # `model` is assumed to be a compiled Keras model available in scope
    # (e.g. a pytest fixture or a module-level definition).
    spark_model = SparkMLlibModel(model,
                                  frequency='epoch',
                                  mode='synchronous',
                                  num_workers=2)
    spark_model.save("test.h5")
    load_spark_model("test.h5")
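The test assumes a compiled Keras model named `model` in scope. A minimal sketch of that setup (the two-layer MLP here is purely illustrative):

# Hypothetical setup assumed by the test above; the architecture is illustrative.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential([
    Dense(64, activation='relu', input_dim=784),
    Dense(10, activation='softmax'),
])
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])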
Example #2
from tensorflow.keras.optimizers import RMSprop

from elephas.spark_model import SparkMLlibModel, load_spark_model


def test_serialization(classification_model):
    # `classification_model` is a pytest fixture providing an uncompiled Keras model.
    rms = RMSprop()
    classification_model.compile(rms, 'categorical_crossentropy', ['acc'])
    spark_model = SparkMLlibModel(
        classification_model, frequency='epoch', mode='synchronous', num_workers=2)
    spark_model.save("test.h5")
    loaded_model = load_spark_model("test.h5")
    assert loaded_model.master_network.to_yaml()
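Note that `Model.to_yaml()` was removed in TensorFlow 2.6 (it raises a RuntimeError there); on newer versions the equivalent round-trip check would go through `to_json()`:

# On TensorFlow >= 2.6, to_yaml() is gone; to_json() serves the same purpose here.
assert loaded_model.master_network.to_json()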
Example #3
from elephas.spark_model import SparkMLlibModel
from elephas.utils.rdd_utils import to_labeled_point


def test_mllib_model(spark_context):
    # `model`, `x_train`, `y_train`, `x_test`, `y_test` and `nb_classes` are
    # assumed to be defined at module level (see Example #5 for a full script).
    # Build RDD from numpy features and labels
    lp_rdd = to_labeled_point(spark_context, x_train, y_train, categorical=True)

    # Initialize SparkModel from Keras model and Spark context
    spark_model = SparkMLlibModel(model=model, frequency='epoch', mode='synchronous')

    # Train Spark model
    spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
                    validation_split=0.1, categorical=True, nb_classes=nb_classes)

    # Evaluate Spark model by evaluating the underlying model
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    print('Test accuracy:', score[1])
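The test assumes preprocessed MNIST arrays and an `nb_classes` constant. A sketch of that data preparation, using the standard `tensorflow.keras` loader:

# Sketch of the data preparation the test assumes; names match the snippet above.
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

nb_classes = 10
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 784).astype(np.float32) / 255
x_test = x_test.reshape(-1, 784).astype(np.float32) / 255
y_train = to_categorical(y_train, nb_classes)
y_test = to_categorical(y_test, nb_classes)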
Example #4
from tensorflow.keras.optimizers import RMSprop

from elephas.spark_model import SparkMLlibModel
from elephas.utils.rdd_utils import to_labeled_point


def test_mllib_model(spark_context, classification_model, mnist_data):
    # `spark_context`, `classification_model` and `mnist_data` are pytest
    # fixtures; `nb_classes` (10 for MNIST) is assumed at module level.
    rms = RMSprop()
    classification_model.compile(rms, 'categorical_crossentropy', ['acc'])
    x_train, y_train, x_test, y_test = mnist_data
    x_train = x_train[:1000]
    y_train = y_train[:1000]
    # Build RDD from numpy features and labels
    lp_rdd = to_labeled_point(spark_context, x_train,
                              y_train, categorical=True)

    # Initialize SparkModel from tensorflow.keras model and Spark context
    spark_model = SparkMLlibModel(
        model=classification_model, frequency='epoch', mode='synchronous')

    # Train Spark model
    spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
                    validation_split=0.1, categorical=True, nb_classes=nb_classes)

    # Evaluate Spark model by evaluating the underlying model
    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
    assert score
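The `spark_context` fixture these tests rely on is not shown. A minimal pytest sketch, assuming a local-mode SparkContext is sufficient:

# Hypothetical pytest fixture; the actual test suite may configure Spark differently.
import pytest
from pyspark import SparkConf, SparkContext

@pytest.fixture(scope='session')
def spark_context():
    conf = SparkConf().setAppName('elephas-tests').setMaster('local[2]')
    sc = SparkContext(conf=conf)
    yield sc
    sc.stop()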
Example #5
model.add(Activation('softmax'))

# Compile model
rms = RMSprop()
model.compile(rms, "categorical_crossentropy", ['acc'])

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)

# Initialize SparkModel from tensorflow.keras model and Spark context
spark_model = SparkMLlibModel(model=model,
                              frequency='epoch',
                              mode='synchronous')

# Train Spark model
spark_model.fit(lp_rdd,
                epochs=5,
                batch_size=32,
                verbose=0,
                validation_split=0.1,
                categorical=True,
                nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
print('Test accuracy:', score[1])
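The snippet above begins mid-script, just after the hidden layers. A plausible preamble leading into the `model.add(Activation('softmax'))` line at the top, assuming the MLP used in the other MNIST examples here:

# Assumed model definition; the original layers above the snippet are truncated.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import RMSprop
from pyspark import SparkConf, SparkContext
from elephas.spark_model import SparkMLlibModel
from elephas.utils.rdd_utils import to_labeled_point

model = Sequential()
model.add(Dense(128, input_dim=784))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))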
Example #6
model.add(Dropout(0.2))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

# Compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["accuracy"])

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)

# Initialize SparkModel from Keras model and Spark context
adagrad = elephas_optimizers.Adagrad()

spark_model = SparkMLlibModel(sc, model, optimizer=adagrad,
                              frequency='batch', mode='asynchronous', num_workers=4)

# Train Spark model
spark_model.train(lp_rdd, nb_epoch=nb_epoch, batch_size=batch_size, verbose=0,
                  validation_split=0.1, categorical=True, nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
loss, acc = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
print('Test accuracy:', acc)
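Examples #6 through #8, #10, and #11 use the legacy elephas 0.x API: the constructor took the SparkContext and an elephas optimizer, and training went through `train(...)` with `nb_epoch`. A rough sketch of the same run against the current API (an assumption, not a drop-in migration):

# Rough current-API counterpart of the legacy call above: the SparkContext and
# elephas optimizer arguments are gone, and train()/nb_epoch became fit()/epochs.
spark_model = SparkMLlibModel(model=model, frequency='batch',
                              mode='asynchronous', num_workers=4)
spark_model.fit(lp_rdd, epochs=nb_epoch, batch_size=batch_size, verbose=0,
                validation_split=0.1, categorical=True, nb_classes=nb_classes)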
Example #7
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation('softmax'))

# Compile model
rms = RMSprop()
model.compile(loss='categorical_crossentropy', optimizer=rms)

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, X_train, Y_train, categorical=True)
print(lp_rdd.first())
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)
rdd = rdd.repartition(4)
rdd.first()

# Initialize SparkModel from Keras model and Spark context
adadelta = elephas_optimizers.Adadelta()
spark_model = SparkMLlibModel(sc, model, optimizer=adadelta,
                              frequency='batch', mode='asynchronous', num_workers=2)

# Train Spark model
spark_model.train(lp_rdd, nb_epoch=20, batch_size=32, verbose=0,
                  validation_split=0.1, categorical=True, nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.get_network().evaluate(X_test, Y_test, show_accuracy=True, verbose=2)
print('Test accuracy:', score[1])
Example #8
rms = RMSprop()

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)

# Initialize SparkModel from Keras model and Spark context
adadelta = elephas_optimizers.Adadelta()
spark_model = SparkMLlibModel(sc,
                              model,
                              optimizer=adadelta,
                              frequency='batch',
                              mode='asynchronous',
                              num_workers=2,
                              master_optimizer=rms)

# Train Spark model
spark_model.train(lp_rdd,
                  nb_epoch=20,
                  batch_size=32,
                  verbose=0,
                  validation_split=0.1,
                  categorical=True,
                  nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
Example #9
                            outputCol="tf_features",
                            vocabSize=input_dim)
    # IDF
    idf = sf.IDF(inputCol="tf_features", outputCol="features")
    label_string = sf.StringIndexer(inputCol="first_label", outputCol="label")
    pipeline_dl = Pipeline(stages=[cv, idf, label_string])
    df = pipeline_dl.fit(training_set).transform(training_set)
    df = df.rdd.map(lambda x: LabeledPoint(
        x['label'], MLLibVectors.fromML(x['features'])))
    logger.info("Pipeline created ...")
    logger.info("Transformed the text into a TF-IDF RDD ...")
    model = create_keras_model(input_dim, output_dim)

    logger.info("Starts Training ...")
    spark_model = SparkMLlibModel(model=model,
                                  frequency='epoch',
                                  mode='asynchronous',
                                  parameter_server_mode='socket')
    spark_model.fit(df,
                    epochs=epochs,
                    batch_size=132,
                    verbose=1,
                    validation_split=0.2,
                    categorical=True,
                    nb_classes=output_dim)

    logger.info("Training done")
    spark_model.master_network.save(save_dir + model_dir + "/" + filename)
    logger.info("Program ended successfully! Find the model at: " + save_dir +
                model_dir + "/" + filename)
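`create_keras_model(input_dim, output_dim)` is not included in the snippet; a hypothetical implementation consistent with the TF-IDF features and one-hot labels might look like:

# Hypothetical helper; the original create_keras_model is not shown in the snippet.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def create_keras_model(input_dim, output_dim):
    model = Sequential([
        Dense(256, activation='relu', input_dim=input_dim),
        Dense(output_dim, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model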
Example #10
# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, X_train, Y_train, categorical=True)
print(lp_rdd.first())
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)
rdd = rdd.repartition(4)
rdd.first()

# Initialize SparkModel from Keras model and Spark context
adadelta = elephas_optimizers.Adadelta()
spark_model = SparkMLlibModel(sc,
                              model,
                              optimizer=adadelta,
                              frequency='batch',
                              mode='asynchronous',
                              num_workers=2)

# Train Spark model
spark_model.train(lp_rdd,
                  nb_epoch=20,
                  batch_size=32,
                  verbose=0,
                  validation_split=0.1,
                  categorical=True,
                  nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
score = spark_model.get_network().evaluate(X_test,
                                           Y_test,
                                           show_accuracy=True,
                                           verbose=2)
print('Test accuracy:', score[1])
Example #11
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=["accuracy"])

# Create Spark context
conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
sc = SparkContext(conf=conf)

# Build RDD from numpy features and labels
lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)
rdd = lp_to_simple_rdd(lp_rdd, True, nb_classes)

# Initialize SparkModel from Keras model and Spark context
adagrad = elephas_optimizers.Adagrad()

spark_model = SparkMLlibModel(sc,
                              model,
                              optimizer=adagrad,
                              frequency='batch',
                              mode='asynchronous',
                              num_workers=4)

# Train Spark model
spark_model.train(lp_rdd,
                  nb_epoch=nb_epoch,
                  batch_size=batch_size,
                  verbose=0,
                  validation_split=0.1,
                  categorical=True,
                  nb_classes=nb_classes)

# Evaluate Spark model by evaluating the underlying model
loss, acc = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
print('Test accuracy:', acc)
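Once training finishes, the master network on the driver is an ordinary Keras model, so inference needs no Spark at all. A minimal follow-up sketch:

# The master network is a plain Keras model; predict locally on the driver.
predictions = spark_model.master_network.predict(x_test[:10])
print('Predicted classes:', predictions.argmax(axis=1))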