# Imports assumed by these snippets (they are not part of the original
# excerpt); the module paths follow the Cerebro model-selection library
# and TensorFlow 2.x / PySpark.
import tensorflow as tf
from pyspark.sql import SparkSession

from cerebro.backend import SparkBackend
from cerebro.keras import SparkEstimator
from cerebro.storage import LocalStore
from cerebro.tune import (GridSearch, RandomSearch, TPESearch, hp_choice,
                          hp_uniform, hp_quniform, hp_loguniform,
                          hp_qloguniform)


def test_random_search(self):
    spark = SparkSession \
        .builder \
        .master("local[3]") \
        .appName("Python Spark SQL basic example") \
        .getOrCreate()

    # Load training data in LIBSVM format and spread it over 8 partitions.
    df = spark.read.format("libsvm") \
        .load("./tests/sample_libsvm_data.txt") \
        .repartition(8)
    df.printSchema()

    backend = SparkBackend(spark_context=spark.sparkContext, num_workers=3)
    store = LocalStore('/tmp')

    ######## Random Search ###########
    search_space = {'lr': hp_choice([0.01, 0.001, 0.0001])}

    # Train 3 sampled configurations for 1 epoch each, holding out 25% of
    # the data for validation.
    random_search = RandomSearch(backend, store, estimator_gen_fn,
                                 search_space, num_models=3, num_epochs=1,
                                 validation=0.25, evaluation_metric='loss',
                                 feature_columns=['features'],
                                 label_columns=['label'])
    model = random_search.fit(df)

    output_df = model.transform(df)
    output_df.select('label', 'label__output').show(n=10)
    assert True
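# test_random_search above references a module-level estimator_gen_fn that is
# not shown in this excerpt. Below is a minimal sketch, mirroring the
# estimator_gen_fn defined inside test_tpe further down; the layer sizes and
# SparkEstimator arguments are carried over from that test as assumptions,
# not taken from the original file.
def estimator_gen_fn(params):
    # 692 LIBSVM features -> 100-unit hidden layer -> sigmoid output.
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Input(shape=(692,), name='features'))
    model.add(tf.keras.layers.Dense(100))
    model.add(tf.keras.layers.Dense(1))
    model.add(tf.keras.layers.Activation('sigmoid'))

    optimizer = tf.keras.optimizers.Adam(learning_rate=params['lr'])

    return SparkEstimator(model=model, optimizer=optimizer,
                          loss='binary_crossentropy', metrics=['acc'],
                          batch_size=10)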
def test_tpe(self):
    spark = SparkSession \
        .builder \
        .master("local[3]") \
        .appName("Python Spark SQL basic example") \
        .getOrCreate()

    # Load training data
    df = spark.read.format("libsvm") \
        .load("./tests/sample_libsvm_data.txt") \
        .repartition(8)
    df.printSchema()

    backend = SparkBackend(spark_context=spark.sparkContext, num_workers=3)
    store = LocalStore('/tmp')

    def estimator_gen_fn(params):
        # 692 input features -> 100-unit hidden layer -> sigmoid output.
        # The Input layer needs a shape tuple, and it already fixes the
        # input dimensions, so the Dense layers need no input_dim.
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Input(shape=(692,), name='features'))
        model.add(tf.keras.layers.Dense(100))
        model.add(tf.keras.layers.Dense(1))
        model.add(tf.keras.layers.Activation('sigmoid'))

        optimizer = tf.keras.optimizers.Adam(learning_rate=params['lr'])
        loss = 'binary_crossentropy'

        keras_estimator = SparkEstimator(model=model,
                                         optimizer=optimizer,
                                         loss=loss,
                                         metrics=['acc'],
                                         batch_size=10)
        return keras_estimator

    # The dummy entries only exercise the remaining search-space primitives;
    # estimator_gen_fn reads just params['lr'].
    search_space = {
        'lr': hp_choice([0.01, 0.001, 0.0001]),
        'dummy1': hp_uniform(0, 100),
        'dummy2': hp_quniform(0, 100, 1),
        'dummy3': hp_qloguniform(0, 100, 1),
    }

    hyperopt = TPESearch(backend=backend, store=store,
                         estimator_gen_fn=estimator_gen_fn,
                         search_space=search_space,
                         num_models=3, num_epochs=1, validation=0.25,
                         evaluation_metric='loss',
                         feature_columns=['features'],
                         label_columns=['label'], verbose=2)

    model = hyperopt.fit(df)

    output_df = model.transform(df)
    output_df.select('label', 'label__output').show(n=10)
    assert True
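# Hypothetical follow-up (not in the original test): turn the sigmoid
# outputs in 'label__output' into hard predictions and compute a simple
# training-set accuracy for the selected model. This assumes
# model.transform emits 'label__output' as a scalar probability column;
# the 0.5 threshold is also an assumption. It would run at the end of
# either test above, where output_df is in scope.
from pyspark.sql import functions as F

preds = output_df.withColumn('prediction',
                             (F.col('label__output') > 0.5).cast('double'))
n_correct = preds.filter(F.col('prediction') == F.col('label')).count()
print('train accuracy: %.3f' % (n_correct / preds.count()))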
    # Tail of estimator_gen_fn in the surrounding example; model, optimizer,
    # exp_rmspe, and CUSTOM_OBJECTS are defined earlier in that file.
    estimator = SparkEstimator(model=model,
                               optimizer=optimizer,
                               loss='mae',
                               metrics=[exp_rmspe],
                               custom_objects=CUSTOM_OBJECTS,
                               batch_size=params['batch_size'])
    return estimator


# Define dictionary containing the parameter search space.
search_space = {
    'lr': hp_loguniform(-5, -3),
    'l2': hp_loguniform(-6, -4),
    'num_layers': hp_choice([3, 4, 5, 6]),
    'batch_size': hp_quniform(16, 128, 16)
}

# Instantiate the model selection object. all_cols and args come from the
# surrounding example.
model_selection = TPESearch(backend=backend,
                            store=store,
                            estimator_gen_fn=estimator_gen_fn,
                            search_space=search_space,
                            num_models=args.num_models,
                            num_epochs=args.epochs,
                            validation='Validation',
                            evaluation_metric='loss',
                            feature_columns=all_cols,
                            label_columns=['Sales'],
                            parallelism=args.num_workers)
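# exp_rmspe above is defined elsewhere in the example. Below is a minimal
# sketch of a root-mean-squared-percentage-error metric on log-transformed
# sales targets, the convention in the Rossmann-style examples this fragment
# resembles; the exact definition in the original file may differ.
# CUSTOM_OBJECTS would then map the name to this function so the saved model
# can be deserialized with the metric attached.
import tensorflow as tf

def exp_rmspe(y_true, y_pred):
    # Undo the log transform, then compute the squared percentage error.
    pct = tf.square((tf.exp(y_true) - tf.exp(y_pred)) / tf.exp(y_true))
    return tf.sqrt(tf.reduce_mean(pct))

CUSTOM_OBJECTS = {'exp_rmspe': exp_rmspe}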
    return keras_estimator


def main():
    SPARK_MASTER_URL = 'spark://...'  # Change the Spark master URL.
    DATA_STORE_PATH = 'file:///...'  # Change the data store path. Should be accessible from all Spark workers.

    spark = SparkSession \
        .builder \
        .master(SPARK_MASTER_URL) \
        .appName("Deep Postures Example") \
        .getOrCreate()

    backend = SparkBackend(spark_context=spark.sparkContext, num_workers=1)
    store = LocalStore(DATA_STORE_PATH,
                       train_path=os.path.join(DATA_STORE_PATH, 'train'),
                       val_path=os.path.join(DATA_STORE_PATH, 'valid'))

    search_space = {
        'lr': hp_choice([0.001, 0.0001]),
        'l2_reg': hp_choice([0.001, 0.0001]),
        'win_size': hp_choice([7, 9]),
        'amp_factor': hp_choice([2, 4])
    }

    # Grid search trains every combination in search_space; the 10 here is
    # the per-model epoch budget in this example.
    model_selection = GridSearch(backend, store, estimator_gen_fn,
                                 search_space, 10,
                                 evaluation_metric='loss',
                                 feature_columns=['id', 'time', 'non_wear',
                                                  'sleeping', 'label', 'data'],
                                 label_columns=['label'])
    model = model_selection.fit_on_prepared_data()


if __name__ == "__main__":
    main()
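# Hypothetical follow-up (not in the original example), meant as extra lines
# at the end of main(): assuming fit_on_prepared_data() returns the same kind
# of object that fit() returns in the tests above, the selected model can
# score new data via model.transform. The 'test' subdirectory and Parquet
# format are placeholder assumptions, and the os import is assumed from the
# original file; spark, model, and DATA_STORE_PATH are in scope inside main().
test_df = spark.read.parquet(os.path.join(DATA_STORE_PATH, 'test'))
predictions = model.transform(test_df)
predictions.select('label', 'label__output').show(n=10)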