Exemplo n.º 1
0
# Candidate estimators (lg, rf, gbt are defined outside this snippet).
models = [lg, rf, gbt]

# Wrap each candidate in a TrainClassifier configured with labelCol="label".
mml_models = [
    TrainClassifier(model=estimator, labelCol="label")
    for estimator in models
]
# Register, for each estimator, the hyperparameters to vary together with the
# candidate values (a numeric range or an explicit list of choices).
param_builder = (
    HyperparamBuilder()
    .addHyperparam(lg, lg.regParam, RangeHyperParam(0.1, 0.3))
    .addHyperparam(rf, rf.numTrees, DiscreteHyperParam([5, 10]))
    .addHyperparam(rf, rf.maxDepth, DiscreteHyperParam([3, 5]))
    .addHyperparam(gbt, gbt.maxBins, RangeHyperParam(8, 16))
    .addHyperparam(gbt, gbt.maxDepth, DiscreteHyperParam([3, 5]))
)

# Materialize the search space and print it for inspection.
search_space = param_builder.build()
print(search_space)

# Hand the built space to RandomSpace; its .space() is later passed to the tuner.
random_space = RandomSpace(search_space)

# Configure the tuner: accuracy as the evaluation metric, 2 folds, two runs per
# wrapped model, no parallelism, and a fixed seed.
tuner = TuneHyperparameters(
    evaluationMetric="accuracy",
    models=mml_models,
    numFolds=2,
    numRuns=len(mml_models) * 2,
    parallelism=1,
    paramSpace=random_space.space(),
    seed=0,
)

# Fit on the training data; the result exposes the best model found.
best_model = tuner.fit(train_data)

# Report the winning model's info and the model itself.
print(best_model.getBestModelInfo())
print(best_model.getBestModel())

# Apply the tuned model to the test data.
prediction = best_model.transform(test_data)

# Compute model statistics from the predictions.
stats_computer = ComputeModelStatistics()
metrics = stats_computer.transform(prediction)

# Show at most the first 10 rows of the metrics as a pandas DataFrame.
metrics_preview = metrics.limit(10).toPandas()
print(metrics_preview)