Exemplo n.º 1
0
# MAGIC    dataset given the specified metric
# MAGIC 
# MAGIC 3. The **`ComputeModelStatistics`** Transformer computes the different
# MAGIC    metrics on a scored dataset (in our case, the `validation` dataset)
# MAGIC    at the same time

# COMMAND ----------

from mmlspark.train import TrainClassifier, ComputeModelStatistics
from mmlspark.automl import FindBestModel

# Split the data 60% / 20% / 20% into train, test and validation sets
# (the split is seeded with 123)
train, test, validation = data.randomSplit([0.60, 0.20, 0.20], seed=123)

# Build one logistic regression estimator per candidate regParam value,
# then fit each of them on the 'train' split
lrHyperParams = [0.05, 0.1, 0.2, 0.4]
logisticRegressions = [
    LogisticRegression(regParam=regStrength) for regStrength in lrHyperParams
]
lrmodels = [
    TrainClassifier(model=estimator, labelCol="label", numFeatures=10000).fit(train)
    for estimator in logisticRegressions
]

# Pick the best of the fitted models by AUC, evaluated on the 'test' split
modelSelector = FindBestModel(evaluationMetric="AUC", models=lrmodels)
bestModel = modelSelector.fit(test)


# Score the 'validation' split with the selected model and report its AUC
predictions = bestModel.transform(validation)
statsComputer = ComputeModelStatistics()
metrics = statsComputer.transform(predictions)
validationAuc = metrics.first()["AUC"]
print("Best model's AUC on validation set = "
      + "{0:.2f}%".format(validationAuc * 100))
Exemplo n.º 2
0
    for gbt in gbtclassifiers
]

# Concatenate the three groups of candidate models (built earlier in the
# notebook) into one collection; it is passed to FindBestModel below.
trainedModels = lrmodels + rfmodels + gbtmodels

# COMMAND ----------

# MAGIC %md Find the best model for the given test dataset.

# COMMAND ----------

from mmlspark.automl import FindBestModel
# Compare all trained candidates by AUC on the test split and keep the winner
modelSelector = FindBestModel(evaluationMetric="AUC", models=trainedModels)
bestModel = modelSelector.fit(ptest)

# Display, in order: the evaluation results, the best model's metrics,
# and the metrics of every candidate model
for fetchReport in (bestModel.getEvaluationResults,
                    bestModel.getBestModelMetrics,
                    bestModel.getAllModelMetrics):
    fetchReport().show()

# COMMAND ----------

# MAGIC %md Get the accuracy and AUC from the validation dataset.

# COMMAND ----------

from mmlspark.train import ComputeModelStatistics
# Score the validation split with the selected model and compute its statistics
predictions = bestModel.transform(pvalidation)
metrics = ComputeModelStatistics().transform(predictions)

# Fetch the first metrics row once and reuse it: every `first()` call is a
# separate Spark action, so calling it per metric would collect the
# statistics DataFrame twice.
metricsRow = metrics.first()
print("Best model's accuracy on validation set = " +
      "{0:.2f}%".format(metricsRow["accuracy"] * 100))
print("Best model's AUC on validation set = " +
      "{0:.2f}%".format(metricsRow["AUC"] * 100))