# Tune the logistic-regression pipeline with 3-fold cross-validation, score
# the held-out data, and inspect the winning model.
#
# Evaluator choice: use RegressionEvaluator for regression problems,
# BinaryClassificationEvaluator for binary labels, and
# MulticlassClassificationEvaluator for multiclass problems.
#
# NOTE(review): lrPipeline, paramGrid, lr, train and test are not defined in
# this section; they are assumed to be defined earlier in the file -- confirm.
cv = (
    CrossValidator()
    .setEstimator(lrPipeline)
    # Left at the evaluator's default metric so model selection is unchanged.
    .setEvaluator(
        MulticlassClassificationEvaluator()
        .setLabelCol("indexedLabel")
        .setPredictionCol("prediction")
    )
    .setEstimatorParamMaps(paramGrid)
    .setNumFolds(3)
)
cvModel = cv.fit(train)

# transform() on the fitted CrossValidatorModel scores with the best model.
lrPredictions = cvModel.transform(test)

# collect() pulls every prediction row to the driver; acceptable only for
# small evaluation sets.
lrPreRel = lrPredictions.select(
    "predictedLabel", "label", "features", "probability"
).collect()
for item in lrPreRel:
    print(
        "{},{}-->prob={},predictedLabel{}".format(
            item["label"],
            item["features"],
            item["probability"],
            item["predictedLabel"],
        )
    )

# MulticlassClassificationEvaluator defaults to the F1 metric, so accuracy
# has to be requested explicitly for the value reported as "lrAccuracy".
evaluator = (
    MulticlassClassificationEvaluator()
    .setLabelCol("indexedLabel")
    .setPredictionCol("prediction")
    .setMetricName("accuracy")
)
lrAccuracy = evaluator.evaluate(lrPredictions)
print("lrAccuracy:{}".format(lrAccuracy))

# Fetch the best logistic-regression model and look at its fitted parameters.
bestModel = cvModel.bestModel
# NOTE(review): assumes the logistic-regression stage is the third stage of
# lrPipeline -- confirm against the pipeline definition.
lrModel = bestModel.stages[2]
print(
    "Coefficients: {}Intercept: {}numClasses: {}numFeatures: {}".format(
        lrModel.coefficientMatrix,
        lrModel.interceptVector,
        lrModel.numClasses,
        lrModel.numFeatures,
    )
)

# Show the documentation and current value of the two tuned hyper-parameters.
print(lr.explainParam(lr.regParam))
print(lr.explainParam(lr.elasticNetParam))
# Disabled code kept from the original (inspects a model taken from
# lrPipelineModel, which is not defined in this section):
# lrModel = lrPipelineModel.stages[2]
# print("Coefficients: " + str(lrModel.coefficients) + "Intercept: " + str(lrModel.intercept) + "numClasses: " + str(
#     lrModel.numClasses) + "numFeatures: " + str(lrModel.numFeatures))

# NOTE(review): lr, lrPipeline, trainingData and testData are not defined in
# this section; they are assumed to be defined earlier in the file -- confirm.

# Candidate hyper-parameters: 2 elastic-net mixes x 3 regularisation
# strengths = 6 combinations, each evaluated with 3-fold cross-validation.
paramGrid = (
    ParamGridBuilder()
    .addGrid(lr.elasticNetParam, [0.2, 0.8])
    .addGrid(lr.regParam, [0.01, 0.1, 0.5])
    .build()
)

cv = (
    CrossValidator()
    .setEstimator(lrPipeline)
    # Left at the evaluator's default metric so model selection is unchanged.
    .setEvaluator(
        MulticlassClassificationEvaluator()
        .setLabelCol("indexedLabel")
        .setPredictionCol("prediction")
    )
    .setEstimatorParamMaps(paramGrid)
    .setNumFolds(3)
)
cvModel = cv.fit(trainingData)

# transform() on the fitted CrossValidatorModel scores with the best model.
lrPredictions = cvModel.transform(testData)

# collect() pulls every prediction row to the driver; acceptable only for
# small evaluation sets.
lrPreRel = lrPredictions.select(
    "predictedLabel", "label", "features", "probability"
).collect()
for item in lrPreRel:
    print(
        "{},{}-->prob={},predictedLabel{}".format(
            item["label"],
            item["features"],
            item["probability"],
            item["predictedLabel"],
        )
    )

# MulticlassClassificationEvaluator defaults to the F1 metric, so accuracy
# has to be requested explicitly for the value stored in lrAccuracy.
evaluator = (
    MulticlassClassificationEvaluator()
    .setLabelCol("indexedLabel")
    .setPredictionCol("prediction")
    .setMetricName("accuracy")
)
lrAccuracy = evaluator.evaluate(lrPredictions)
# Previously computed but never reported.
print("lrAccuracy:{}".format(lrAccuracy))

bestModel = cvModel.bestModel
# NOTE(review): assumes the logistic-regression stage is the third stage of
# lrPipeline -- confirm against the pipeline definition.
lrModel = bestModel.stages[2]
print(
    "Coefficients: {}Intercept: {}numClasses: {}numFeatures: {}".format(
        lrModel.coefficientMatrix,
        lrModel.interceptVector,
        lrModel.numClasses,
        lrModel.numFeatures,
    )
)

# explainParam() only returns a string; print it so the description is
# actually shown when this runs as a script.
print(lr.explainParam(lr.regParam))
print(lr.explainParam(lr.elasticNetParam))