from pyspark.ml.evaluation import RegressionEvaluator

# Select the ALS rank that yields the lowest RMSE on the validation set.
#
# NOTE(review): `als`, `training_df` and `validation_df` are not defined in
# this section; they are assumed to be created earlier in the file (an ALS
# estimator and the training/validation DataFrames) -- confirm.

# Create an RMSE evaluator using the label and predicted columns
reg_eval = RegressionEvaluator(predictionCol="prediction", labelCol="rating", metricName="rmse")

tolerance = 0.03
ranks = [4, 8, 12]
# errors[i] / models[i] hold the validation RMSE and the fitted model for
# ranks[i]; size them from `ranks` so the lists cannot drift out of sync.
errors = [0] * len(ranks)
models = [0] * len(ranks)
err = 0
min_error = float('inf')
# Index into `ranks` of the best model seen so far (-1 until one is evaluated).
best_rank = -1

# enumerate() advances `err` together with `rank`, so each result lands in
# its own slot and `best_rank` can point at any candidate, not just index 0.
for err, rank in enumerate(ranks):
    # Set the rank here:
    als.setRank(rank)
    # Create the model with these parameters.
    model = als.fit(training_df)
    # Run the model to create a prediction. Predict against the validation_df.
    predict_df = model.transform(validation_df)

    # Remove NaN values from prediction (due to SPARK-14489).
    # Spark SQL treats NaN as equal to NaN, so this column comparison does
    # drop the NaN rows (unlike a comparison between plain Python floats).
    predicted_ratings_df = predict_df.filter(predict_df.prediction != float('nan'))

    # Run the previously created RMSE evaluator, reg_eval, on the predicted_ratings_df DataFrame
    error = reg_eval.evaluate(predicted_ratings_df)
    errors[err] = error
    models[err] = model
    # Single-argument print(...) emits the same text on Python 2 and Python 3.
    print('For rank %s the RMSE is %s' % (rank, error))
    if error < min_error:
        min_error = error
        best_rank = err