lambda p: Row(userId=int(p[0]), movieId=int(p[1]), rating=float(p[2]))) ratings = spark.createDataFrame(ratingsRDD) (training, test) = ratings.randomSplit([0.8, 0.2]) # Build the recommendation model using ALS on the training data # Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics als = ALS(rank=10, maxIter=5, regParam=0.01, implicitPrefs=True, alpha=40.0, userCol="userId", itemCol="movieId", ratingCol="rating", coldStartStrategy="drop") print( "\nALS training with implicitPrefs={}, rank={}, maxIter={}, regParam={}, alpha={}, seed={}\n" .format(als.getImplicitPrefs(), als.getRank(), als.getMaxIter(), als.getRegParam(), als.getAlpha(), als.getSeed())) model = als.fit(training) # Evaluate the model by computing the RMSE on the test data predictions = model.transform(test) evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating", predictionCol="prediction") rmse = evaluator.evaluate(predictions) print("Root-mean-square error = " + str(rmse)) spark.stop()