示例#1
0
        lambda p: Row(userId=int(p[0]), movieId=int(p[1]), rating=float(p[2])))
    ratings = spark.createDataFrame(ratingsRDD)
    (training, test) = ratings.randomSplit([0.8, 0.2])

    # Build the recommendation model using ALS on the training data
    # Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
    als = ALS(rank=10,
              maxIter=5,
              regParam=0.01,
              implicitPrefs=True,
              alpha=40.0,
              userCol="userId",
              itemCol="movieId",
              ratingCol="rating",
              coldStartStrategy="drop")
    print(
        "\nALS training with implicitPrefs={}, rank={}, maxIter={}, regParam={}, alpha={}, seed={}\n"
        .format(als.getImplicitPrefs(), als.getRank(), als.getMaxIter(),
                als.getRegParam(), als.getAlpha(), als.getSeed()))
    model = als.fit(training)

    # Evaluate the model by computing the RMSE on the test data
    predictions = model.transform(test)
    evaluator = RegressionEvaluator(metricName="rmse",
                                    labelCol="rating",
                                    predictionCol="prediction")
    rmse = evaluator.evaluate(predictions)
    print("Root-mean-square error = " + str(rmse))

    spark.stop()