示例#1
0
# Hyperparameters for the ALS estimator configured below.
MaxIter = 10
# Regularization parameter; used to guard against overfitting.
RegParam = 0.01

# NOTE(review): Alpha, Rank and implicitPrefs are not assigned in this
# snippet -- they must already be defined before this point.
# setRegParam was previously chained twice with the same value; the
# redundant second call has been dropped.
als = ALS()\
    .setMaxIter(MaxIter)\
    .setAlpha(Alpha)\
    .setRegParam(RegParam)\
    .setRank(Rank)\
    .setImplicitPrefs(implicitPrefs)\
    .setUserCol("userId")\
    .setItemCol("movieId")\
    .setRatingCol("rating")

# explainParams() only returns the description text, so it has to be
# printed to be visible when this runs as a script.
print(als.explainParams())

# Fit the estimator on `training`, then apply the fitted model to `test`.
alsModel = als.fit(training)
predictions = alsModel.transform(test)

# COMMAND ----------
# Weight factors between users and items.
# They change depending on the rank that was configured.
alsModel.userFactors.show(10, truncate=False)

# Top-10 recommendations for every user, then the ones for userId 0 only.
user_recs = alsModel.recommendForAllUsers(10)
user_recs.show()
(
    user_recs
    .filter(user_recs.userId == 0)
    .select("recommendations.movieId", "recommendations.rating")
    .collect()
)
# Load the "::"-delimited ratings file and cast each field to a typed column.
# spark.read.text already returns a DataFrame with a single `value` column,
# so no RDD round-trip is needed before splitting the line.
ratings = spark.read.text("/data/sample_movielens_ratings.txt")\
  .selectExpr("split(value , '::') as col")\
  .selectExpr(
    "cast(col[0] as int) as userId",
    "cast(col[1] as int) as movieId",
    "cast(col[2] as float) as rating",
    "cast(col[3] as long) as timestamp")

# 80/20 split; no seed is given, so the split differs from run to run.
training, test = ratings.randomSplit([0.8, 0.2])

als = ALS()\
  .setMaxIter(5)\
  .setRegParam(0.01)\
  .setUserCol("userId")\
  .setItemCol("movieId")\
  .setRatingCol("rating")

# Function-call form of print: valid on Python 3 and, with a single
# argument, on Python 2 as well.
print(als.explainParams())

# Fit on the training split and apply the fitted model to the test split.
alsModel = als.fit(training)
predictions = alsModel.transform(test)


# COMMAND ----------

# Top-10 items per user, one output row per (user, recommendation) pair.
(
    alsModel.recommendForAllUsers(10)
    .selectExpr("userId", "explode(recommendations)")
    .show()
)

# Top-10 users per item, one output row per (item, recommendation) pair.
(
    alsModel.recommendForAllItems(10)
    .selectExpr("movieId", "explode(recommendations)")
    .show()
)


# COMMAND ----------

from pyspark.ml.evaluation import RegressionEvaluator
# Preview the modeling data.
recommendation_data.show()

# Save modeling data (replacing any existing output at this path):
recommendation_data.write.mode("overwrite").parquet("data/recommendation_data/")


# ## Create train and test datasets

# 75/25 split with a fixed seed.
train, test = recommendation_data.randomSplit([0.75, 0.25], seed=12345)


# ## Specify and fit an ALS model

from pyspark.ml.recommendation import ALS
# ALS estimator with implicit preferences enabled, reading the `user`,
# `artist` and `playcount` columns; the seed is fixed.
als = ALS(
    userCol="user",
    itemCol="artist",
    ratingCol="playcount",
    implicitPrefs=True,
    seed=23456,
)
print(als.explainParams())

# Fit the estimator on the training split.
als_model = als.fit(train)


# ## Examine the ALS model

# First five rows of the user and item factor DataFrames.
als_model.userFactors.head(n=5)
als_model.itemFactors.head(n=5)

# **Note:** Some artists are not represented in the training data:
# (compare the number of user factor rows with the number of item factor rows)
als_model.userFactors.count()
als_model.itemFactors.count()


# ## Apply the model