def costs_movies(cluster, train, test):
    """Print the K-means cost on ``test`` for each candidate cluster count.

    For every value in ``cluster`` a fresh model is trained on ``train``
    (10 iterations max) and the value returned by the model's
    ``computeCost(test)`` is printed on its own line.

    Args:
        cluster: Iterable of cluster counts (k) to evaluate.
        train: Data the model is fitted on -- presumably an RDD of feature
            vectors; confirm against the caller.
        test: Data the cost is computed on -- same assumption as ``train``.

    Returns:
        None. Results are only printed.
    """
    for k in cluster:
        # NOTE(review): `runs` appears to be a no-op since Spark 2.0 and is
        # rejected by later PySpark releases -- confirm the target Spark
        # version before keeping or dropping it.
        model = KMeans().train(train, k=k, maxIterations=10, runs=3)
        wcss = model.computeCost(test)
        # "WCSS" = within-cluster sum of squares.
        print("WCSS for k=" + str(k) + ":" + str(wcss))
# Show the movie clustering model (built earlier in the script).
print("movie cluster model kmeans :")
print(movie_cluster_model)

# Train a 5-cluster K-means model on the user vectors and show it.
# NOTE(review): `runs` appears to be a no-op since Spark 2.0 and is rejected
# by later PySpark releases -- confirm the target Spark version.
user_cluster_model = KMeans().train(user_vectors, k=5, maxIterations=10, runs=3)
print("user cluster model kmeans :")
print(user_cluster_model)

# predict: assign the first movie vector to one of the movie clusters.
movie_1 = movie_vectors.first()
movie_cluster = movie_cluster_model.predict(movie_1)
print(movie_cluster)

# evaluation: cost of the movie model on the data it is evaluated against.
movie_cost = movie_cluster_model.computeCost(movie_vectors)
print("WCSS for movies :" + str(movie_cost))

# 60/40 split with a fixed seed (123) so the sweep below is repeatable.
train_test_split_movies = movie_vectors.randomSplit((0.6, 0.4), 123)
train_movies = train_test_split_movies[0]
test_movies = train_test_split_movies[1]


def costs_movies(cluster, train, test):
    """Print the K-means cost on ``test`` for each candidate cluster count.

    For every value in ``cluster`` a fresh model is trained on ``train``
    (10 iterations max) and the value returned by the model's
    ``computeCost(test)`` is printed on its own line.

    Args:
        cluster: Iterable of cluster counts (k) to evaluate.
        train: Data the model is fitted on.
        test: Data the cost is computed on.

    Returns:
        None. Results are only printed.
    """
    for c in cluster:
        m = KMeans().train(train, k=c, maxIterations=10, runs=3)
        wcss = m.computeCost(test)
        # Same "WCSS" label as the whole-dataset cost printed above.
        print("WCSS for k=" + str(c) + ":" + str(wcss))


# Sweep several cluster counts: train on the 60% split, score on the 40%.
cluster_list = [2, 3, 4, 5, 10, 20]
costs_movies(cluster_list, train_movies, test_movies)