示例#1
0
def costs_movies(cluster, train, test):
    """Fit one K-means model per cluster count and report its cost on ``test``.

    Args:
        cluster: Iterable of cluster counts (``k`` values) to try.
        train: Data handed to ``KMeans().train`` to fit each model.
        test: Data handed to ``computeCost`` to score each fitted model.

    Returns:
        A list of ``(k, cost)`` tuples in the order the ``k`` values were
        tried, so callers can compare the costs without parsing the
        printed output.
    """
    costs = []
    for k in cluster:
        model = KMeans().train(train, k=k, maxIterations=10, runs=3)
        wcss = model.computeCost(test)
        # The label was misspelled "WSCC"; "WCSS" matches the label this
        # file already prints for the same computeCost metric.
        print("WCSS for k=" + str(k) + ":" + str(wcss))
        costs.append((k, wcss))
    return costs
示例#2
0
# Show the movie clustering model; movie_cluster_model is not defined in the
# visible part of this script.
print("movie cluster model kmeans :")
print(movie_cluster_model)
# Fit a second K-means model on the user vectors with k=5 and maxIterations=10.
# NOTE(review): `runs` looks like the pyspark.mllib KMeans.train argument that
# newer Spark releases deprecate/remove - confirm against the Spark version used.
user_cluster_model = KMeans().train(user_vectors,
                                    k=5,
                                    maxIterations=10,
                                    runs=3)
print("user cluster model kmeans :")
print(user_cluster_model)

# predict: take the first movie vector, ask the movie model for its cluster,
# and print the result
movie_1 = movie_vectors.first()
movie_cluster = movie_cluster_model.predict(movie_1)
print(movie_cluster)

# evaluation: cost of the movie model computed over all movie vectors
movie_cost = movie_cluster_model.computeCost(movie_vectors)
print("WCSS for movies :" + str(movie_cost))
# Split the movie vectors with weights (0.6, 0.4); the first part is used as
# the training set and the second as the test set.
# NOTE(review): 123 is presumably the random seed - confirm against randomSplit.
train_test_split_movies = movie_vectors.randomSplit((0.6, 0.4), 123)
train_movies = train_test_split_movies[0]
test_movies = train_test_split_movies[1]


def costs_movies(cluster, train, test):
    """Fit one K-means model per cluster count and report its cost on ``test``.

    Args:
        cluster: Iterable of cluster counts (``k`` values) to try.
        train: Data handed to ``KMeans().train`` to fit each model.
        test: Data handed to ``computeCost`` to score each fitted model.

    Returns:
        A list of ``(k, cost)`` tuples in the order the ``k`` values were
        tried, so callers can compare the costs without parsing the
        printed output.
    """
    costs = []
    for k in cluster:
        model = KMeans().train(train, k=k, maxIterations=10, runs=3)
        wcss = model.computeCost(test)
        # The label was misspelled "WSCC"; "WCSS" matches the label this
        # script prints for the same computeCost metric on the full data.
        print("WCSS for k=" + str(k) + ":" + str(wcss))
        costs.append((k, wcss))
    return costs


# Candidate cluster counts to compare: costs_movies fits a model on the
# training split for each value and prints its cost on the test split.
cluster_list = [2, 3, 4, 5, 10, 20]
costs_movies(cluster_list, train_movies, test_movies)