Пример #1
0
def _fit_lda(matrix, n_topics, max_iters, random_state=None):
    """Fit one online LDA model on *matrix*.

    Returns a ``(doc_topic, components)`` pair taken from the same fit.
    """
    lda = LatentDirichletAllocation(n_components=n_topics,
                                    max_iter=max_iters,
                                    learning_method='online',
                                    learning_offset=5.,
                                    random_state=random_state)
    # fit_transform fits exactly once; components_ is read afterwards so the
    # two outputs describe the same set of topics.
    doc_topic = lda.fit_transform(matrix)
    return doc_topic, lda.components_


def LDA_rep(window_data, n_topics=10, max_iters=20, random_state=None):
    """Build LDA topic representations of users and artists.

    Args:
        window_data: DataFrame with at least the columns ``"user"``,
            ``"artist"`` and ``"sum"``; each (user, artist) pair must be
            unique because the data is pivoted on those two columns.
        n_topics: Number of LDA components for both models.
        max_iters: ``max_iter`` passed to both LDA models.
        random_state: Optional seed forwarded to both LDA models; ``None``
            (the default) keeps the unseeded behaviour.

    Returns:
        dict with keys:
            ``"ui"``      user x artist matrix (missing pairs filled with 0),
            ``"ti"``      components of the LDA fitted on the user x artist matrix,
            ``"ut"``      transform of the user x artist matrix by that model,
            ``"tu"``      components of the LDA fitted on the artist x user matrix,
            ``"it"``      transform of the artist x user matrix by that model,
            ``"users"``   row index of the user x artist matrix,
            ``"artists"`` row index of the artist x user matrix,
            ``"user_sim"`` / ``"artist_sim"``  results of ``hf.LDASim`` on
            ``"ut"`` / ``"it"``.
    """
    # data is in sliding window form
    ui_matrix = (window_data[["user", "artist", "sum"]]
                 .pivot(index="user", columns="artist", values="sum")
                 .fillna(0))
    # The artist x user matrix is the transpose of the user x artist matrix,
    # so a second pivot over the same data is unnecessary.
    iu_matrix = ui_matrix.T

    # Each model is fitted once. Previously fit() was followed by
    # fit_transform(), which re-trained the model, so the component matrix
    # and the transformed matrix came from two different random fits.
    ut_matrix, ti_matrix = _fit_lda(ui_matrix, n_topics, max_iters,
                                    random_state)
    it_matrix, tu_matrix = _fit_lda(iu_matrix, n_topics, max_iters,
                                    random_state)

    res = {
        "ui": ui_matrix,
        "ti": ti_matrix,
        "ut": ut_matrix,
        "tu": tu_matrix,
        "it": it_matrix,
        "users": ui_matrix.index,
        "artists": iu_matrix.index,
    }
    res["user_sim"] = hf.LDASim(res["ut"])
    res["artist_sim"] = hf.LDASim(res["it"])
    return res
Пример #2
0
# Term-frequency part: every row of the item-user matrix divided by its row
# total.
# NOTE(review): the [:, np.newaxis] indexing assumes the row sums behave like
# a NumPy array -- confirm the type of iu_matrix.
iu_row_totals = iu_matrix.sum(axis=1)
tf = iu_matrix / iu_row_totals[:, np.newaxis]

# Weight called "idf" here: the non-zero indicator divided by the number of
# non-zero entries counted along axis 0.
iu_nonzero = iu_matrix > 0
idf = iu_nonzero / iu_nonzero.sum(axis=0)

# Element-wise product of the two weightings.
iu_matrix_tfidf = np.multiply(tf, idf)

# Transform the weighted item-user matrix into an item-topic matrix and a
# topic-user matrix.
# `n_components` is the current name of the topic-count parameter; the old
# `n_topics` keyword is no longer accepted by recent scikit-learn releases.
iu_lda = LatentDirichletAllocation(n_components=n_topics,
                                   max_iter=iters,
                                   learning_method='online',
                                   learning_offset=50.,
                                   random_state=0)

# Fit exactly once: fit_transform() trains the model and returns the
# item-topic matrix; components_ is then the topic-user matrix of that same
# fit. (Calling fit() and then fit_transform() trained the model twice.)
it_matrix = iu_lda.fit_transform(iu_matrix_tfidf)
tu_matrix = iu_lda.components_

# Similarities derived from the topic representations via the project helper
# hf.LDASim. ut_matrix is not defined in this excerpt -- presumably the
# user-topic matrix from an earlier LDA fit; TODO confirm.
user_sim = hf.LDASim(ut_matrix)
item_sim = hf.LDASim(it_matrix)

# Per-user diversity measure from hf.LDADiversity (semantics defined in hf).
user_div = hf.LDADiversity(ut_matrix)

# NOTE(review): the return value is discarded here -- confirm LDAClustering is
# called only for its side effects.
hf.LDAClustering(item_sim, ui_matrix)

# NOTE(review): `graph` is not defined anywhere in this excerpt -- verify it
# is created earlier in the script.
hf.draw_graph(graph)

# Clustering bipartite graph
# Form the symmetric adjacency matrix of the bipartite graph:
#     W = [[0, A], [A^T, 0]]   with A = ui_matrix (m rows, n columns)
m, n = ui_matrix.shape
axis_label = np.concatenate((ui_matrix.index, ui_matrix.columns))
W = np.block([[np.zeros((m, m)), ui_matrix], [ui_matrix.T, np.zeros((n, n))]])

# Degree of every node (column sums of W) and the diagonal degree matrix.
degrees = W.sum(axis=0)
D = np.diag(degrees)

# Symmetric normalisation W_scaled = D^(-1/2) W D^(-1/2).
# D is diagonal, so D^(-1/2) is just sqrt(1 / degree) on the diagonal; scaling
# rows and columns directly avoids two dense matrix inversions and two dense
# matrix products. A zero degree makes D singular, which np.linalg.inv
# reported as LinAlgError -- keep that contract explicitly.
if np.any(degrees == 0):
    raise np.linalg.LinAlgError(
        "Singular matrix: the bipartite graph contains a node with zero degree")
d_inv_sqrt = np.sqrt(1.0 / degrees)
W_scaled = d_inv_sqrt[:, np.newaxis] * W * d_inv_sqrt[np.newaxis, :]