def test_lda_preplexity():
    """Check that batch-trained LDA preplexity improves with iterations.

    Fitting for 10 iterations should give a preplexity no worse than
    fitting for a single iteration on the same data (lower is better).
    """
    n_topics, alpha, eta, X = _build_sparse_mtx()
    lda_few = OnlineLDA(n_topics=n_topics, alpha=alpha, eta=eta,
                        random_state=0)
    lda_many = OnlineLDA(n_topics=n_topics, alpha=alpha, eta=eta,
                         random_state=0)

    doc_distr_few = lda_few.fit_transform(X, max_iters=1)
    prep_few = lda_few.preplexity(X, doc_distr_few, sub_sampling=False)

    doc_distr_many = lda_many.fit_transform(X, max_iters=10)
    prep_many = lda_many.preplexity(X, doc_distr_many, sub_sampling=False)

    # more iterations must not increase preplexity
    assert_greater_equal(prep_few, prep_many)
def test_lda_normalize_docs():
    """Each document's topic distribution must sum to one."""
    rng = np.random.RandomState(0)
    n_topics, alpha, eta, X = _build_sparse_mtx()
    model = OnlineLDA(n_topics=n_topics, alpha=alpha, eta=eta,
                      random_state=rng)
    doc_topic_distr = model.fit_transform(X)
    row_sums = doc_topic_distr.sum(axis=1)
    # every row of the doc-topic matrix is a probability distribution
    assert_array_almost_equal(row_sums, np.ones(X.shape[0]))
def test_lda_fit_transform():
    """fit_transform and a subsequent transform must agree.

    Calling ``transform`` on the data a model was just fitted on should
    reproduce the matrix returned by ``fit_transform``.
    """
    rng = np.random.RandomState(0)
    n_topics, alpha, eta, X = _build_sparse_mtx()
    model = OnlineLDA(n_topics=n_topics, alpha=alpha, eta=eta,
                      random_state=rng)
    from_fit = model.fit_transform(X)
    from_transform = model.transform(X)
    # compare up to 4 decimal places
    assert_array_almost_equal(from_fit, from_transform, 4)
def test_lda_dense_input():
    """Test LDA with dense input. Similar to test_lda().

    Note: this function was previously defined twice (the two copies
    differed only in the ``1.0`` vs ``1.`` literal), so the first
    definition was silently shadowed and never run. The duplicates are
    merged into this single definition.
    """
    rng = np.random.RandomState(0)
    # dense integer count matrix: 20 documents, 10 terms
    X = rng.randint(5, size=(20, 10))
    n_topics = 3
    # symmetric priors: alpha = eta = 1 / n_topics
    alpha0 = eta0 = 1.0 / n_topics
    lda = OnlineLDA(n_topics=n_topics, alpha=alpha0, eta=eta0,
                    random_state=rng)
    X_trans = lda.fit_transform(X)
    # the fitted doc-topic matrix should contain positive entries
    assert_true((X_trans > 0.0).any())