def test_compute_models_parallel_gensim():
    """Check ``tm_gensim.compute_models_parallel`` on a single DTM with varying topic counts.

    One result pair is expected per varying parameter set; every pair must hold the parameter
    set that was used and a gensim ``LdaModel`` whose state lambda is a NumPy array.
    """
    fixed_settings = {'update_every': 0, 'passes': 1, 'iterations': 1}
    topic_grid = [{'num_topics': n_topics} for n_topics in range(2, 5)]
    # the parameter set reported for a model is expected to combine varying and constant keys
    expected_keys = {'num_topics'} | set(fixed_settings)

    results = tm_gensim.compute_models_parallel(EVALUATION_TEST_DTM, topic_grid, fixed_settings)

    assert len(results) == len(topic_grid)

    for used_params, lda in results:
        assert set(used_params.keys()) == expected_keys
        assert isinstance(lda, gensim.models.LdaModel)
        assert isinstance(lda.state.get_lambda(), np.ndarray)
def test_compute_models_parallel_gensim_multiple_docs():
    """Check the result layout of ``tm_gensim.compute_models_parallel`` for different inputs.

    Four scenarios are run: a bare DTM with constant parameters only, a dict holding one named
    DTM with varying parameters, and ``EVALUATION_TEST_DTM_MULTI`` both without and with
    varying parameters.
    """

    def check_pairs(pairs, expected_keys):
        # every entry must unpack into a parameter set and an LdaModel with an ndarray lambda
        for used_params, lda in pairs:
            assert set(used_params.keys()) == expected_keys
            assert isinstance(lda, gensim.models.LdaModel)
            assert isinstance(lda.state.get_lambda(), np.ndarray)

    # Scenario 1: bare DTM, constant parameters only -> list with one (params, model) pair
    fixed_settings = {'num_topics': 3, 'update_every': 0, 'passes': 1, 'iterations': 1}
    results = tm_gensim.compute_models_parallel(
        EVALUATION_TEST_DTM,
        constant_parameters=fixed_settings,
    )
    assert len(results) == 1
    assert type(results) is list
    only_entry = results[0]
    assert len(only_entry) == 2
    used_params, lda = only_entry
    assert used_params == fixed_settings
    assert isinstance(lda, gensim.models.LdaModel)
    assert isinstance(lda.state.get_lambda(), np.ndarray)

    # Scenario 2: dict with a single named DTM plus varying parameters -> dict keyed by that name
    all_keys = {'num_topics', 'update_every', 'passes', 'iterations'}
    fixed_settings = {'update_every': 0, 'passes': 1, 'iterations': 1}
    topic_grid = [{'num_topics': n_topics} for n_topics in range(2, 5)]
    named_dtms = {'test1': EVALUATION_TEST_DTM}
    results = tm_gensim.compute_models_parallel(
        named_dtms,
        topic_grid,
        constant_parameters=fixed_settings,
    )
    assert len(results) == len(named_dtms)
    assert isinstance(results, dict)
    assert set(results.keys()) == {'test1'}

    for label, pairs in results.items():
        assert label == 'test1'
        assert len(pairs) == len(topic_grid)
        check_pairs(pairs, all_keys)

    # Scenario 3: EVALUATION_TEST_DTM_MULTI, constant parameters only -> one pair per key
    fixed_settings = {'num_topics': 3, 'update_every': 0, 'passes': 1, 'iterations': 1}
    results = tm_gensim.compute_models_parallel(
        EVALUATION_TEST_DTM_MULTI,
        constant_parameters=fixed_settings,
    )
    assert len(results) == len(EVALUATION_TEST_DTM_MULTI)
    assert isinstance(results, dict)
    assert set(results.keys()) == set(EVALUATION_TEST_DTM_MULTI.keys())

    for pairs in results.values():
        assert len(pairs) == 1
        check_pairs(pairs, set(fixed_settings.keys()))

    # Scenario 4: EVALUATION_TEST_DTM_MULTI with varying parameters -> one pair per grid entry
    all_keys = {'num_topics', 'update_every', 'passes', 'iterations'}
    fixed_settings = {'update_every': 0, 'passes': 1, 'iterations': 1}
    topic_grid = [{'num_topics': n_topics} for n_topics in range(2, 5)]
    results = tm_gensim.compute_models_parallel(
        EVALUATION_TEST_DTM_MULTI,
        topic_grid,
        constant_parameters=fixed_settings,
    )
    assert len(results) == len(EVALUATION_TEST_DTM_MULTI)
    assert isinstance(results, dict)
    assert set(results.keys()) == set(EVALUATION_TEST_DTM_MULTI.keys())

    for pairs in results.values():
        assert len(pairs) == len(topic_grid)
        check_pairs(pairs, all_keys)