def test_compute_models_parallel_sklearn():
    """Check ``tm_sklearn.compute_models_parallel`` on a single DTM with varying parameters.

    The test asserts that one result is returned per varying parameter set and that
    each result is a ``(parameters, model)`` pair whose parameters combine the constant
    and varying names and whose model is a ``LatentDirichletAllocation`` exposing a
    NumPy ``components_`` array.
    """
    fixed_settings = {'learning_method': 'batch', 'evaluate_every': 1, 'max_iter': 3, 'n_jobs': 1}
    topic_grid = [{'n_components': n_topics} for n_topics in (2, 3, 4)]
    expected_names = {'n_components', 'learning_method', 'evaluate_every', 'max_iter', 'n_jobs'}

    results = tm_sklearn.compute_models_parallel(EVALUATION_TEST_DTM, topic_grid, fixed_settings)

    # one result per entry of the varying-parameter grid
    assert len(results) == len(topic_grid)

    for used_params, lda in results:
        assert set(used_params.keys()) == expected_names
        assert isinstance(lda, LatentDirichletAllocation)
        assert isinstance(lda.components_, np.ndarray)
def _assert_lda_results(results, expected_param_names):
    """Assert that every ``(param_set, model)`` pair in *results* looks as expected.

    :param results: iterable of ``(param_set, model)`` pairs
    :param expected_param_names: set of names that each ``param_set`` must have as keys
    """
    for param_set, model in results:
        assert set(param_set.keys()) == expected_param_names
        assert isinstance(model, LatentDirichletAllocation)
        assert isinstance(model.components_, np.ndarray)


def test_compute_models_parallel_sklearn_multiple_docs():
    """Check ``tm_sklearn.compute_models_parallel`` for single, named and multiple DTMs.

    Four scenarios are asserted:

    1. a single DTM with constant parameters only -> a list with one ``(params, model)`` pair;
    2. a dict holding one named DTM plus varying parameters -> a dict with that name as key;
    3. ``EVALUATION_TEST_DTM_MULTI`` with constant parameters only -> a dict with the same
       keys, one result per key;
    4. ``EVALUATION_TEST_DTM_MULTI`` plus varying parameters -> a dict with the same keys,
       one result per varying parameter set for each key.

    The parameter dicts/lists are rebuilt for every scenario so that no scenario depends
    on what a previous call may have done with its inputs.
    """
    # names expected in each result's parameter set when constant and varying params are combined
    passed_params = {'n_components', 'learning_method', 'evaluate_every', 'max_iter', 'n_jobs'}

    # 1 doc, no varying params
    const_params = dict(n_components=3, learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)
    models = tm_sklearn.compute_models_parallel(EVALUATION_TEST_DTM, constant_parameters=const_params)
    assert len(models) == 1
    assert isinstance(models, list)
    assert len(models[0]) == 2
    param1, _ = models[0]
    assert param1 == const_params   # stricter than the key check done by the helper below
    _assert_lda_results(models, set(const_params.keys()))

    # 1 *named* doc, some varying params
    const_params = dict(learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)
    varying_params = [dict(n_components=k) for k in range(2, 5)]
    docs = {'test1': EVALUATION_TEST_DTM}
    models = tm_sklearn.compute_models_parallel(
        docs, varying_params, constant_parameters=const_params
    )
    assert len(models) == len(docs)
    assert isinstance(models, dict)
    assert set(models.keys()) == {'test1'}

    for doc_models in models.values():
        assert len(doc_models) == len(varying_params)
        _assert_lda_results(doc_models, passed_params)

    # n docs, no varying params
    const_params = dict(n_components=3, learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)
    models = tm_sklearn.compute_models_parallel(EVALUATION_TEST_DTM_MULTI, constant_parameters=const_params)
    assert len(models) == len(EVALUATION_TEST_DTM_MULTI)
    assert isinstance(models, dict)
    assert set(models.keys()) == set(EVALUATION_TEST_DTM_MULTI.keys())

    for doc_models in models.values():
        assert len(doc_models) == 1
        _assert_lda_results(doc_models, set(const_params.keys()))

    # n docs, some varying params
    const_params = dict(learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)
    varying_params = [dict(n_components=k) for k in range(2, 5)]
    models = tm_sklearn.compute_models_parallel(
        EVALUATION_TEST_DTM_MULTI, varying_params, constant_parameters=const_params
    )
    assert len(models) == len(EVALUATION_TEST_DTM_MULTI)
    assert isinstance(models, dict)
    assert set(models.keys()) == set(EVALUATION_TEST_DTM_MULTI.keys())

    for doc_models in models.values():
        assert len(doc_models) == len(varying_params)
        _assert_lda_results(doc_models, passed_params)