def test_compute_models_parallel_sklearn():
    """Fit several LDA models in parallel (varying only `n_components`) and
    verify each result is a (parameter dict, fitted model) pair."""
    expected_keys = {'n_components', 'learning_method', 'evaluate_every', 'max_iter', 'n_jobs'}
    var_params = [dict(n_components=k) for k in range(2, 5)]
    fixed_params = dict(learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)

    models = tm_sklearn.compute_models_parallel(EVALUATION_TEST_DTM, var_params, fixed_params)

    # one result per varying-parameter set
    assert len(models) == len(var_params)
    for params, lda in models:
        # the returned parameter dict merges varying and constant parameters
        assert set(params.keys()) == expected_keys
        assert isinstance(lda, LatentDirichletAllocation)
        assert isinstance(lda.components_, np.ndarray)
def test_compute_models_parallel_sklearn_multiple_docs():
    """Exercise `compute_models_parallel` across four input shapes:
    a single unnamed DTM, a single named DTM, and multiple named DTMs,
    each with and without varying parameters."""
    expected_keys = {'n_components', 'learning_method', 'evaluate_every', 'max_iter', 'n_jobs'}

    def check_pair(params, lda, keys):
        # every result pair holds a parameter dict with the expected keys
        # and a fitted sklearn LDA model
        assert set(params.keys()) == keys
        assert isinstance(lda, LatentDirichletAllocation)
        assert isinstance(lda.components_, np.ndarray)

    # --- 1 doc, no varying params ---
    fixed_params = dict(n_components=3, learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)
    models = tm_sklearn.compute_models_parallel(EVALUATION_TEST_DTM, constant_parameters=fixed_params)
    # unnamed single document -> plain list with a single (params, model) tuple
    assert len(models) == 1
    assert type(models) is list
    assert len(models[0]) == 2
    params1, model1 = models[0]
    assert params1 == fixed_params
    assert isinstance(model1, LatentDirichletAllocation)
    assert isinstance(model1.components_, np.ndarray)

    # --- 1 *named* doc, some varying params ---
    fixed_params = dict(learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)
    var_params = [dict(n_components=k) for k in range(2, 5)]
    docs = {'test1': EVALUATION_TEST_DTM}
    models = tm_sklearn.compute_models_parallel(docs, var_params, constant_parameters=fixed_params)
    # named documents -> dict keyed by document label
    assert len(models) == len(docs)
    assert isinstance(models, dict)
    assert set(models.keys()) == {'test1'}
    for label, results in models.items():
        assert label == 'test1'
        assert len(results) == len(var_params)
        for params, lda in results:
            check_pair(params, lda, expected_keys)

    # --- n docs, no varying params ---
    fixed_params = dict(n_components=3, learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)
    models = tm_sklearn.compute_models_parallel(EVALUATION_TEST_DTM_MULTI, constant_parameters=fixed_params)
    assert len(models) == len(EVALUATION_TEST_DTM_MULTI)
    assert isinstance(models, dict)
    assert set(models.keys()) == set(EVALUATION_TEST_DTM_MULTI.keys())
    for results in models.values():
        # constant parameters only -> exactly one model per document
        assert len(results) == 1
        for params, lda in results:
            check_pair(params, lda, set(fixed_params.keys()))

    # --- n docs, some varying params ---
    fixed_params = dict(learning_method='batch', evaluate_every=1, max_iter=3, n_jobs=1)
    var_params = [dict(n_components=k) for k in range(2, 5)]
    models = tm_sklearn.compute_models_parallel(EVALUATION_TEST_DTM_MULTI, var_params,
                                                constant_parameters=fixed_params)
    assert len(models) == len(EVALUATION_TEST_DTM_MULTI)
    assert isinstance(models, dict)
    assert set(models.keys()) == set(EVALUATION_TEST_DTM_MULTI.keys())
    for results in models.values():
        assert len(results) == len(var_params)
        for params, lda in results:
            check_pair(params, lda, expected_keys)