def evaluate_topic_models(data, varying_parameters, constant_parameters=None, n_max_processes=None,
                          return_models=False, metric=None, **metric_kwargs):
    """
    Compute several topic models in parallel using the "gensim" package.

    The models are calculated for each parameter set in the list `varying_parameters` on a single
    Document-Term-Matrix `data`. Parameters in the `constant_parameters` dict are passed to each
    model calculation. At most `n_max_processes` processors are used, or all available processors
    if None is passed.

    `data` must be a Document-Term-Matrix (NumPy array/matrix, SciPy sparse matrix).

    Will return a list of size `len(varying_parameters)` containing tuples
    `(parameter_set, eval_results)` where `parameter_set` is a dict of the used parameters and
    `eval_results` is a dict of metric names -> metric results.
    """
    # when no metric was explicitly requested, fall back to the module's default metric set
    selected_metrics = metric or DEFAULT_METRICS

    runner = MultiprocEvaluationRunner(MultiprocEvaluationWorkerGensim,
                                       AVAILABLE_METRICS,
                                       data,
                                       varying_parameters,
                                       constant_parameters,
                                       metric=selected_metrics,
                                       metric_options=metric_kwargs,
                                       n_max_processes=n_max_processes,
                                       return_models=return_models)

    return runner.run()
def evaluate_topic_models(data, varying_parameters, constant_parameters=None, n_max_processes=None,
                          return_models=False, metric=None, **metric_kwargs):
    """
    Compute several topic models in parallel using the "gensim" package.

    The models are calculated for each parameter set in the list `varying_parameters` on a single
    Document-Term-Matrix `data`. Parameters in the `constant_parameters` dict are passed to each
    model calculation. At most `n_max_processes` processors are used, or all available processors
    if None is passed.

    `data` must be a Document-Term-Matrix (NumPy array/matrix, SciPy sparse matrix).

    Will return a list of size `len(varying_parameters)` containing tuples
    `(parameter_set, eval_results)` where `parameter_set` is a dict of the used parameters and
    `eval_results` is a dict of metric names -> metric results:

    .. code-block:: text

        [(parameter_set_1, {'<metric_name>': result_1, ...}),
         ...,
         (parameter_set_n, {'<metric_name>': result_n, ...})])

    .. seealso:: Results can be simplified using
                 :func:`tmtoolkit.topicmod.evaluate.results_by_parameter`.

    :param data: a (sparse) 2D array/matrix
    :param varying_parameters: list of dicts with parameters; each parameter set will be used in a
                               separate evaluation
    :param constant_parameters: dict with parameters that are the same for all parallel computations
    :param n_max_processes: maximum number of worker processes to spawn
    :param return_models: if True, also return the computed models in the evaluation results
    :param metric: string or list of strings; if given, use only this metric(s) for evaluation;
                   must be subset of `available_metrics`
    :param metric_kwargs: dict of options for the used metric(s)
    :return: list of evaluation results for each varying parameter set as described above
    """
    # when no metric was explicitly requested, fall back to the module's default metric set
    selected_metrics = metric or DEFAULT_METRICS

    runner = MultiprocEvaluationRunner(MultiprocEvaluationWorkerGensim,
                                       AVAILABLE_METRICS,
                                       data,
                                       varying_parameters,
                                       constant_parameters,
                                       metric=selected_metrics,
                                       metric_options=metric_kwargs,
                                       n_max_processes=n_max_processes,
                                       return_models=return_models)

    return runner.run()