# Imports required by these tests; the module paths for `benchmark` and
# `METRICS` are assumptions inferred from how they are used below.
from unittest.mock import ANY, Mock, patch

import pandas as pd

from orion import benchmark
from orion.evaluation import METRICS


def test_benchmark_metrics_exception(self):
    signals = [self.signal]
    datasets = {self.dataset: signals}
    pipelines = {self.name: self.pipeline}
    metric = 'does-not-exist'
    metrics = [metric]

    with self.assertRaises(ValueError) as ex:
        benchmark.benchmark(pipelines, datasets, self.hyper, metrics, self.rank)

    # `in` on the exception object itself raises TypeError; match on its message.
    self.assertTrue(metric in str(ex.exception))
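# The tests in this suite rely on fixture attributes (self.signal,
# self.dataset, self.name, self.pipeline, self.hyper, ...) defined in the
# test class elsewhere. A minimal sketch of such a setUp -- every value
# below is an assumption for illustration, not the real fixture:
def setUp(self):
    self.signal = 'signal-name'
    self.dataset = 'dataset-name'
    self.name = 'pipeline-name'
    self.pipeline = 'pipeline-name'
    self.hyper = None
    self.metrics = {'metric-name': METRICS['f1']}
    self.rank = 'metric-name'
    self.distributed = False
    self.iteration = 0
    self.run_id = ANY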
@patch('orion.benchmark._evaluate_datasets')  # patch target assumed from the mock arg name
def test_benchmark_metrics_exception(self, evaluate_datasets_mock):
    test_split = False
    detrend = False
    signals = [self.signal]
    datasets = {self.dataset: signals}
    pipelines = {self.name: self.pipeline}
    metric = 'does-not-exist'
    metrics = [metric]

    score = self.set_score(1, ANY, test_split)
    evaluate_datasets_mock.return_value = pd.DataFrame.from_records([score])

    with self.assertRaises(ValueError) as ex:
        benchmark.benchmark(pipelines, datasets, self.hyper, metrics, self.rank,
                            self.distributed, test_split, detrend)

    # Match on the exception message rather than the exception object.
    self.assertTrue(metric in str(ex.exception))
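# `set_score` is a helper defined elsewhere in the test class. A minimal
# sketch, assuming it builds one raw score record in the shape that
# `_evaluate_datasets` returns ('rank' is assumed to be added later by
# `benchmark` itself):
def set_score(self, metric_value, elapsed, test_split):
    return {
        'metric-name': metric_value,
        'elapsed': elapsed,
        'split': test_split,
        'pipeline': self.name,
        'dataset': self.dataset,
        'signal': self.signal,
        'status': 'OK',
    }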
@patch('orion.benchmark._run_job')  # patch target assumed from the mock arg name
def test_benchmark_metrics_list(self, run_job_mock):
    signals = [self.signal]
    datasets = {self.dataset: signals}
    pipelines = {self.name: self.pipeline}
    metric = Mock(autospec=METRICS['f1'], return_value=1)
    metric.__name__ = 'metric-name'
    metrics = [metric]
    metrics_ = {metric.__name__: metric}

    output = self.set_output(1, ANY, ANY)
    output[metric.__name__] = metric
    run_job_mock.return_value = pd.DataFrame.from_records([output])

    order = [
        'pipeline', 'rank', 'dataset', 'signal', 'iteration',
        'metric-name', 'status', 'elapsed', 'split', 'run_id'
    ]
    expected_return = pd.DataFrame.from_records([{
        'metric-name': metric,
        'rank': 1,
        'elapsed': ANY,
        'split': ANY,
        'status': 'OK',
        'iteration': self.iteration,
        'pipeline': self.name,
        'dataset': self.dataset,
        'signal': self.signal,
        'run_id': self.run_id
    }])[order]

    returned = benchmark.benchmark(
        pipelines, datasets, self.hyper, metrics, self.rank)[order]

    pd.testing.assert_frame_equal(returned, expected_return)

    args = list(self.args())
    args[5] = metrics_
    run_job_mock.assert_called_once_with(tuple(args))
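# `set_output` and `args` are helpers defined elsewhere in the test class.
# A plausible sketch, assuming `set_output` builds one raw result record
# (with 'rank' added later by `benchmark` itself) and `args` mirrors the
# positional payload handed to `_run_job`; only the indices the tests
# overwrite (1 = pipeline, 4 = hyperparameters, 5 = metrics) are known, so
# the rest of the ordering is an assumption:
def set_output(self, metric_value, elapsed, split):
    return {
        'metric-name': metric_value,
        'status': 'OK',
        'elapsed': elapsed,
        'split': split,
        'iteration': self.iteration,
        'pipeline': self.name,
        'dataset': self.dataset,
        'signal': self.signal,
        'run_id': self.run_id,
    }

def args(self):
    return (self.name, self.pipeline, self.dataset, self.signal,
            self.hyper, self.metrics, self.iteration, self.run_id)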
@patch('orion.benchmark._run_job')  # patch target assumed from the mock arg name
def test_benchmark_pipelines_list(self, run_job_mock):
    signals = [self.signal]
    datasets = {self.dataset: signals}
    pipelines = [self.pipeline]
    hyper = {}

    output = self.set_output(1, ANY, ANY)
    output['pipeline'] = self.pipeline
    run_job_mock.return_value = pd.DataFrame.from_records([output])

    order = [
        'pipeline', 'rank', 'dataset', 'signal', 'iteration',
        'metric-name', 'status', 'elapsed', 'split', 'run_id'
    ]
    expected_return = pd.DataFrame.from_records([{
        'metric-name': 1,
        'rank': 1,
        'elapsed': ANY,
        'split': ANY,
        'status': 'OK',
        'iteration': self.iteration,
        'pipeline': self.pipeline,
        'dataset': self.dataset,
        'signal': self.signal,
        'run_id': self.run_id
    }])[order]

    returned = benchmark.benchmark(
        pipelines, datasets, hyper, self.metrics, self.rank)[order]

    pd.testing.assert_frame_equal(returned, expected_return)

    args = list(self.args())
    args[1] = self.pipeline
    args[4] = hyper
    run_job_mock.assert_called_once_with(tuple(args))
@patch('orion.benchmark._evaluate_datasets')  # patch target assumed from the mock arg name
def test_benchmark_metrics_list(self, evaluate_datasets_mock):
    test_split = False
    detrend = False
    signals = [self.signal]
    datasets = {self.dataset: signals}
    pipelines = {self.name: self.pipeline}
    metric = Mock(autospec=METRICS['f1'], return_value=1)
    metric.__name__ = 'metric-name'
    metrics = [metric]
    metrics_ = {metric.__name__: metric}

    score = self.set_score(1, ANY, test_split)
    score[metric.__name__] = metric
    evaluate_datasets_mock.return_value = pd.DataFrame.from_records([score])

    order = [
        'pipeline', 'rank', 'dataset', 'elapsed', 'metric-name',
        'signal', 'split', 'status'
    ]
    expected_return = pd.DataFrame.from_records([{
        'rank': 1,
        'metric-name': metric,
        'elapsed': ANY,
        'split': test_split,
        'pipeline': self.name,
        'dataset': self.dataset,
        'signal': self.signal,
        'status': 'OK'
    }])[order]

    returned = benchmark.benchmark(pipelines, datasets, self.hyper, metrics,
                                   self.rank, self.distributed, test_split, detrend)

    pd.testing.assert_frame_equal(returned, expected_return)

    evaluate_datasets_mock.assert_called_once_with(
        pipelines, datasets, self.hyper, metrics_,
        self.distributed, test_split, detrend)
# CLI entry point (a separate module from the tests above). `tabulate` plus
# `benchmark` and `get_available_templates` come from this module's own
# imports; the exact import paths here are assumptions:
import tabulate

from orion.benchmark import benchmark, get_available_templates


def _evaluate(args):
    if args.all:
        pipelines = get_available_templates()
    else:
        pipelines = args.pipeline

    scores = benchmark(
        pipelines=pipelines,
        datasets=args.signal,
        metrics=args.metric,
        rank=args.rank,
        test_split=args.holdout
    )

    if args.output:
        print('Writing results to {}'.format(args.output))
        scores.to_csv(args.output, index=False)

    print(tabulate.tabulate(
        scores,
        showindex=False,
        tablefmt='github',
        headers=scores.columns
    ))
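# A minimal sketch of invoking _evaluate from a parsed command line. The
# option names mirror the attributes read above; the argparse wiring itself
# is an assumption for illustration, not the actual CLI definition:
import argparse

parser = argparse.ArgumentParser(description='Evaluate pipelines.')
parser.add_argument('pipeline', nargs='*', help='pipeline templates to evaluate')
parser.add_argument('--all', action='store_true', help='evaluate every available template')
parser.add_argument('--signal', action='append', help='signal(s) to evaluate on')
parser.add_argument('--metric', action='append', help='metric(s) to compute')
parser.add_argument('--rank', help='metric to rank the results by')
parser.add_argument('--holdout', action='store_true', help='score on a test split')
parser.add_argument('--output', help='path to write the scores as CSV')

args = parser.parse_args()
_evaluate(args)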
@patch('orion.benchmark._evaluate_datasets')  # patch target assumed from the mock arg name
def test_benchmark_pipelines_list(self, evaluate_datasets_mock):
    test_split = False
    detrend = False
    signals = [self.signal]
    datasets = {self.dataset: signals}
    pipelines = [self.pipeline]
    pipelines_ = {self.pipeline: self.pipeline}

    score = self.set_score(1, ANY, test_split)
    score['pipeline'] = self.pipeline
    evaluate_datasets_mock.return_value = pd.DataFrame.from_records([score])

    order = [
        'pipeline', 'rank', 'dataset', 'elapsed', 'metric-name',
        'signal', 'split', 'status'
    ]
    expected_return = pd.DataFrame.from_records([{
        'rank': 1,
        'metric-name': 1,
        'elapsed': ANY,
        'split': test_split,
        'pipeline': self.pipeline,
        'dataset': self.dataset,
        'signal': self.signal,
        'status': 'OK'
    }])[order]

    returned = benchmark.benchmark(pipelines, datasets, self.hyper, self.metrics,
                                   self.rank, self.distributed, test_split, detrend)

    pd.testing.assert_frame_equal(returned, expected_return)

    evaluate_datasets_mock.assert_called_once_with(
        pipelines_, datasets, self.hyper, self.metrics,
        self.distributed, test_split, detrend)