def test_evaluation_callback_no_name(responses):
    """Without an explicit metric name the callback defaults to 'metric1'."""
    cb = EvaluationCallback()
    # feed the same responses twice; the mean over identical batches is stable
    cb(responses)
    cb(responses)
    evaluation = cb.get_mean_evaluation()
    metric_name = next(iter(evaluation))
    assert metric_name == 'metric1'
    assert evaluation[metric_name] == 0.75
def test_evaluation_callback_with_name(responses):
    """An explicit metric name must be the key of the reported evaluation."""
    metric = 'metric2'
    cb = EvaluationCallback(metric)
    # feed the same responses twice; the mean over identical batches is stable
    cb(responses)
    cb(responses)
    evaluation = cb.get_mean_evaluation()
    metric_name = next(iter(evaluation))
    assert metric_name == metric
    assert evaluation[metric_name] == 0.5
def test_optimizer(tmpdir):
    """End-to-end check that OptunaOptimizer recovers the known best parameters
    and that the saved parameter file round-trips through YAML."""
    best_parameters = {
        'JINA_DUMMYCRAFTER_PARAM1': 0,
        'JINA_DUMMYCRAFTER_PARAM2': 1,
        'JINA_DUMMYCRAFTER_PARAM3': 1,
    }

    def document_generator(num_doc):
        # doc and groundtruth are identical, so a correct craft step scores best
        for _ in range(num_doc):
            doc = Document(content='hello')
            groundtruth_doc = Document(content='hello')
            yield doc, groundtruth_doc

    eval_flow_runner = FlowRunner(
        flow_yaml='tests/integration/optimizers/flow.yml',
        documents=document_generator(10),
        batch_size=1,
        task='search',
        callback=EvaluationCallback(),
    )
    multi_flow = MultiFlowRunner(eval_flow_runner)
    opt = OptunaOptimizer(
        multi_flow=multi_flow,
        parameter_yaml='tests/integration/optimizers/parameter.yml',
        workspace_base_dir=str(tmpdir),
    )
    result = opt.optimize_flow(n_trials=10)
    result_path = str(tmpdir) + '/results/best_parameters.yml'
    result.save_parameters(result_path)
    assert result.best_parameters == best_parameters
    # Bug fix: the original `yaml.load(open(result_path))` leaked the file
    # handle and omitted the Loader argument (unsafe, and a TypeError on
    # PyYAML >= 6). safe_load is sufficient for a plain str -> int mapping.
    with open(result_path) as f:
        assert yaml.safe_load(f) == best_parameters
def test_optimizer_single_flow(tmpdir, config, sampler):
    """Run FlowOptimizer on a single search flow with the parametrized sampler."""
    runner = SingleFlowRunner(
        flow_yaml=os.path.join(cur_dir, 'flow.yml'),
        documents=document_generator(10),
        request_size=1,
        execution_endpoint='search',
    )
    optimizer = FlowOptimizer(
        flow_runner=runner,
        parameter_yaml=os.path.join(cur_dir, 'parameter.yml'),
        evaluation_callback=EvaluationCallback(),
        workspace_base_dir=str(tmpdir),
        n_trials=5,
        sampler=sampler,
    )
    # GridSampler requires an explicit search space; other samplers derive it
    # from the parameter yaml on their own.
    if sampler == 'GridSampler':
        search_space = {
            'JINA_DUMMYCRAFTER_PARAM1': [0, 1],
            'JINA_DUMMYCRAFTER_PARAM2': [0, 1, 2],
            'JINA_DUMMYCRAFTER_PARAM3': [1],
        }
        result = optimizer.optimize_flow(search_space=search_space)
    else:
        result = optimizer.optimize_flow()
    validate_result(result, tmpdir)
def test_optimizer_multi_flow(tmpdir, config):
    """Optimize across an index flow followed by a search flow."""

    def _make_runner(endpoint):
        # both flows share the same yaml and documents; only the endpoint differs
        return SingleFlowRunner(
            flow_yaml=os.path.join(cur_dir, 'flow.yml'),
            documents=document_generator(10),
            request_size=1,
            execution_endpoint=endpoint,
        )

    multi_flow_runner = MultiFlowRunner(
        [_make_runner('index'), _make_runner('search')]
    )
    optimizer = FlowOptimizer(
        flow_runner=multi_flow_runner,
        parameter_yaml=os.path.join(cur_dir, 'parameter.yml'),
        evaluation_callback=EvaluationCallback(),
        workspace_base_dir=str(tmpdir),
        n_trials=5,
    )
    validate_result(optimizer.optimize_flow(), tmpdir)
def test_optimizer_search_space(tmpdir, config):
    """Validate search-space construction from yaml and rejection of
    unsupported (uniform / loguniform) parameter types under GridSampler."""
    eval_flow_runner = SingleFlowRunner(
        flow_yaml=os.path.join(cur_dir, "flow.yml"),
        documents=document_generator(10),
        request_size=1,
        execution_endpoint="search",
    )
    _opt_kwargs = dict(
        flow_runner=eval_flow_runner,
        evaluation_callback=EvaluationCallback(),
        workspace_base_dir=str(tmpdir),
        n_trials=5,
        sampler="GridSampler",
    )

    # check the search space construction with valid parameters
    opt = FlowOptimizer(
        **_opt_kwargs,
        parameter_yaml=os.path.join(cur_dir, "parameters_search_space.yml"),
    )
    # table-driven form of the original six repeated in/== assertion pairs
    expected_space = {
        "JINA_PARAM1": [0, 1, 2],
        "JINA_PARAM2": ["foo", "bar"],
        "JINA_PARAM3": [1.0, 1.5, 2.0],
        "JINA_PARAM4": ["spam", "ham"],
        "JINA_PARAM4_SPAM_PARAM5": [0, 1, 2],
        "JINA_PARAM4_HAM_PARAM56": [0, 1, 2],
    }
    for key, values in expected_space.items():
        assert key in opt._search_space
        assert opt._search_space[key] == values

    def _assert_rejected(parameter_file):
        # DRY helper for the two duplicated try/except/_raised blocks:
        # constructing the optimizer must raise NotImplementedError.
        raised = False
        try:
            FlowOptimizer(
                **_opt_kwargs,
                parameter_yaml=os.path.join(cur_dir, parameter_file),
            )
        except NotImplementedError:
            raised = True
        assert raised

    # uniform / loguniform parameters cannot be expressed as a grid
    _assert_rejected("parameters_search_space_uniform.yml")
    _assert_rejected("parameters_search_space_loguniform.yml")
def test_optimizer_single_flow_option2(tmpdir, config):
    """The optimizer should select the second craft pod and its best params."""
    runner = SingleFlowRunner(
        flow_yaml=os.path.join(cur_dir, 'flow_pod_choice.yml'),
        documents=document_generator_option2(10),
        request_size=1,
        execution_endpoint='search',
    )
    optimizer = FlowOptimizer(
        flow_runner=runner,
        parameter_yaml=os.path.join(cur_dir, 'parameter_pod_choice.yml'),
        evaluation_callback=EvaluationCallback(),
        workspace_base_dir=str(tmpdir),
        n_trials=20,
    )
    best = optimizer.optimize_flow().best_parameters
    expected = {
        'JINA_DUMMYCRAFTER_CHOICE': 'pods/craft_option2.yml',
        'JINA_DUMMYCRAFTER_PARAM4': 0,
        'JINA_DUMMYCRAFTER_PARAM5': 1,
        'JINA_DUMMYCRAFTER_PARAM6': 1,
    }
    for key, value in expected.items():
        assert best[key] == value
def callback(operator):
    """Build an EvaluationCallback bound to the module-level EVAL_NAME
    using the given *operator*."""
    cb = EvaluationCallback(EVAL_NAME, operator)
    return cb