示例#1
0
def test_evaluation_callback_no_name(responses):
    """Check the mean evaluation of a callback created without a metric name.

    :param responses: object handed to the callback on every call
        (presumably a pytest fixture defined elsewhere -- confirm).
    """
    callback = EvaluationCallback()
    # The callback is invoked twice with the very same responses.
    for _ in range(2):
        callback(responses)

    mean_evaluation = callback.get_mean_evaluation()
    metric_names = list(mean_evaluation)
    first_metric = metric_names[0]

    assert first_metric == 'metric1'
    assert mean_evaluation[first_metric] == 0.75
示例#2
0
def test_evaluation_callback_with_name(responses):
    """Check the mean evaluation of a callback created with an explicit metric name.

    :param responses: object handed to the callback on every call
        (presumably a pytest fixture defined elsewhere -- confirm).
    """
    requested_metric = 'metric2'
    callback = EvaluationCallback(requested_metric)
    # The callback is invoked twice with the very same responses.
    for _ in range(2):
        callback(responses)

    mean_evaluation = callback.get_mean_evaluation()
    metric_names = list(mean_evaluation)
    first_metric = metric_names[0]

    assert first_metric == requested_metric
    assert mean_evaluation[first_metric] == 0.5
示例#3
0
def test_optimizer(tmpdir):
    """Run an Optuna optimization on the test flow and check the best parameters.

    The result is verified twice: through ``result.best_parameters`` and by
    reading back the YAML file written by ``result.save_parameters``.

    :param tmpdir: base directory for the optimizer workspace and for the
        saved parameter file (presumably the pytest ``tmpdir`` fixture).
    """
    best_parameters = {'JINA_DUMMYCRAFTER_PARAM1': 0,
                       'JINA_DUMMYCRAFTER_PARAM2': 1,
                       'JINA_DUMMYCRAFTER_PARAM3': 1}

    def document_generator(num_doc):
        """Yield ``num_doc`` (document, groundtruth document) pairs."""
        for _ in range(num_doc):
            doc = Document(content='hello')
            groundtruth_doc = Document(content='hello')
            # The yield must sit inside the loop: otherwise only the last
            # pair is emitted and ``num_doc == 0`` hits an unbound name.
            yield doc, groundtruth_doc

    eval_flow_runner = FlowRunner(
        flow_yaml='tests/integration/optimizers/flow.yml',
        documents=document_generator(10),
        batch_size=1,
        task='search',
        callback=EvaluationCallback(),
    )

    multi_flow = MultiFlowRunner(eval_flow_runner)

    opt = OptunaOptimizer(
        multi_flow=multi_flow,
        parameter_yaml='tests/integration/optimizers/parameter.yml',
        workspace_base_dir=str(tmpdir)
    )
    result = opt.optimize_flow(n_trials=10)
    result_path = str(tmpdir) + '/results/best_parameters.yml'
    result.save_parameters(result_path)
    parameters = result.best_parameters

    assert parameters == best_parameters
    # ``safe_load`` needs no explicit Loader argument, and the context
    # manager closes the file handle instead of leaking it.
    with open(result_path) as result_file:
        assert yaml.safe_load(result_file) == best_parameters
示例#4
0
def test_optimizer_single_flow(tmpdir, config, sampler):
    """Optimize a single 'search' flow with the given sampler and validate the result.

    :param tmpdir: base directory for the optimizer workspace; also passed to
        ``validate_result``.
    :param config: not used in the body (presumably a fixture requested for
        its setup side effects -- confirm).
    :param sampler: sampler name forwarded to ``FlowOptimizer``; the value
        ``'GridSampler'`` additionally gets an explicit search space.
    """
    runner = SingleFlowRunner(
        flow_yaml=os.path.join(cur_dir, 'flow.yml'),
        documents=document_generator(10),
        request_size=1,
        execution_endpoint='search',
    )
    optimizer = FlowOptimizer(
        flow_runner=runner,
        parameter_yaml=os.path.join(cur_dir, 'parameter.yml'),
        evaluation_callback=EvaluationCallback(),
        workspace_base_dir=str(tmpdir),
        n_trials=5,
        sampler=sampler,
    )

    # Only the grid sampler is handed an explicit search space.
    extra_kwargs = {}
    if sampler == 'GridSampler':
        extra_kwargs['search_space'] = {
            'JINA_DUMMYCRAFTER_PARAM1': [0, 1],
            'JINA_DUMMYCRAFTER_PARAM2': [0, 1, 2],
            'JINA_DUMMYCRAFTER_PARAM3': [1],
        }

    validate_result(optimizer.optimize_flow(**extra_kwargs), tmpdir)
示例#5
0
def test_optimizer_multi_flow(tmpdir, config):
    """Optimize an 'index' flow followed by a 'search' flow and validate the result.

    :param tmpdir: base directory for the optimizer workspace; also passed to
        ``validate_result``.
    :param config: not used in the body (presumably a fixture requested for
        its setup side effects -- confirm).
    """
    # One runner per endpoint, each with its own freshly created generator.
    runners = [
        SingleFlowRunner(
            flow_yaml=os.path.join(cur_dir, 'flow.yml'),
            documents=document_generator(10),
            request_size=1,
            execution_endpoint=endpoint,
        )
        for endpoint in ('index', 'search')
    ]
    optimizer = FlowOptimizer(
        flow_runner=MultiFlowRunner(runners),
        parameter_yaml=os.path.join(cur_dir, 'parameter.yml'),
        evaluation_callback=EvaluationCallback(),
        workspace_base_dir=str(tmpdir),
        n_trials=5,
    )
    validate_result(optimizer.optimize_flow(), tmpdir)
示例#6
0
def test_optimizer_search_space(tmpdir, config):
    """Check how ``FlowOptimizer`` builds its search space for the grid sampler.

    A valid parameter file must produce the expected ``_search_space``
    entries; the uniform and loguniform parameter files must make the
    constructor raise ``NotImplementedError``.

    :param tmpdir: base directory for the optimizer workspace.
    :param config: not used in the body (presumably a fixture requested for
        its setup side effects -- confirm).
    """
    runner = SingleFlowRunner(
        flow_yaml=os.path.join(cur_dir, "flow.yml"),
        documents=document_generator(10),
        request_size=1,
        execution_endpoint="search",
    )
    common_kwargs = {
        "flow_runner": runner,
        "evaluation_callback": EvaluationCallback(),
        "workspace_base_dir": str(tmpdir),
        "n_trials": 5,
        "sampler": "GridSampler",
    }

    # Valid parameters: every expected key is present with the expected values.
    optimizer = FlowOptimizer(
        **common_kwargs,
        parameter_yaml=os.path.join(cur_dir, "parameters_search_space.yml"),
    )
    expected_space = {
        "JINA_PARAM1": [0, 1, 2],
        "JINA_PARAM2": ["foo", "bar"],
        "JINA_PARAM3": [1.0, 1.5, 2.0],
        "JINA_PARAM4": ["spam", "ham"],
        "JINA_PARAM4_SPAM_PARAM5": [0, 1, 2],
        "JINA_PARAM4_HAM_PARAM56": [0, 1, 2],
    }
    for name, values in expected_space.items():
        assert name in optimizer._search_space
        assert optimizer._search_space[name] == values

    # Invalid uniform / loguniform parameters: construction must be rejected.
    invalid_files = (
        "parameters_search_space_uniform.yml",
        "parameters_search_space_loguniform.yml",
    )
    for invalid_file in invalid_files:
        try:
            FlowOptimizer(
                **common_kwargs,
                parameter_yaml=os.path.join(cur_dir, invalid_file),
            )
        except NotImplementedError:
            rejected = True
        else:
            rejected = False
        assert rejected
def test_optimizer_single_flow_option2(tmpdir, config):
    """Optimize the pod-choice flow and check the best parameters found.

    :param tmpdir: base directory for the optimizer workspace.
    :param config: not used in the body (presumably a fixture requested for
        its setup side effects -- confirm).
    """
    runner = SingleFlowRunner(
        flow_yaml=os.path.join(cur_dir, 'flow_pod_choice.yml'),
        documents=document_generator_option2(10),
        request_size=1,
        execution_endpoint='search',
    )
    optimizer = FlowOptimizer(
        flow_runner=runner,
        parameter_yaml=os.path.join(cur_dir, 'parameter_pod_choice.yml'),
        evaluation_callback=EvaluationCallback(),
        workspace_base_dir=str(tmpdir),
        n_trials=20,
    )
    result = optimizer.optimize_flow()

    # Checked in this order: the pod choice first, then its three parameters.
    expected_best = {
        'JINA_DUMMYCRAFTER_CHOICE': 'pods/craft_option2.yml',
        'JINA_DUMMYCRAFTER_PARAM4': 0,
        'JINA_DUMMYCRAFTER_PARAM5': 1,
        'JINA_DUMMYCRAFTER_PARAM6': 1,
    }
    for name, value in expected_best.items():
        assert result.best_parameters[name] == value
def callback(operator):
    """Build an ``EvaluationCallback`` for ``EVAL_NAME`` with the given operator.

    :param operator: second positional argument forwarded to ``EvaluationCallback``.
    :return: the newly created ``EvaluationCallback`` instance.
    """
    evaluation_callback = EvaluationCallback(EVAL_NAME, operator)
    return evaluation_callback