Example #1
    def test_unsuccessful_jobs_fail_evaluation(self):
        submission = SubmissionFactory()
        evaluation = EvaluationFactory(submission=submission)
        AlgorithmEvaluationFactory(status=AlgorithmEvaluation.SUCCESS,
                                   submission=submission)
        AlgorithmEvaluationFactory(status=AlgorithmEvaluation.FAILURE,
                                   submission=submission)

        set_evaluation_inputs(evaluation_pk=evaluation.pk)

        evaluation.refresh_from_db()
        assert evaluation.status == evaluation.FAILURE
        assert (evaluation.output ==
                "The algorithm failed to execute on 1 images.")
Example #2
    def test_set_evaluation_inputs(self):
        submission = SubmissionFactory()
        evaluation = EvaluationFactory(submission=submission)
        algorithms = AlgorithmEvaluationFactory.create_batch(
            2, status=AlgorithmEvaluation.SUCCESS, submission=submission)
        civs = ComponentInterfaceValueFactory.create_batch(2)

        for alg, civ in zip(algorithms, civs):
            alg.outputs.set([civ])

        set_evaluation_inputs(evaluation_pk=evaluation.pk)

        evaluation.refresh_from_db()
        assert evaluation.status == evaluation.PENDING
        assert evaluation.output == ""
        assert list(evaluation.inputs.all()) == civs
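
The task set_evaluation_inputs itself is not shown on this page, but its expected behaviour can be read off the two tests above. A rough, non-authoritative sketch of that behaviour (model and field names are taken from the tests; this is inferred from the assertions, not the real grand-challenge implementation):

def set_evaluation_inputs(*, evaluation_pk):
    # Sketch inferred from the two tests above, not the actual task.
    evaluation = Evaluation.objects.get(pk=evaluation_pk)
    algorithm_evaluations = AlgorithmEvaluation.objects.filter(
        submission=evaluation.submission
    )
    failed = algorithm_evaluations.exclude(status=AlgorithmEvaluation.SUCCESS)

    if failed.exists():
        # Any failed algorithm job fails the whole evaluation.
        evaluation.status = evaluation.FAILURE
        evaluation.output = (
            f"The algorithm failed to execute on {failed.count()} images."
        )
        evaluation.save()
    else:
        # All algorithm jobs succeeded: wire their outputs up as the
        # evaluation inputs and leave the evaluation in PENDING.
        evaluation.inputs.set(
            [civ for ae in algorithm_evaluations for civ in ae.outputs.all()]
        )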
Example #3
def test_setting_display_all_metrics(client, challenge_set):
    metrics = {"public": 3245.235, "secret": 4328.432, "extra": 2144.312}
    e = EvaluationFactory(
        submission__challenge=challenge_set.challenge,
        status=Evaluation.SUCCESS,
    )
    e.create_result(result=metrics)

    challenge_set.challenge.evaluation_config.score_jsonpath = "public"
    challenge_set.challenge.evaluation_config.extra_results_columns = [
        {"title": "extra", "path": "extra", "order": "asc"}
    ]
    challenge_set.challenge.evaluation_config.display_all_metrics = True
    challenge_set.challenge.evaluation_config.save()

    response = get_view_for_user(
        client=client,
        viewname="evaluation:detail",
        challenge=challenge_set.challenge,
        reverse_kwargs={"pk": e.pk},
    )

    assert response.status_code == 200
    assert str(metrics["public"]) in response.rendered_content
    assert str(metrics["extra"]) in response.rendered_content
    assert str(metrics["secret"]) in response.rendered_content

    challenge_set.challenge.evaluation_config.display_all_metrics = False
    challenge_set.challenge.evaluation_config.save()

    response = get_view_for_user(
        client=client,
        viewname="evaluation:detail",
        challenge=challenge_set.challenge,
        reverse_kwargs={"pk": e.pk},
    )

    assert response.status_code == 200
    assert str(metrics["public"]) in response.rendered_content
    assert str(metrics["extra"]) in response.rendered_content
    assert str(metrics["secret"]) not in response.rendered_content
Example #4
def test_public_private_default():
    c = ChallengeFactory()

    r1 = EvaluationFactory(submission__challenge=c)

    assert r1.published is True

    c.evaluation_config.auto_publish_new_results = False
    c.evaluation_config.save()

    r2 = EvaluationFactory(submission__challenge=c)

    assert r2.published is False

    # The public/private status should only update on first save
    r1.save()
    assert r1.published is True
Example #5
def test_evaluation_detail(client, eval_challenge_set):
    submission = SubmissionFactory(
        challenge=eval_challenge_set.challenge_set.challenge,
        creator=eval_challenge_set.challenge_set.participant,
    )
    e = EvaluationFactory(submission=submission)
    validate_open_view(
        viewname="evaluation:detail",
        challenge_set=eval_challenge_set.challenge_set,
        reverse_kwargs={"pk": e.pk},
        client=client,
    )
Example #6
def test_duration():
    j = AlgorithmJobFactory()
    _ = EvaluationFactory()

    jbs = Job.objects.with_duration()
    assert jbs[0].duration is None
    assert Job.objects.average_duration() is None

    now = timezone.now()
    j.started_at = now - timedelta(minutes=5)
    j.completed_at = now
    j.save()

    jbs = Job.objects.with_duration()
    assert jbs[0].duration == timedelta(minutes=5)
    assert Job.objects.average_duration() == timedelta(minutes=5)

    _ = AlgorithmJobFactory()
    assert Job.objects.average_duration() == timedelta(minutes=5)
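
The queryset helpers with_duration and average_duration used above are not listed on this page. A minimal sketch of how such helpers could be built with the standard Django ORM (an assumption; the real manager may differ):

from django.db import models
from django.db.models import Avg, ExpressionWrapper, F


class JobQuerySet(models.QuerySet):
    def with_duration(self):
        # completed_at - started_at; None if either timestamp is unset.
        return self.annotate(
            duration=ExpressionWrapper(
                F("completed_at") - F("started_at"),
                output_field=models.DurationField(),
            )
        )

    def average_duration(self):
        # Average only over jobs that have both timestamps set.
        return (
            self.exclude(started_at__isnull=True)
            .exclude(completed_at__isnull=True)
            .with_duration()
            .aggregate(Avg("duration"))["duration__avg"]
        )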
Example #7
def test_null_results():
    challenge = ChallengeFactory()

    results = [{"a": 0.6}, {"a": None}]

    queryset = [
        EvaluationFactory(submission__challenge=challenge,
                          status=Evaluation.SUCCESS)
        for _ in range(len(results))
    ]

    for e, r in zip(queryset, results):
        e.create_result(result=r)

    challenge.evaluation_config.score_jsonpath = "a"
    challenge.evaluation_config.result_display_choice = Config.ALL
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [1, 0]
    assert_ranks(queryset, expected_ranks)
Example #8
def test_results_display():
    challenge = ChallengeFactory()

    user1 = UserFactory()
    user2 = UserFactory()

    metrics = "metrics"
    creator = "creator"

    results = [
        {metrics: {"b": 0.3}, creator: user1},  # Invalid result
        {metrics: {"a": 0.6}, creator: user1},
        {metrics: {"a": 0.4}, creator: user1},
        {metrics: {"a": 0.2}, creator: user1},
        {metrics: {"a": 0.1}, creator: user2},
        {metrics: {"a": 0.5}, creator: user2},
        {metrics: {"a": 0.3}, creator: user2},
    ]

    queryset = [
        EvaluationFactory(
            submission__challenge=challenge,
            submission__creator=r[creator],
            status=Evaluation.SUCCESS,
        ) for r in results
    ]

    for e, r in zip(queryset, results):
        e.create_result(result=r[metrics])

    challenge.evaluation_config.score_jsonpath = "a"
    challenge.evaluation_config.result_display_choice = Config.ALL
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 1, 3, 5, 6, 2, 4]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.MOST_RECENT
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 0, 0, 2, 0, 0, 1]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.BEST
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 1, 0, 0, 0, 2, 0]
    assert_ranks(queryset, expected_ranks)

    # now test reverse order
    challenge.evaluation_config.score_default_sort = (
        challenge.evaluation_config.ASCENDING)
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 0, 0, 2, 1, 0, 0]
    assert_ranks(queryset, expected_ranks)

    challenge.evaluation_config.result_display_choice = Config.MOST_RECENT
    challenge.evaluation_config.save()

    calculate_ranks(challenge_pk=challenge.pk)

    expected_ranks = [0, 0, 0, 1, 0, 0, 2]
    assert_ranks(queryset, expected_ranks)
Example #9
def test_calculate_ranks():
    challenge = ChallengeFactory()

    results = [
        # Warning: do not change these values without updating the
        # expected_ranks below.
        {"a": 0.0, "b": 0.0},
        {"a": 0.5, "b": 0.2},
        {"a": 1.0, "b": 0.3},
        {"a": 0.7, "b": 0.4},
        {"a": 0.5, "b": 0.5},
        # The following two are invalid if relative ranking is used
        {"a": 1.0},
        {"b": 0.3},
        # Add a valid, but unpublished result
        {"a": 0.1, "b": 0.1},
    ]

    queryset = [
        EvaluationFactory(submission__challenge=challenge,
                          status=Evaluation.SUCCESS)
        for _ in range(len(results))
    ]

    for e, r in zip(queryset, results):
        e.create_result(result=r)

    # Unpublish the result
    queryset[-1].published = False
    queryset[-1].save()

    expected = {
        Config.DESCENDING: {
            Config.ABSOLUTE: {
                Config.DESCENDING: {
                    "ranks": [6, 4, 1, 3, 4, 1, 0, 0],
                    "rank_scores": [6, 4, 1, 3, 4, 1, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [6, 4, 1, 3, 4, 1, 0, 0],
                    "rank_scores": [6, 4, 1, 3, 4, 1, 0, 0],
                },
            },
            Config.MEDIAN: {
                Config.DESCENDING: {
                    "ranks": [5, 4, 1, 1, 1, 0, 0, 0],
                    "rank_scores": [5, 3.5, 2, 2, 2, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [3, 2, 1, 3, 5, 0, 0, 0],
                    "rank_scores": [3, 2.5, 2, 3, 4, 0, 0, 0],
                },
            },
            Config.MEAN: {
                Config.DESCENDING: {
                    "ranks": [5, 4, 1, 1, 1, 0, 0, 0],
                    "rank_scores": [5, 3.5, 2, 2, 2, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [3, 2, 1, 3, 5, 0, 0, 0],
                    "rank_scores": [3, 2.5, 2, 3, 4, 0, 0, 0],
                },
            },
        },
        Config.ASCENDING: {
            Config.ABSOLUTE: {
                Config.DESCENDING: {
                    "ranks": [1, 2, 5, 4, 2, 5, 0, 0],
                    "rank_scores": [1, 2, 5, 4, 2, 5, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 5, 4, 2, 5, 0, 0],
                    "rank_scores": [1, 2, 5, 4, 2, 5, 0, 0],
                },
            },
            Config.MEDIAN: {
                Config.DESCENDING: {
                    "ranks": [2, 2, 5, 2, 1, 0, 0, 0],
                    "rank_scores": [3, 3, 4, 3, 1.5, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 4, 4, 3, 0, 0, 0],
                    "rank_scores": [1, 2, 4, 4, 3.5, 0, 0, 0],
                },
            },
            Config.MEAN: {
                Config.DESCENDING: {
                    "ranks": [2, 2, 5, 2, 1, 0, 0, 0],
                    "rank_scores": [3, 3, 4, 3, 1.5, 0, 0, 0],
                },
                Config.ASCENDING: {
                    "ranks": [1, 2, 4, 4, 3, 0, 0, 0],
                    "rank_scores": [1, 2, 4, 4, 3.5, 0, 0, 0],
                },
            },
        },
    }

    for score_method in (Config.ABSOLUTE, Config.MEDIAN, Config.MEAN):
        for a_order in (Config.DESCENDING, Config.ASCENDING):
            for b_order in (Config.DESCENDING, Config.ASCENDING):
                challenge.evaluation_config.score_jsonpath = "a"
                challenge.evaluation_config.scoring_method_choice = (
                    score_method)
                challenge.evaluation_config.score_default_sort = a_order
                challenge.evaluation_config.extra_results_columns = [
                    {"path": "b", "title": "b", "order": b_order}
                ]
                challenge.evaluation_config.save()

                calculate_ranks(challenge_pk=challenge.pk)

                assert_ranks(
                    queryset,
                    expected[a_order][score_method][b_order]["ranks"],
                    expected[a_order][score_method][b_order]["rank_scores"],
                )
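
Each ranking test ends with an assert_ranks helper whose definition is not included on this page. Judging from the call sites, it presumably re-reads each evaluation and compares its stored rank, and optionally rank_score, with the expected lists; a hypothetical sketch, with those field names assumed:

def assert_ranks(queryset, expected_ranks, expected_rank_scores=None):
    # Hypothetical helper; the field names `rank` and `rank_score` are
    # assumptions based on how calculate_ranks is exercised above.
    for i, evaluation in enumerate(queryset):
        evaluation.refresh_from_db()
        assert evaluation.rank == expected_ranks[i]
        if expected_rank_scores is not None:
            assert evaluation.rank_score == expected_rank_scores[i]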
Example #10
def submission_and_evaluation(*, challenge, creator):
    """Creates a submission and an evaluation for that submission."""
    s = SubmissionFactory(challenge=challenge, creator=creator)
    e = EvaluationFactory(submission=s)
    return s, e
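
For context, a small hypothetical test showing how this helper could be used, assuming the same factories and a pytest-django database fixture as in the other examples:

import pytest


@pytest.mark.django_db
def test_submission_and_evaluation_helper():
    challenge = ChallengeFactory()
    user = UserFactory()

    s, e = submission_and_evaluation(challenge=challenge, creator=user)

    # The evaluation is attached to the freshly created submission.
    assert s.challenge == challenge
    assert s.creator == user
    assert e.submission == s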
Example #11
def test_mark_long_running_jobs_failed():
    # Started jobs should be unaffected
    j1 = EvaluationFactory()
    j1.update_status(status=EvaluationJob.STARTED)

    # Long running jobs should be marked as failed
    j2 = EvaluationFactory()
    j2.update_status(status=EvaluationJob.STARTED)
    j2.started_at = timezone.now() - timedelta(days=1)
    j2.save()

    # A job that has not been started should not be marked as failed,
    # even if it is outside the celery task limit
    j3 = EvaluationFactory()
    j3.created -= timedelta(days=1)
    j3.save()

    # Algorithm jobs should not be affected
    a = AlgorithmJobFactory()
    a.update_status(status=AlgorithmJob.STARTED)

    assert EvaluationJob.objects.all().count() == 3
    assert (AlgorithmJob.objects.filter(
        status=AlgorithmJob.STARTED).count() == 1)
    assert (EvaluationJob.objects.filter(
        status=EvaluationJob.FAILURE).count() == 0)

    assert j1.status == EvaluationJob.STARTED
    assert j2.status == EvaluationJob.STARTED
    assert j3.status == EvaluationJob.PENDING
    assert a.status == AlgorithmJob.STARTED

    mark_long_running_jobs_failed(app_label="evaluation",
                                  model_name="evaluation")

    j1.refresh_from_db()
    j2.refresh_from_db()
    j3.refresh_from_db()
    a.refresh_from_db()

    assert j1.status == EvaluationJob.STARTED
    assert j2.status == EvaluationJob.FAILURE
    assert j3.status == EvaluationJob.PENDING
    assert a.status == AlgorithmJob.STARTED