Пример #1
0
    def test_earliest_date_time_without_results(self):
        """Check Match.earliest_date_time_without_results against seeded matches.

        With only a scored past match and an unscored future match in the DB,
        the result must be None. Once an unscored past match is added, the
        result must equal that match's start time.
        """
        one_day = timedelta(days=1)

        # Started yesterday and has non-zero scores
        FullMatchFactory(
            start_date_time=timezone.localtime() - one_day,
            home_team_match__score=50,
            away_team_match__score=80,
        )
        # Starts tomorrow, scores still at zero
        FullMatchFactory(
            start_date_time=timezone.localtime() + one_day,
            home_team_match__score=0,
            away_team_match__score=0,
        )

        with self.subTest("when all matches have results or haven't been played"):
            self.assertIsNone(Match.earliest_date_time_without_results())

        # Started yesterday, but scores are still at zero
        resultless_match = FullMatchFactory(
            start_date_time=timezone.localtime() - one_day,
            home_team_match__score=0,
            away_team_match__score=0,
        )

        self.assertEqual(
            resultless_match.start_date_time,
            Match.earliest_date_time_without_results(),
        )
Пример #2
0
    def setUp(self):
        """Create the GraphQL client, one ML model per name, and a grid of matches.

        Matches are created for every combination of year in YEAR_RANGE,
        round index in range(ROUND_COUNT), and match index in
        range(MATCH_COUNT), in that nesting order.
        """
        self.maxDiff = None
        self.client = Client(schema)

        created_models = []
        for position, name in enumerate(MODEL_NAMES):
            created_models.append(
                MLModelFactory(
                    name=name,
                    # Only the first model in MODEL_NAMES is flagged as principal
                    is_principal=(position == 0),
                    used_in_competitions=True,
                    # pylint: disable=unsubscriptable-object
                    prediction_type=PredictionType.values[position],
                )
            )
        self.ml_models = created_models

        created_matches = []
        for year in range(*YEAR_RANGE):
            for round_n in range(ROUND_COUNT):
                for match_n in range(MATCH_COUNT):
                    # Day of month comes from the round index (wrapping at 29),
                    # hour of day from the match index.
                    kickoff = timezone.make_aware(
                        datetime(year, 6, (round_n % 29) + 1, match_n * 5)
                    )
                    created_matches.append(
                        FullMatchFactory(
                            with_predictions=True,
                            year=year,
                            round_number=((round_n % 23) + 1),
                            start_date_time=kickoff,
                            prediction__ml_model=self.ml_models[0],
                            prediction_two__ml_model=self.ml_models[1],
                        )
                    )
        self.matches = created_matches
Пример #3
0
    def setUp(self):
        """Seed the DB from fake match results and instantiate the email command.

        One FullMatchFactory record is created per row of the fake results
        data, each with a "tipresias" prediction whose predicted winner is
        picked at random between the two teams.
        """
        current_year = datetime.now(tz=MELBOURNE_TIMEZONE).year

        self.match_results_data = fake_match_results_data(
            ROW_COUNT, (current_year, current_year + 1)
        )

        # Save records in DB
        tipresias_model = MLModelFactory(name="tipresias")

        for row in self.match_results_data.to_dict("records"):
            home_team = row["home_team"]
            away_team = row["away_team"]
            kickoff = row["date"].to_pydatetime().replace(tzinfo=MELBOURNE_TIMEZONE)

            FullMatchFactory(
                start_date_time=kickoff,
                round_number=row["round_number"],
                venue=row["venue"],
                prediction__ml_model=tipresias_model,
                prediction__predicted_winner__name=np.random.choice(
                    [home_team, away_team]
                ),
                home_team_match__team__name=home_team,
                away_team_match__team__name=away_team,
                home_team_match__score=row["home_score"],
                away_team_match__score=row["away_score"],
            )

        self.send_email_command = send_email.Command()
Пример #4
0
    def test_played_without_results(self):
        """Check that Match.played_without_results returns exactly one match.

        Three matches are seeded: a past one with zero scores, a past one with
        non-zero scores, and a future one with zero scores.
        """
        one_day = timedelta(days=1)
        # (offset from now, home score, away score)
        fixtures = (
            (-one_day, 0, 0),
            (-one_day, 50, 80),
            (one_day, 0, 0),
        )

        for offset, home_score, away_score in fixtures:
            FullMatchFactory(
                start_date_time=timezone.localtime() + offset,
                home_team_match__score=home_score,
                away_team_match__score=away_score,
            )

        self.assertEqual(Match.played_without_results().count(), 1)
Пример #5
0
def main():
    """Set up test DB and seed it with FactoryBoy records.

    Creates the test database, then inside a single transaction creates
    N_ML_MODELS ML models and one match (with predictions) per combination of
    season, round and match index.

    Raises:
        AssertionError: if ``settings.ENVIRONMENT`` is not ``"test"``, or if
            the default DB name still equals the ``DATABASE_NAME`` environment
            variable after the test DB has been created.
    """
    # These guards are explicit raises rather than `assert` statements so that
    # they still run under `python -O`, which strips asserts.
    if settings.ENVIRONMENT != "test":
        raise AssertionError(
            "Seeding is only allowed when settings.ENVIRONMENT is 'test'"
        )

    current_year = date.today().year
    first_season = current_year - N_SEASONS + 1

    seasons_rounds_matches = product(
        range(first_season, current_year + 1),
        range(1, N_ROUNDS_PER_SEASON + 1),
        range(N_MATCHES_PER_ROUND),
    )

    connections["default"].creation.create_test_db(autoclobber=True)
    # create_test_db should set the default DB to test_${DATABASE_NAME}
    if settings.DATABASES["default"]["NAME"] == os.getenv("DATABASE_NAME"):
        raise AssertionError(
            "Default DB was not switched to the test DB; refusing to seed"
        )

    with transaction.atomic():
        # We need exactly one principal model, and one of each prediction type
        # among competition models.
        MLModelFactory(
            is_principal=True,
            used_in_competitions=True,
            prediction_type=PredictionType.MARGIN,
        )
        MLModelFactory(
            used_in_competitions=True, prediction_type=PredictionType.WIN_PROBABILITY
        )

        # The two models above count toward N_ML_MODELS
        for _ in range(N_ML_MODELS - 2):
            MLModelFactory()

        for season, round_number, _match in seasons_rounds_matches:
            # We need some future matches for functionality around predictions,
            # so we're arbitrarily making the second half of the current season
            # sometime after today.
            # Reuses current_year (the value the season range was built from)
            # so the comparison cannot drift if the date rolls over mid-run.
            future_match = (
                season == current_year and round_number > N_ROUNDS_PER_SEASON / 2
            )
            FullMatchFactory(
                with_predictions=True,
                year=season,
                round_number=round_number,
                future=future_match,
            )
Пример #6
0
    def test_update_results(self, mock_update_result):
        """Check that Match.update_results triggers one update per result row.

        A resultless match (both scores zero) is created for every row of the
        fake results data, then the number of calls recorded on
        ``mock_update_result`` is compared with the number of rows.

        Args:
            mock_update_result: mock object supplied by a patch decorator that
                is outside this view — presumably it replaces
                ``Match.update_result``; verify against the decorator.
        """
        match_results = data_factories.fake_match_results_data()

        for _idx, match_result in match_results.iterrows():
            FullMatchFactory(
                home_team_match__score=0,
                away_team_match__score=0,
                start_date_time=match_result["date"],
                round_number=match_result["round_number"],
                home_team_match__team__name=match_result["home_team"],
                away_team_match__team__name=match_result["away_team"],
                venue=match_result["venue"],
            )

        Match.update_results(match_results)

        # Only the number of calls is checked, not their arguments
        self.assertEqual(mock_update_result.call_count, len(match_results))
Пример #7
0
    def setUp(self):
        """Create the GraphQL client, the ML models, and one match per year/round index."""
        self.maxDiff = None
        self.client = Client(schema)

        ml_models = []
        for model_name in MODEL_NAMES:
            ml_models.append(MLModelFactory(name=model_name))

        created_matches = []
        for year in range(*YEAR_RANGE):
            for idx in range(ROUND_COUNT):
                # Day of month wraps at 29, round number wraps at 23
                match_day = (idx % 29) + 1
                created_matches.append(
                    FullMatchFactory(
                        year=year,
                        round_number=((idx % 23) + 1),
                        start_date_time=datetime(
                            year, 6, match_day, tzinfo=MELBOURNE_TIMEZONE
                        ),
                        prediction__ml_model=ml_models[0],
                        prediction_two__ml_model=ml_models[1],
                    )
                )
        self.matches = created_matches
Пример #8
0
    def setUp(self):
        """Seed matches with two predictions each and instantiate the email command.

        One match is created per row of the fake results data. Each gets a
        prediction from the principal model and one from the first
        non-principal competition model, both with a randomly chosen winner.
        """
        self.match_results_data = fake_match_results_data(
            seasons=(PREDICTION_YEAR, PREDICTION_YEAR + 1)
        )

        principal_model = MLModel.objects.get(is_principal=True)
        competition_model = MLModel.objects.filter(
            is_principal=False, used_in_competitions=True
        ).first()

        for row in self.match_results_data.to_dict("records"):
            home_team = row["home_team"]
            away_team = row["away_team"]
            kickoff = timezone.localtime(row["date"].to_pydatetime())

            # Two independent random draws, one per prediction, in this order
            first_pick = np.random.choice([home_team, away_team])
            second_pick = np.random.choice([home_team, away_team])

            FullMatchFactory(
                with_predictions=True,
                start_date_time=kickoff,
                round_number=row["round_number"],
                venue=row["venue"],
                prediction__ml_model=principal_model,
                prediction__predicted_winner__name=first_pick,
                prediction_two__ml_model=competition_model,
                prediction_two__predicted_winner__name=second_pick,
                home_team_match__team__name=home_team,
                away_team_match__team__name=away_team,
                home_team_match__score=row["home_score"],
                away_team_match__score=row["away_score"],
            )

        self.send_email_command = send_email.Command()
Пример #9
0
    def test_update_result(self):
        """Exercise Match.update_result with and without matching result data.

        The sub-tests call update_result with an empty data frame for an
        unplayed match, for a match played yesterday, and for a match played
        eight days ago (the last of which must raise). The final section
        passes a real result row and checks scores, prediction correctness,
        winner and margin.
        """

        def assert_scores_and_predictions_untouched(checked_match):
            # It doesn't update match scores
            scores = checked_match.teammatch_set.values_list("score", flat=True)
            self.assertEqual(sum(scores), 0)
            # It doesn't update prediction correctness
            graded = checked_match.prediction_set.filter(is_correct__in=[True, False])
            self.assertEqual(graded.count(), 0)

        with self.subTest("When the match hasn't been played yet"):
            match = FullMatchFactory(
                future=True,
                with_predictions=True,
                home_team_match__score=0,
                away_team_match__score=0,
            )

            match.update_result(pd.DataFrame())

            assert_scores_and_predictions_untouched(match)
            # It doesn't update match winner or margin
            self.assertIsNone(match.winner)
            self.assertIsNone(match.margin)

        with self.subTest("When the match doesn't have results yet"):
            with self.subTest("and has been played within the last week"):
                match = FullMatchFactory(
                    with_predictions=True,
                    start_date_time=timezone.now() - timedelta(days=1),
                    home_team_match__score=0,
                    away_team_match__score=0,
                    prediction__is_correct=None,
                    prediction_two__is_correct=None,
                )
                # Cleared on the in-memory instance only (no save call here)
                match.winner = None
                match.margin = None

                match.update_result(pd.DataFrame())

                assert_scores_and_predictions_untouched(match)
                # It doesn't update match winner or margin
                self.assertIsNone(match.winner)
                self.assertIsNone(match.margin)

            with self.subTest("and has been played over a week ago"):
                match = FullMatchFactory(
                    with_predictions=True,
                    start_date_time=timezone.now() - timedelta(days=8),
                    home_team_match__score=0,
                    away_team_match__score=0,
                    prediction__is_correct=None,
                    prediction_two__is_correct=None,
                )

                with self.assertRaisesRegex(
                    AssertionError, "Didn't find any match data rows"
                ):
                    match.update_result(pd.DataFrame())

                assert_scores_and_predictions_untouched(match)

        results_frame = data_factories.fake_match_results_data()
        first_result = results_frame.iloc[0, :]

        match = FullMatchFactory(
            with_predictions=True,
            home_team_match__score=0,
            away_team_match__score=0,
            start_date_time=first_result["date"],
            round_number=first_result["round_number"],
            home_team_match__team__name=first_result["home_team"],
            away_team_match__team__name=first_result["away_team"],
        )

        # The away team is treated as the winner unless the home score is higher
        if first_result["home_score"] > first_result["away_score"]:
            winner_name = first_result["home_team"]
        else:
            winner_name = first_result["away_team"]

        winner = Team.objects.get(name=winner_name)
        # Point every prediction at the actual winner
        match.prediction_set.update(predicted_winner=winner)
        # We expect a data frame, so can't reuse the first_result series
        match.update_result(results_frame.iloc[:1, :])

        # It updates match scores
        match_scores = set(match.teammatch_set.values_list("score", flat=True))
        expected_scores = set(first_result[["home_score", "away_score"]])
        self.assertEqual(match_scores, expected_scores)
        # It updates prediction correctness
        correct_count = match.prediction_set.filter(is_correct=True).count()
        self.assertGreaterEqual(correct_count, 1)
        # It updates match winner and margin
        self.assertTrue(match.winner == winner or match.is_draw)
        self.assertEqual(match.margin, max(match_scores) - min(match_scores))
Пример #10
0
    def test_fetch_latest_round_stats(self):
        """Check the fetchLatestRoundStats query for the "accurate_af" model.

        One match per round is created in June of the test year (day of month
        equals round number until it wraps at 29), all with correct
        predictions. The query is run once with the real clock and once with
        the clock frozen on an early day of that month.
        """
        ml_models = list(MLModel.objects.all())
        year = TWENTY_SEVENTEEN
        month = 6

        latest_matches = [
            FullMatchFactory(
                year=year,
                round_number=(idx + 1),
                start_date_time=datetime(
                    year, month, (idx % 29) + 1, tzinfo=MELBOURNE_TIMEZONE
                ),
                prediction__ml_model=ml_models[0],
                prediction__is_correct=True,
                prediction_two__ml_model=ml_models[1],
                prediction_two__is_correct=True,
            )
            for idx in range(ROUND_COUNT)
        ]

        query = """
            query QueryType {
                fetchLatestRoundStats(mlModelName: "accurate_af") {
                    seasonYear
                    roundNumber
                    modelStats {
                        modelName
                        cumulativeCorrectCount
                        cumulativeMeanAbsoluteError
                        cumulativeMarginDifference
                    }
                }
            }
            """

        executed = self.client.execute(query)

        data = executed["data"]["fetchLatestRoundStats"]

        max_match_round = max(match.round_number for match in latest_matches)
        self.assertEqual(data["roundNumber"], max_match_round)

        max_match_year = max(match.year for match in latest_matches)
        self.assertEqual(max_match_year, data["seasonYear"])

        model_stats = data["modelStats"]

        self.assertEqual("accurate_af", model_stats["modelName"])

        self.assertGreater(model_stats["cumulativeCorrectCount"], 0)
        self.assertGreater(model_stats["cumulativeMeanAbsoluteError"], 0)
        self.assertGreater(model_stats["cumulativeMarginDifference"], 0)

        with self.subTest("when the last matches haven't been played yet"):
            day = 3

            # Zero-pad via the format spec so the date string stays valid for
            # two-digit months/days as well.
            with freeze_time(
                f"{year}-{month:02d}-{day:02d}",
                tz_offset=-HOURS_FROM_UTC_TO_MELBOURNE,
            ):
                past_executed = self.client.execute(query)

                data = past_executed["data"]["fetchLatestRoundStats"]

                self.assertLess(data["roundNumber"], max_match_round)
                # Last played match will be from day before, because "now" and the
                # start time for "today's match" are equal
                self.assertEqual(data["roundNumber"], day - 1)

                model_stats = data["modelStats"]

                self.assertGreater(model_stats["cumulativeCorrectCount"], 0)
                self.assertGreater(model_stats["cumulativeMeanAbsoluteError"], 0)
                self.assertGreater(model_stats["cumulativeMarginDifference"], 0)
Пример #11
0
    def test_fetch_latest_round_predictions(self):
        """Check the fetchLatestRoundPredictions query, with and without a model filter.

        One match per round index is created in June of the test year. The
        response must report the highest round number created here and only
        matches from that year; filtering predictions by model name must yield
        only that model's name.
        """
        ml_models = list(MLModel.objects.all())
        year = TWENTY_SEVENTEEN

        latest_matches = []
        for idx in range(ROUND_COUNT):
            kickoff = datetime(year, 6, (idx % 29) + 1, tzinfo=MELBOURNE_TIMEZONE)
            latest_matches.append(
                FullMatchFactory(
                    year=year,
                    round_number=((idx % 23) + 1),
                    start_date_time=kickoff,
                    prediction__ml_model=ml_models[0],
                    prediction_two__ml_model=ml_models[1],
                )
            )

        all_predictions_query = """
            query QueryType {
                fetchLatestRoundPredictions {
                    roundNumber
                    matches {
                        startDateTime
                        predictions { predictedWinner { name } predictedMargin }
                        winner { name }
                        homeTeam { name }
                        awayTeam { name }
                    }
                }
            }
            """
        response = self.client.execute(all_predictions_query)

        data = response["data"]["fetchLatestRoundPredictions"]
        max_match_round = max(match.round_number for match in latest_matches)

        self.assertEqual(data["roundNumber"], max_match_round)

        # If every returned match is from `year`, the mean of the years equals it
        match_years = []
        for match in data["matches"]:
            match_years.append(parser.parse(match["startDateTime"]).year)

        self.assertEqual(np.mean(match_years), year)

        with self.subTest("with an mlModelName argument"):
            filtered_query = """
                query QueryType {
                    fetchLatestRoundPredictions {
                        roundNumber
                        matches {
                            startDateTime
                            predictions(mlModelName: "accurate_af") {
                                mlModel { name }
                            }
                        }
                    }
                }
                """
            filtered_response = self.client.execute(filtered_query)

            data = filtered_response["data"]["fetchLatestRoundPredictions"]
            # Flatten the per-match prediction lists into one list of model names
            ml_model_names = [
                pred["mlModel"]["name"]
                for match in data["matches"]
                for pred in match["predictions"]
            ]

            self.assertEqual(ml_model_names, ["accurate_af"])
Пример #12
0
    def test_fetch_yearly_predictions(self):
        """Check the fetchYearlyPredictions query for the 2015 season.

        Verifies the reported model names and season year, that rounds are in
        non-decreasing order, and that cumulative correct counts for the first
        two rounds match the per-match isCorrect flags.
        """

        def cumulative_count_total(round_data):
            # Sum of cumulativeCorrectCount over all models in the round
            return sum(
                model["cumulativeCorrectCount"]
                for model in round_data["modelPredictions"]
            )

        def correct_flags(round_data):
            # isCorrect of every prediction of every match in the round
            flags = []
            for match in round_data["matches"]:
                for prediction in match["predictions"]:
                    flags.append(prediction["isCorrect"])
            return flags

        year = 2015
        # NOTE(review): left as an unevaluated queryset rather than a list, so
        # the set() comparison further down reads the DB after the extra match
        # below has been created.
        ml_model_names = (
            Match.objects.filter(start_date_time__year=year)
            .distinct("prediction__ml_model__name")
            .values_list("prediction__ml_model__name", flat=True)
        )

        ml_models = list(MLModel.objects.filter(name__in=ml_model_names))

        # Have to make sure at least one match has a different round_number to compare
        # later rounds to earlier ones
        FullMatchFactory(
            year=year,
            round_number=50,
            prediction__ml_model=ml_models[0],
            prediction_two__ml_model=ml_models[1],
        )

        yearly_query = """
            query QueryType {
                fetchYearlyPredictions(year: 2015) {
                    seasonYear
                    predictionModelNames
                    predictionsByRound {
                        roundNumber
                        modelPredictions { modelName, cumulativeCorrectCount }
                        matches { predictions { isCorrect } }
                    }
                }
            }
            """
        response = self.client.execute(yearly_query)

        data = response["data"]["fetchYearlyPredictions"]

        self.assertEqual(set(data["predictionModelNames"]), set(ml_model_names))
        self.assertEqual(data["seasonYear"], year)

        rounds = data["predictionsByRound"]
        earlier_round = rounds[0]
        later_round = rounds[1]

        self.assertLessEqual(earlier_round["roundNumber"], later_round["roundNumber"])

        earlier_cum_total = cumulative_count_total(earlier_round)
        earlier_round_correct = correct_flags(earlier_round)

        # Regression test to make sure cumulative counts are being calculated correctly
        self.assertEqual(earlier_cum_total, sum(earlier_round_correct))

        later_cum_total = cumulative_count_total(later_round)
        later_round_correct = correct_flags(later_round)

        # Regression test to make sure cumulative counts are being calculated correctly
        self.assertEqual(
            sum(earlier_round_correct + later_round_correct),
            later_cum_total,
        )

        self.assertLessEqual(earlier_cum_total, later_cum_total)
Пример #13
0
    def test_fetch_latest_round_metrics(self):
        """Check the fetchLatestRoundMetrics query.

        MATCH_COUNT matches are created for each of ROUND_COUNT rounds in June
        of the test year, all with predictions forced to be correct. The query
        is then run three times: as-is, after zeroing the last round's scores,
        and with the clock frozen on an early day of that month.
        """
        ml_models = list(MLModel.objects.filter(name__in=MODEL_NAMES))
        year = TWENTY_SEVENTEEN
        month = 6

        for round_n in range(ROUND_COUNT):
            for match_n in range(MATCH_COUNT):
                FullMatchFactory(
                    with_predictions=True,
                    year=year,
                    round_number=(round_n + 1),
                    # Day of month follows the round, hour follows the match index
                    start_date_time=timezone.make_aware(
                        datetime(year, month, (round_n % 29) + 1, match_n * 5)
                    ),
                    prediction__ml_model=ml_models[0],
                    prediction__force_correct=True,
                    prediction_two__ml_model=ml_models[1],
                    prediction_two__force_correct=True,
                )

        query = """
            query {
                fetchLatestRoundMetrics {
                    season
                    roundNumber
                    cumulativeCorrectCount
                    cumulativeAccuracy
                    cumulativeMeanAbsoluteError
                    cumulativeMarginDifference
                    cumulativeBits
                }
            }
        """

        executed = self.client.execute(query)
        data = executed["data"]["fetchLatestRoundMetrics"]

        self.assertEqual(data["season"], year)
        self.assertEqual(data["roundNumber"], ROUND_COUNT)

        # We force all predictions to be correct, so the correct count should just be
        # the number of matches
        self.assertEqual(data["cumulativeCorrectCount"], ROUND_COUNT * MATCH_COUNT)
        self.assertGreater(data["cumulativeMeanAbsoluteError"], 0)
        self.assertEqual(
            round(data["cumulativeMarginDifference"]),
            round(data["cumulativeMeanAbsoluteError"] * ROUND_COUNT * MATCH_COUNT),
        )
        self.assertGreater(data["cumulativeAccuracy"], 0)
        # Bits can be positive or negative, so we just want to make sure it's not 0,
        # which would suggest a problem
        self.assertNotEqual(data["cumulativeBits"], 0)

        with self.subTest("when the last matches don't have updated results yet"):
            TeamMatch.objects.filter(
                match__start_date_time__year=year, match__round_number=ROUND_COUNT
            ).update(score=0)

            executed = self.client.execute(query)
            data = executed["data"]["fetchLatestRoundMetrics"]

            # It fetches latest round with results
            self.assertEqual(data["roundNumber"], ROUND_COUNT - 1)

        with self.subTest("when the last matches haven't been played yet"):
            day = 3
            fake_datetime = timezone.make_aware(datetime(year, month, day))

            with freeze_time(fake_datetime):
                # The query declares no variables, so none are passed
                past_executed = self.client.execute(query)

                data = past_executed["data"]["fetchLatestRoundMetrics"]

                max_match_round = max(
                    Match.objects.all(), key=lambda match: match.round_number
                ).round_number
                self.assertLess(data["roundNumber"], max_match_round)
                # Last played match will be from day before, because "now" and the
                # start time for "today's match" are equal
                self.assertEqual(data["roundNumber"], day - 1)

                self.assertGreater(data["cumulativeCorrectCount"], 0)
                self.assertGreater(data["cumulativeMeanAbsoluteError"], 0)
                self.assertGreater(data["cumulativeMarginDifference"], 0)
Пример #14
0
    def test_fetch_season_model_metrics_cumulative_metrics(self):
        """Check cumulative metrics returned by the fetchSeasonModelMetrics query.

        MATCH_COUNT matches are created for each of ROUND_COUNT rounds in June
        of the test year with all predictions forced correct. The first round
        entry returned for roundNumber -1 is then inspected for the
        "accurate_af" model, for the "predictanator" model, and for
        "predictanator" with the clock frozen early in the month.
        """
        ml_models = list(MLModel.objects.filter(name__in=MODEL_NAMES))
        YEAR = TWENTY_SEVENTEEN
        MONTH = 6

        for round_n in range(ROUND_COUNT):
            match_day = (round_n % 29) + 1
            for match_n in range(MATCH_COUNT):
                kickoff = timezone.make_aware(
                    datetime(YEAR, MONTH, match_day, match_n * 5)
                )
                FullMatchFactory(
                    with_predictions=True,
                    year=YEAR,
                    round_number=(round_n + 1),
                    start_date_time=kickoff,
                    prediction__ml_model=ml_models[0],
                    prediction__force_correct=True,
                    prediction_two__ml_model=ml_models[1],
                    prediction_two__force_correct=True,
                )

        query = """
            query($mlModelName: String) {
                fetchSeasonModelMetrics(season: 2017) {
                    season
                    roundModelMetrics(roundNumber: -1) {
                        roundNumber
                        modelMetrics(mlModelName: $mlModelName) {
                            mlModel { name }
                            cumulativeCorrectCount
                            cumulativeMeanAbsoluteError
                            cumulativeMarginDifference
                            cumulativeAccuracy
                            cumulativeBits
                        }
                    }
                }
            }
            """

        def first_round_entry(model_name):
            # Run the query for one model and return the first roundModelMetrics item
            response = self.client.execute(
                query, variables={"mlModelName": model_name}
            )
            season_metrics = response["data"]["fetchSeasonModelMetrics"]
            return season_metrics["roundModelMetrics"][0]

        with self.subTest("for a 'Win Probability' model"):
            data = first_round_entry("accurate_af")["modelMetrics"]

            self.assertEqual(len(data), 1)
            model_stats = data[0]
            self.assertEqual("accurate_af", model_stats["mlModel"]["name"])

            self.assertGreater(model_stats["cumulativeCorrectCount"], 0)
            self.assertEqual(model_stats["cumulativeMeanAbsoluteError"], 0)
            self.assertEqual(model_stats["cumulativeMarginDifference"], 0)
            self.assertGreater(model_stats["cumulativeAccuracy"], 0)
            # Bits can be positive or negative, so we just want to make sure it's not 0,
            # which would suggest a problem
            self.assertNotEqual(model_stats["cumulativeBits"], 0)

        with self.subTest("for a 'Margin' model"):
            data = first_round_entry("predictanator")["modelMetrics"]

            self.assertEqual(len(data), 1)
            model_stats = data[0]
            self.assertEqual("predictanator", model_stats["mlModel"]["name"])

            self.assertGreater(model_stats["cumulativeCorrectCount"], 0)
            self.assertGreater(model_stats["cumulativeMeanAbsoluteError"], 0)
            self.assertGreater(model_stats["cumulativeMarginDifference"], 0)
            self.assertGreater(model_stats["cumulativeAccuracy"], 0)
            self.assertEqual(model_stats["cumulativeBits"], 0)

        with self.subTest("when the last matches in the DB haven't been played yet"):
            DAY = 3
            fake_datetime = timezone.make_aware(datetime(YEAR, MONTH, DAY))

            with freeze_time(fake_datetime):
                # Need to set scores for "future" matches to 0
                TeamMatch.objects.filter(
                    match__start_date_time__gte=fake_datetime
                ).update(score=0)
                Match.objects.filter(start_date_time__gte=fake_datetime).update(
                    winner=None, margin=None
                )

                data = first_round_entry("predictanator")

                highest_round_match = max(
                    Match.objects.all(), key=lambda match: match.round_number
                )
                max_match_round = highest_round_match.round_number
                self.assertLess(data["roundNumber"], max_match_round)
                # Last played match will be from day before, because "now" and the
                # start time for "today's match" are equal
                self.assertEqual(data["roundNumber"], DAY - 1)

                model_stats = data["modelMetrics"][0]

                self.assertGreater(model_stats["cumulativeCorrectCount"], 0)
                self.assertGreater(model_stats["cumulativeMeanAbsoluteError"], 0)
                self.assertGreater(model_stats["cumulativeMarginDifference"], 0)
Пример #15
0
    def test_fetch_latest_round_predictions(self):
        """Exercise the ``fetchLatestRoundPredictions`` GraphQL query.

        Creates ``ROUND_COUNT`` matches in ``TWENTY_SEVENTEEN``, each with two
        predictions (built with ``prediction__force_correct`` and
        ``prediction_two__force_incorrect``), then checks the round number,
        season, predicted winners and non-principal prediction values that
        the query returns. Sub-tests cover unplayed matches, unplayed matches
        whose predictions have been deleted, and the absence of predictions
        from non-principal models.
        """
        ml_models = list(MLModel.objects.all())
        latest_year = TWENTY_SEVENTEEN

        latest_matches = [
            FullMatchFactory(
                with_predictions=True,
                year=latest_year,
                round_number=((round_n % 23) + 1),
                start_date_time=timezone.make_aware(
                    datetime(latest_year, 6, (round_n % 29) + 1)),
                prediction__ml_model=ml_models[0],
                prediction__force_correct=True,
                prediction_two__ml_model=ml_models[1],
                prediction_two__force_incorrect=True,
            ) for round_n in range(ROUND_COUNT)
        ]
        # Computed once and reused by the sub-tests below.
        max_round_number = max(match.round_number for match in latest_matches)

        query_string = """
            query QueryType {
                fetchLatestRoundPredictions {
                    roundNumber
                    matchPredictions {
                        startDateTime
                        predictedWinner
                        predictedMargin
                        predictedWinProbability
                        isCorrect
                    }
                }
            }
        """

        executed = self.client.execute(query_string)
        data = executed["data"]["fetchLatestRoundPredictions"]

        # It returns predictions from the last available round
        self.assertEqual(data["roundNumber"], max_round_number)

        # It returns predictions from the last available season.
        # Compare the set of years rather than their mean: a mean can equal
        # latest_year even when the individual years differ.
        match_years = {
            parser.parse(pred["startDateTime"]).year
            for pred in data["matchPredictions"]
        }
        self.assertEqual(match_years, {latest_year})

        # It uses predicted winners from the principal model only
        principal_predicted_winners = Prediction.objects.filter(
            match__start_date_time__year=latest_year,
            match__round_number=max_round_number,
            ml_model__is_principal=True,
        ).values_list("predicted_winner__name", flat=True)
        query_predicted_winners = [
            pred["predictedWinner"] for pred in data["matchPredictions"]
        ]
        self.assertEqual(sorted(principal_predicted_winners),
                         sorted(query_predicted_winners))

        # When models disagree, it inverts predictions from non-principal models
        non_principal_prediction_type = MLModel.objects.get(
            is_principal=False, used_in_competitions=True).prediction_type

        # Pick the response field that carries the non-principal model's
        # prediction, and the value at or below which it counts as a loss.
        if non_principal_prediction_type == "Margin":
            non_principal_prediction_label = "predictedMargin"
            draw_prediction = 0
        else:
            non_principal_prediction_label = "predictedWinProbability"
            draw_prediction = 0.5

        self.assertTrue(
            all(pred[non_principal_prediction_label] <= draw_prediction
                for pred in data["matchPredictions"]))

        with self.subTest("for unplayed matches"):
            # Add a later round made up of matches created with future=True.
            for _ in range(MATCH_COUNT):
                FullMatchFactory(
                    with_predictions=True,
                    future=True,
                    round_number=max_round_number + 1,
                    prediction__ml_model=ml_models[0],
                    prediction_two__ml_model=ml_models[1],
                )

            executed = self.client.execute(query_string)
            data = executed["data"]["fetchLatestRoundPredictions"]

            # It returns isCorrect values of null/None
            unique_is_correct_values = {
                pred["isCorrect"]
                for pred in data["matchPredictions"]
            }
            self.assertEqual({None}, unique_is_correct_values)

            with self.subTest("that don't have predictions yet"):
                Prediction.objects.filter(
                    match__start_date_time__gt=timezone.now()).delete()

                executed = self.client.execute(query_string)
                data = executed["data"]["fetchLatestRoundPredictions"]

                # It returns predictions from the last round that has them
                self.assertEqual(data["roundNumber"], max_round_number)

        with self.subTest("without predictions from a non-principal model"):
            Prediction.objects.filter(ml_model__is_principal=False).delete()

            executed = self.client.execute(query_string)
            data = executed["data"]["fetchLatestRoundPredictions"]

            self.assertGreater(len(data["matchPredictions"]), 0)
Пример #16
0
    def test_fetch_season_model_metrics(self):
        """Exercise the ``fetchSeasonModelMetrics`` GraphQL query.

        Checks that the requested season is echoed back, that cumulative
        accuracy stays within [0, 1], and that cumulative correct counts and
        accuracy for the first two returned rounds agree with the stored
        ``Prediction.is_correct`` values. Sub-tests cover a season created
        under a frozen clock early in the year, the ``mlModelName`` argument,
        and a ``roundNumber`` argument of -1.
        """
        year = 2015
        ml_model_names = (Match.objects.filter(start_date_time__year=year).
                          distinct("prediction__ml_model__name").values_list(
                              "prediction__ml_model__name", flat=True))

        ml_models = list(MLModel.objects.filter(name__in=ml_model_names))

        # Have to make sure at least one match has a different round_number to compare
        # later rounds to earlier ones
        match_round_number = 50
        match_start_date_time = timezone.make_aware(datetime(year, 10, 31))
        FullMatchFactory(
            with_predictions=True,
            year=year,
            round_number=match_round_number,
            start_date_time=match_start_date_time,
            prediction__ml_model=ml_models[0],
            prediction_two__ml_model=ml_models[1],
        )

        query = """
            query($season: Int) {
                fetchSeasonModelMetrics(season: $season) {
                    season
                    roundModelMetrics {
                        roundNumber
                        modelMetrics {
                            mlModel { name }
                            cumulativeCorrectCount
                            cumulativeAccuracy
                            cumulativeMeanAbsoluteError
                            cumulativeMarginDifference
                            cumulativeBits
                        }
                    }
                }
            }
        """
        variables = {"season": year}
        executed = self.client.execute(query, variables=variables)

        data = executed["data"]["fetchSeasonModelMetrics"]

        self.assertEqual(data["season"], year)

        predictions = data["roundModelMetrics"]

        for pred in predictions:
            for model_metric in pred["modelMetrics"]:
                self.assertGreaterEqual(model_metric["cumulativeAccuracy"],
                                        0.0)
                self.assertLessEqual(model_metric["cumulativeAccuracy"], 1.0)

        # Fail with a readable message instead of an IndexError when fewer
        # than two rounds come back.
        self.assertGreaterEqual(len(predictions), 2)

        earlier_round = predictions[0]
        later_round = predictions[1]

        self.assertLess(earlier_round["roundNumber"],
                        later_round["roundNumber"])

        earlier_round_cum_correct = [
            prediction["cumulativeCorrectCount"]
            for prediction in earlier_round["modelMetrics"]
        ]
        earlier_round_cum_accuracy = [
            prediction["cumulativeAccuracy"]
            for prediction in earlier_round["modelMetrics"]
        ]

        earlier_round_correct = Prediction.objects.filter(
            match__start_date_time__year=year,
            match__round_number=earlier_round["roundNumber"],
        ).values_list("is_correct", flat=True)

        # Regression tests to make sure cumulative counts and cumulative accuracy
        # are being calculated correctly
        self.assertEqual(sum(earlier_round_cum_correct),
                         sum(earlier_round_correct))
        self.assertEqual(
            round(
                sum(earlier_round_cum_accuracy) /
                len(earlier_round_cum_accuracy), 4),
            round(sum(earlier_round_correct) / len(earlier_round_correct), 4),
        )

        later_round_cum_correct = [
            prediction["cumulativeCorrectCount"]
            for prediction in later_round["modelMetrics"]
        ]
        later_round_correct = Prediction.objects.filter(
            match__start_date_time__year=year,
            match__round_number=later_round["roundNumber"],
        ).values_list("is_correct", flat=True)

        # Regression test to make sure cumulative counts are being calculated correctly
        self.assertEqual(
            sum(earlier_round_correct) + sum(later_round_correct),
            sum(later_round_cum_correct),
        )

        self.assertLessEqual(sum(earlier_round_cum_correct),
                             sum(later_round_cum_correct))

        with self.subTest("when no matches have been played yet this year"):
            unplayed_season = max(YEAR_RANGE)

            # Freeze the clock at 2 January of that season while creating the
            # match and running the query.
            with freeze_time(
                    timezone.make_aware(datetime(unplayed_season, 1, 2))):
                FullMatchFactory(
                    with_predictions=True,
                    year=unplayed_season,
                    prediction__ml_model=ml_models[0],
                    prediction_two__ml_model=ml_models[1],
                )

                executed = self.client.execute(
                    query, variables={"season": unplayed_season})
                self.assertIsNone(executed.get("errors"))

        with self.subTest("with mlModelName argument 'predictanator'"):
            executed = self.client.execute("""
                query QueryType {
                    fetchSeasonModelMetrics(season: 2015) {
                        roundModelMetrics {
                            modelMetrics(mlModelName: "predictanator") {
                                mlModel { name }
                            }
                        }
                    }
                }
                """)

            data = executed["data"]["fetchSeasonModelMetrics"][
                "roundModelMetrics"][0]

            self.assertEqual(len(data["modelMetrics"]), 1)
            self.assertEqual(data["modelMetrics"][0]["mlModel"]["name"],
                             "predictanator")

        with self.subTest("with roundNumber argument of -1"):
            executed = self.client.execute("""
                query QueryType {
                    fetchSeasonModelMetrics(season: 2015) {
                        roundModelMetrics(roundNumber: -1) { roundNumber }
                    }
                }
                """)

            data = executed["data"]["fetchSeasonModelMetrics"][
                "roundModelMetrics"]

            self.assertEqual(len(data), 1)

            # Only the queried season's matches are relevant to the expected
            # round; other seasons' round numbers must not leak into it.
            # values_list avoids instantiating every Match just to read one
            # column.
            max_round_number = max(
                Match.objects.filter(start_date_time__year=year).values_list(
                    "round_number", flat=True))
            self.assertEqual(data[0]["roundNumber"], max_round_number)