Пример #1
0
    def __prediction(self, predict_season: int, non_mol_kde: gaussian_kde, mol_kde: gaussian_kde) -> Dict[Player, float]:
        """ Turn the age of every player in the prediction season into a Mol likelihood.

        Every age is limited to the range spanned by the two boundaries obtained from
        InnerAppearanceLayer.get_boundary (requested for cdf_cutoff / 2 and 1 - cdf_cutoff / 2). Both density
        estimators are then evaluated at that age: the non-Mol density is weighted by (n - 1) / n and the Mol density
        by 1 / n, where n is the number of players in the season. The likelihood is the weighted Mol density divided
        by the sum of both weighted densities.

        Arguments:
            predict_season (int): The season to predict for.
            non_mol_kde (gaussian_kde): Density estimator of the non-Mol ages.
            mol_kde (gaussian_kde): Density estimator of the Mol ages.

        Returns:
            A dictionary mapping each player of the prediction season to the Mol likelihood derived from their age.
        """
        ages = {player: float(get_age(player)) for player in get_players_in_season(predict_season)}
        num_players = len(ages)
        lower_bound = InnerAppearanceLayer.get_boundary(non_mol_kde, mol_kde, num_players, self.__cdf_cutoff / 2,
                                                        self.MIN_VALUE, self.MAX_VALUE)
        upper_bound = InnerAppearanceLayer.get_boundary(non_mol_kde, mol_kde, num_players, 1 - self.__cdf_cutoff / 2,
                                                        self.MIN_VALUE, self.MAX_VALUE)

        likelihoods = {}
        for player, raw_age in ages.items():
            # Keep the age inside [lower_bound, upper_bound] before evaluating the densities.
            clamped_age = min(max(raw_age, lower_bound), upper_bound)
            non_mol_part = non_mol_kde.pdf(clamped_age)[0] * (num_players - 1) / num_players
            mol_part = mol_kde.pdf(clamped_age)[0] / num_players
            likelihoods[player] = mol_part / (non_mol_part + mol_part)
        return likelihoods
Пример #2
0
    def __predict(
            self, predict_season: int, latest_episode: int,
            predict_data: Dict[Player, List[np.array]],
            classifier: LogisticRegression) -> Dict[Player, MultiLayerResult]:
        """ Run the prediction phase of the Money Layer.

        Arguments:
            predict_season (int): The season to predict for.
            latest_episode (int): The last episode of the predict season that may be used.
            predict_data (Dict[Player, List[np.array]]): Per player the feature rows to classify.
            classifier (LogisticRegression): The trained model; for each row the probability in column 1 of
                predict_proba is used as likelihood.

        Returns:
            A dictionary mapping each player of the prediction season to a MultiLayerResult holding that player's
            likelihoods (empty if the player has no rows) and whether the player is absent from the alive players.
        """
        # Every player of the season gets an entry, even without any feature rows.
        likelihoods_per_player = {player: [] for player in get_players_in_season(predict_season)}
        alive_players = MONEY_DATA[predict_season].get_alive(latest_episode)

        for player, feature_rows in predict_data.items():
            for feature_row in feature_rows:
                probabilities = classifier.predict_proba(np.array([feature_row]))
                likelihoods_per_player[player].append(probabilities[0][1])

        results = {}
        for player, likelihoods in likelihoods_per_player.items():
            results[player] = MultiLayerResult(np.array(likelihoods), player not in alive_players)
        return results
Пример #3
0
    def __prediction(self, extractor: AppearanceExtractor, classifier: Classifier, predict_season: int) -> \
            Dict[Player, MultiLayerResult]:
        """ Run the prediction phase of the Appearance Layer.

        Arguments:
            extractor (AppearanceExtractor): Supplies the prediction data through get_predict_data().
            classifier (Classifier): Classifier whose predict_proba is called once per data point of a player.
            predict_season (int): The season to predict for.

        Returns:
            A dictionary mapping each player of the prediction season to a MultiLayerResult. Players without
            prediction data get an empty array with the flag set to True; all others get their predictions with the
            flag set to False. If there is no prediction data at all, the result of EmptyMultiLayer is returned.
        """
        predict_data = extractor.get_predict_data()
        if not predict_data:
            return EmptyMultiLayer().predict(predict_season, 0, set())

        results = {}
        for player in get_players_in_season(predict_season):
            if player not in predict_data:
                results[player] = MultiLayerResult(np.array([]), True)
                continue
            probabilities = [classifier.predict_proba([sample]) for sample in predict_data[player]]
            results[player] = MultiLayerResult(np.array(probabilities), False)
        return results
Пример #4
0
 def compute_distribution(self, predict_season: int, latest_episode: int,
                          train_seasons: Set[int]) -> Dict[Player, float]:
     """ Give every player likelihood 1.0, except the manually excluded ones.

     A player listed in MANUAL_EXCLUSIONS for the predict season gets likelihood 0.0 as soon as latest_episode has
     reached the episode that is paired with that player.

     Arguments:
         predict_season (int): The season to predict for.
         latest_episode (int): The last episode of the predict season that may be used.
         train_seasons (Set[int]): Not used by this layer.

     Returns:
         A dictionary mapping players to either 1.0 or 0.0.
     """
     distribution = dict.fromkeys(get_players_in_season(predict_season), 1.0)
     for player, first_known_episode in MANUAL_EXCLUSIONS.get(predict_season, []):
         if first_known_episode <= latest_episode:
             distribution[player] = 0.0
     return distribution
Пример #5
0
 def compute_distribution(self, predict_season: int, latest_episode: int,
                          train_seasons: Set[int]) -> Dict[Player, float]:
     """ Down-weight the players that SUSPICION_DATA lists for the predict season.

     Arguments:
         predict_season (int): The season to predict for.
         latest_episode (int): The last episode of the predict season that may be used.
         train_seasons (Set[int]): The seasons available for training; only forwarded to the fallback layer.

     Returns:
         A dictionary mapping each player of the season to the class epsilon if the player is listed in
         SUSPICION_DATA for that season and to 1.0 otherwise. Seasons without an entry in SUSPICION_DATA get the
         distribution computed by EqualLayer.
     """
     if predict_season not in SUSPICION_DATA:
         return EqualLayer().compute_distribution(predict_season, latest_episode, train_seasons)

     listed_players = SUSPICION_DATA[predict_season]
     return {
         player: self.__EPSILON if player in listed_players else 1.0
         for player in get_players_in_season(predict_season)
     }
Пример #6
0
    def __predict(
            self, predict_season: int, latest_episode: int,
            predict_data: List[PredictSample],
            in_classifier: LogisticRegression,
            out_classifier: LogisticRegression
    ) -> Dict[Player, MultiLayerResult]:
        """ Run the prediction phase of the Exam Drop Layer.

        For every sample both classifiers produce a likelihood (column 1 of predict_proba). If the out-likelihood
        is lower than the in-likelihood, both are replaced by 1 / number of alive players. Each likelihood is then
        raised to the power of the sample weight and recorded for the players in the matching answer group.

        Arguments:
            predict_season (int): The season to predict for.
            latest_episode (int): The last episode of the predict season that may be used.
            predict_data (List[PredictSample]): The samples with the features, weight and answer groups.
            in_classifier (LogisticRegression): The model used for the players that are in the answer.
            out_classifier (LogisticRegression): The model used for the players that are out of the answer.

        Returns:
            A dictionary mapping each player of the prediction season to a MultiLayerResult holding that player's
            likelihoods and whether the player is absent from the alive players.
        """
        # Every player of the season gets an entry, even when no sample mentions them.
        likelihoods_per_player = {player: [] for player in get_players_in_season(predict_season)}
        alive_players = EXAM_DATA[predict_season].get_alive_players(latest_episode)

        for sample in predict_data:
            features = np.array([sample.features])
            in_score = in_classifier.predict_proba(features)[0][1]
            out_score = out_classifier.predict_proba(features)[0][1]
            if in_score > out_score:
                uniform_score = 1 / len(alive_players)
                in_score = uniform_score
                out_score = uniform_score

            weighted_in = in_score ** sample.weight
            weighted_out = out_score ** sample.weight
            for player in sample.in_answer:
                likelihoods_per_player[player].append(weighted_in)
            for player in sample.out_answer:
                likelihoods_per_player[player].append(weighted_out)

        results = {}
        for player, likelihoods in likelihoods_per_player.items():
            results[player] = MultiLayerResult(np.array(likelihoods), player not in alive_players)
        return results
Пример #7
0
    def compute_distribution(self, predict_season: int, latest_episode: int, train_seasons: Set[int]) -> Dict[Player, float]:
        """ Compute the likelihood of every player based on the execution drops of the predict season.

        Arguments:
            predict_season (int): The season to predict for.
            latest_episode (int): The last episode of the predict season that may be used.
            train_seasons (Set[int]): The seasons available for training; restricted to the seasons in EXAM_DATA.

        Returns:
            A dictionary mapping each player of the season to a likelihood. Alive players start at 1.0 and all
            others at 0.0; the values are multiplied by the per-episode prediction of every episode up to
            latest_episode whose drop is an execution drop. Seasons missing from EXAM_DATA get the distribution
            computed by EqualLayer.
        """
        seasons_with_data = EXAM_DATA.keys()
        usable_train_seasons = train_seasons.intersection(seasons_with_data)
        if predict_season not in seasons_with_data:
            return EqualLayer().compute_distribution(predict_season, latest_episode, usable_train_seasons)

        estimator = self.__train(usable_train_seasons)
        season_data = EXAM_DATA[predict_season]
        alive_players = season_data.get_alive_players(latest_episode)

        distribution = {}
        for player in get_players_in_season(predict_season):
            distribution[player] = 1.0 if player in alive_players else 0.0

        for episode in season_data.episodes.values():
            # Only episodes that are already known and ended in an execution drop contribute.
            if episode.id <= latest_episode and episode.result.drop == DropType.EXECUTION_DROP:
                episode_prediction = self.__predict_for_episode(episode, alive_players, estimator)
                for player, likelihood in episode_prediction.items():
                    distribution[player] *= likelihood
        return distribution
Пример #8
0
    def parse_raw(self, season: int,
                  dictionary: enchant.Dict) -> Dict[Player, WikipediaData]:
        """ Extract the Wikipedia features of every player that participated in the given season.

        Parameters:
            season (int): The season whose players are processed.
            dictionary (enchant.Dict): The dictionary instance handed to the feature extraction, which checks if
                something is a word.

        Returns:
            Dict[Player, WikipediaData]: A dictionary mapping each player of the season to the result of
                WikipediaParser.extract_player_features for that player.
        """
        return {
            player: WikipediaParser.extract_player_features(player, dictionary)
            for player in get_players_in_season(season)
        }
Пример #9
0
    def __prediction(self, extractor: AppearanceExtractor,
                     non_mol_kde: gaussian_kde, mol_kde: gaussian_kde,
                     predict_season: int) -> Dict[Player, MultiLayerResult]:
        """ Run the prediction phase of the Appearance Layer.

        Every appearance value is limited to the range spanned by the two boundaries obtained from get_boundary
        (requested for cdf_cutoff / 2 and 1 - cdf_cutoff / 2). Both density estimators are then evaluated at that
        value: the non-Mol density is weighted by (n - 1) / n and the Mol density by 1 / n, where n is the number of
        players with prediction data. The likelihood is the weighted Mol density divided by the sum of both.

        Arguments:
            extractor (AppearanceExtractor): Supplies the prediction data through get_predict_data().
            non_mol_kde (gaussian_kde): Density estimator of the non-Mol appearance values.
            mol_kde (gaussian_kde): Density estimator of the Mol appearance values.
            predict_season (int): The season to predict for.

        Returns:
            A dictionary mapping each player of the prediction season to a MultiLayerResult. Players without
            prediction data get an empty array with the flag set to True; all others get their likelihoods with the
            flag set to False. If there is no prediction data at all, the result of EmptyMultiLayer is returned.
        """
        predict_data = extractor.get_predict_data()
        if not predict_data:
            return EmptyMultiLayer().predict(predict_season, 0, set())

        num_players = len(predict_data)
        lower_bound = self.get_boundary(non_mol_kde, mol_kde, num_players, self.__cdf_cutoff / 2,
                                        self.MIN_VALUE, self.MAX_VALUE)
        upper_bound = self.get_boundary(non_mol_kde, mol_kde, num_players, 1 - self.__cdf_cutoff / 2,
                                        self.MIN_VALUE, self.MAX_VALUE)

        results = {}
        for player in get_players_in_season(predict_season):
            if player not in predict_data:
                results[player] = MultiLayerResult(np.array([]), True)
                continue

            likelihoods = []
            for value in predict_data[player]:
                # Keep the value inside [lower_bound, upper_bound] before evaluating the densities.
                clamped = min(max(value, lower_bound), upper_bound)
                non_mol_part = non_mol_kde.pdf(clamped)[0] * (num_players - 1) / num_players
                mol_part = mol_kde.pdf(clamped)[0] / num_players
                likelihoods.append(mol_part / (non_mol_part + mol_part))
            results[player] = MultiLayerResult(np.array(likelihoods), False)
        return results
Пример #10
0
    def __get_players_with_episodes(
            self, season: int,
            parsed_videos: Dict[int, ParsedVideo]) -> Dict[Player, Set[int]]:
        """ Collect for every player of the season the episodes in which that player is listed as alive.

        Parameters:
            season (int): The season whose players are looked up.
            parsed_videos (Dict[int, ParsedVideo]): The parsed videos of that season, keyed by episode.

        Returns:
            A dictionary mapping each player of the season to the set of episode keys whose parsed video has the
            player among its alive_players. Players that appear in no episode map to an empty set.
        """
        return {
            player: {
                episode
                for episode, video in parsed_videos.items()
                if player in video.alive_players
            }
            for player in get_players_in_season(season)
        }
Пример #11
0
 def compute_distribution(self, predict_season: int, latest_episode: int,
                          train_seasons: Set[int]) -> Dict[Player, float]:
     """ Assign the same likelihood to every player of the predict season.

     Arguments:
         predict_season (int): The season to predict for.
         latest_episode (int): Not used by this layer.
         train_seasons (Set[int]): Not used by this layer.

     Returns:
         A dictionary mapping each player of the season to 1 / (number of players in the season).
     """
     season_players = get_players_in_season(predict_season)
     return dict.fromkeys(season_players, 1 / len(season_players))
Пример #12
0
from Layers.Special.MemoryLayer import MemoryLayer
from scipy.stats import pearsonr, kendalltau
import math

# Seasons 1 up to and including 21 are handed to the layers as training seasons,
# seasons 9 up to and including 21 are the ones that get evaluated.
TRAIN_SEASONS = set(range(1, 22))
TEST_SEASONS = set(range(9, 22))
layer1 = MemoryLayer("Wikipedia Stacker")
layer2 = MemoryLayer("Appearance Stacker")

# NOTE: nothing in this part of the script appends to pairs yet.
pairs = []
for season in TEST_SEASONS:
    # Only the players that are not the Mol are considered.
    players = {
        candidate
        for candidate in get_players_in_season(season)
        if not get_is_mol(candidate)
    }
    for episode in range(get_last_episode(season) + 1):
        prediction1 = layer1.compute_distribution(season, episode, TRAIN_SEASONS)
        prediction2 = layer2.compute_distribution(season, episode, TRAIN_SEASONS)
        # A player is excluded as soon as either layer assigns a probability of exactly 0.0.
        excluded = {
            candidate
            for candidate, prob in prediction1.items() if prob == 0.0
        } | {
            candidate
            for candidate, prob in prediction2.items() if prob == 0.0
        }
        included = players - excluded
Пример #13
0
 def predict(self, predict_season: int, latest_episode: int, train_seasons: Set[int]) -> Dict[Player, MultiLayerResult]:
     """ Produce a result without any predictions for every player of the predict season.

     Arguments:
         predict_season (int): The season to predict for.
         latest_episode (int): Not used directly by this method.
         train_seasons (Set[int]): Not used directly by this method.

     Returns:
         A dictionary mapping each player of the season to a MultiLayerResult built from an empty array and the
         outcome of the exclusion check for that player.
     """
     results = {}
     for player in get_players_in_season(predict_season):
         is_excluded = self.__is_excluded_player(player)
         results[player] = MultiLayerResult(np.array([]), is_excluded)
     return results
Пример #14
0
 def compute_distribution(self, predict_season: int, latest_episode: int, train_seasons: Set[int]) -> Dict[Player, float]:
     """ Assign likelihood 1.0 to every player for which get_is_mol is true and 0.0 to all others.

     Arguments:
         predict_season (int): The season to predict for.
         latest_episode (int): Not used by this layer.
         train_seasons (Set[int]): Not used by this layer.

     Returns:
         A dictionary mapping each player of the season to either 1.0 or 0.0.
     """
     distribution = {}
     for player in get_players_in_season(predict_season):
         distribution[player] = 1.0 if get_is_mol(player) else 0.0
     return distribution