def __prediction(self, predict_season: int, non_mol_kde: gaussian_kde,
                 mol_kde: gaussian_kde) -> Dict[Player, float]:
    """ Run the prediction phase of the Age Layer.

    Every age is clamped between the two values returned by get_boundary before both estimators are evaluated
    on it. The non-Mol density is weighted with (n - 1) / n and the Mol density with 1 / n, where n is the
    number of players in the prediction season; the result is the Mol share of the sum of both.

    Arguments:
        predict_season (int): The season for which the prediction is made.
        non_mol_kde (gaussian_kde): The Kernel Density Estimator for non-Mol ages.
        mol_kde (gaussian_kde): The Kernel Density Estimator for Mol ages.

    Returns:
        A dictionary with as key the players that participated in the prediction season and as value their
        Mol likelihood based on their age.
    """
    ages = {player: float(get_age(player)) for player in get_players_in_season(predict_season)}
    num_players = len(ages)
    half_cutoff = self.__cdf_cutoff / 2
    lower_bound = InnerAppearanceLayer.get_boundary(non_mol_kde, mol_kde, num_players, half_cutoff,
                                                    self.MIN_VALUE, self.MAX_VALUE)
    upper_bound = InnerAppearanceLayer.get_boundary(non_mol_kde, mol_kde, num_players, 1 - half_cutoff,
                                                    self.MIN_VALUE, self.MAX_VALUE)

    def mol_share(age: float) -> float:
        # Ages outside [lower_bound, upper_bound] are evaluated at the nearest bound.
        clamped = min(max(age, lower_bound), upper_bound)
        non_mol_part = non_mol_kde.pdf(clamped)[0] * (num_players - 1) / num_players
        mol_part = mol_kde.pdf(clamped)[0] / num_players
        return mol_part / (non_mol_part + mol_part)

    return {player: mol_share(age) for player, age in ages.items()}
def __predict(self, predict_season: int, latest_episode: int, predict_data: Dict[Player, List[np.array]],
              classifier: LogisticRegression) -> Dict[Player, MultiLayerResult]:
    """ Execute the prediction phase of the Money Layer.

    Arguments:
        predict_season (int): The season for which the predictions are made.
        latest_episode (int): The latest episode useable in the predict season.
        predict_data (Dict[Player, List[np.array]]): For every player the feature rows used to make
            predictions; every row results in one prediction for that player.
        classifier (LogisticRegression): The machine learning model used to make predictions.

    Returns:
        A dictionary with as key the players that participated in the prediction season and as value a
        MultiLayerResult which contains the predictions. The second argument of the MultiLayerResult is
        True for players that are not in the alive players of the latest episode.
    """
    # Every player of the season gets an entry, also players without any prediction data.
    all_predictions = {player: [] for player in get_players_in_season(predict_season)}
    alive_players = MONEY_DATA[predict_season].get_alive(latest_episode)
    for player, all_rows in predict_data.items():
        for row in all_rows:
            # predict_proba orders its columns like classifier.classes_; column 1 is the second class
            # (presumably the Mol class - confirm against the training labels).
            likelihood = classifier.predict_proba(np.array([row]))[0][1]
            # Append in place rather than rebuilding the list for every prediction.
            all_predictions[player].append(likelihood)
    return {
        player: MultiLayerResult(np.array(predictions), player not in alive_players)
        for player, predictions in all_predictions.items()
    }
def __prediction(self, extractor: AppearanceExtractor, classifier: Classifier,
                 predict_season: int) -> Dict[Player, MultiLayerResult]:
    """ Run the prediction phase of the Appearance Layer.

    Arguments:
        extractor (AppearanceExtractor): The extractor which delivers the prediction data.
        classifier (Classifier): A classifier which classifies players as either Mol or non-Mol based on
            how often they appear.
        predict_season (int): For which season we make the prediction.

    Returns:
        A dictionary with as key the players that participated in the prediction season and as value a
        MultiLayerResult which contains the predictions. If the extractor delivers no prediction data, the
        result of EmptyMultiLayer is returned instead.
    """
    predict_data = extractor.get_predict_data()
    if not predict_data:
        return EmptyMultiLayer().predict(predict_season, 0, set())

    results = dict()
    for player in get_players_in_season(predict_season):
        if player not in predict_data:
            # No data for this player: empty predictions with the flag set to True.
            results[player] = MultiLayerResult(np.array([]), True)
            continue
        probabilities = [classifier.predict_proba([sample]) for sample in predict_data[player]]
        results[player] = MultiLayerResult(np.array(probabilities), False)
    return results
def compute_distribution(self, predict_season: int, latest_episode: int,
                         train_seasons: Set[int]) -> Dict[Player, float]:
    """ Give every player a likelihood of 1.0, except for manually excluded players which get 0.0.

    Arguments:
        predict_season (int): The season for which the distribution is computed.
        latest_episode (int): The latest episode useable in the predict season.
        train_seasons (Set[int]): Not used by this layer.

    Returns:
        A dictionary with as key the players of the predict season and as value 1.0, or 0.0 if
        MANUAL_EXCLUSIONS lists the player with an episode that is not later than the latest episode.
    """
    distribution = dict.fromkeys(get_players_in_season(predict_season), 1.0)
    for excluded_player, known_from_episode in MANUAL_EXCLUSIONS.get(predict_season, []):
        if known_from_episode > latest_episode:
            # The exclusion is not yet known at the latest episode.
            continue
        distribution[excluded_player] = 0.0
    return distribution
def compute_distribution(self, predict_season: int, latest_episode: int,
                         train_seasons: Set[int]) -> Dict[Player, float]:
    """ Lower the likelihood of the players that are listed in SUSPICION_DATA for the predict season.

    Arguments:
        predict_season (int): The season for which the distribution is computed.
        latest_episode (int): The latest episode useable in the predict season; only passed on to the
            fallback layer.
        train_seasons (Set[int]): The seasons used for training; only passed on to the fallback layer.

    Returns:
        A dictionary with as key the players of the predict season and as value the epsilon constant of
        this layer if the player is listed in SUSPICION_DATA for that season and 1.0 otherwise. If the
        season has no entry in SUSPICION_DATA, the distribution of EqualLayer is returned.
    """
    if predict_season not in SUSPICION_DATA:
        return EqualLayer().compute_distribution(predict_season, latest_episode, train_seasons)

    listed_players = SUSPICION_DATA[predict_season]
    return {
        player: (self.__EPSILON if player in listed_players else 1.0)
        for player in get_players_in_season(predict_season)
    }
def __predict(self, predict_season: int, latest_episode: int, predict_data: List[PredictSample],
              in_classifier: LogisticRegression,
              out_classifier: LogisticRegression) -> Dict[Player, MultiLayerResult]:
    """ Execute the prediction phase of the Exam Drop Layer.

    Arguments:
        predict_season (int): The season for which the predictions are made.
        latest_episode (int): The latest episode useable in the predict season.
        predict_data (List[PredictSample]): The prediction data with features used to make predictions.
        in_classifier (LogisticRegression): The machine learning model used to make predictions for cases
            where a player is in the answer.
        out_classifier (LogisticRegression): The machine learning model used to make predictions for cases
            where a player is out the answer.

    Returns:
        A dictionary with as key the players that participated in the prediction season and as value a
        MultiLayerResult which contains the predictions. The second argument of the MultiLayerResult is
        True for players that are not in the alive players of the latest episode.
    """
    # Every player of the season gets an entry, also players that never occur in an answer.
    all_predictions = {player: [] for player in get_players_in_season(predict_season)}
    alive_players = EXAM_DATA[predict_season].get_alive_players(latest_episode)
    for data in predict_data:
        # Both classifiers receive the same single-row feature matrix, so build it only once.
        features = np.array([data.features])
        in_likelihood = in_classifier.predict_proba(features)[0][1]
        out_likelihood = out_classifier.predict_proba(features)[0][1]
        if out_likelihood < in_likelihood:
            # NOTE(review): presumably such a sample is seen as uninformative; both likelihoods are
            # replaced by the same uniform value over the alive players.
            in_likelihood = out_likelihood = 1 / len(alive_players)
        in_likelihood = in_likelihood**data.weight
        out_likelihood = out_likelihood**data.weight
        # Append in place rather than rebuilding the list for every prediction.
        for player in data.in_answer:
            all_predictions[player].append(in_likelihood)
        for player in data.out_answer:
            all_predictions[player].append(out_likelihood)
    return {
        player: MultiLayerResult(np.array(predictions), player not in alive_players)
        for player, predictions in all_predictions.items()
    }
def compute_distribution(self, predict_season: int, latest_episode: int,
                         train_seasons: Set[int]) -> Dict[Player, float]:
    """ Multiply, per player, the likelihoods predicted for the execution drop episodes of the season.

    Arguments:
        predict_season (int): The season for which the distribution is computed.
        latest_episode (int): The latest episode useable in the predict season.
        train_seasons (Set[int]): The seasons used for training; only the seasons that are also in
            EXAM_DATA are used.

    Returns:
        A dictionary with as key the players of the predict season and as value their likelihood. Players
        that are not alive at the latest episode start at 0.0 and the others at 1.0. If the predict season
        is not in EXAM_DATA, the distribution of EqualLayer is returned.
    """
    known_seasons = EXAM_DATA.keys()
    train_seasons = train_seasons.intersection(known_seasons)
    if predict_season not in known_seasons:
        return EqualLayer().compute_distribution(predict_season, latest_episode, train_seasons)

    estimator = self.__train(train_seasons)
    season_data = EXAM_DATA[predict_season]
    alive_players = season_data.get_alive_players(latest_episode)

    result = dict()
    for player in get_players_in_season(predict_season):
        result[player] = 1.0 if player in alive_players else 0.0

    for episode in season_data.episodes.values():
        # Only episodes up to the latest episode that have an execution drop contribute.
        if episode.id > latest_episode:
            continue
        if episode.result.drop != DropType.EXECUTION_DROP:
            continue
        episode_prediction = self.__predict_for_episode(episode, alive_players, estimator)
        for player, likelihood in episode_prediction.items():
            result[player] *= likelihood
    return result
def parse_raw(self, season: int, dictionary: enchant.Dict) -> Dict[Player, WikipediaData]:
    """ Parse the Wikipedia files of all players that participated in a season.

    Parameters:
        season (int): The season of which the players are parsed.
        dictionary (enchant.Dict): The dictionary instance which checks if something is a word.

    Returns:
        Dict[Player, WikipediaData]: A dictionary with as key the players of that season and as value the
        result of WikipediaParser.extract_player_features for that player (a Wikipedia Data tuple holding a
        counter of all jobs of the player and the total number of words in the player's Wikipedia page).
    """
    return {
        player: WikipediaParser.extract_player_features(player, dictionary)
        for player in get_players_in_season(season)
    }
def __prediction(self, extractor: AppearanceExtractor, non_mol_kde: gaussian_kde, mol_kde: gaussian_kde,
                 predict_season: int) -> Dict[Player, MultiLayerResult]:
    """ Run the prediction phase of the Appearance Layer.

    Every appearance value is clamped between the two values returned by get_boundary before both
    estimators are evaluated on it. The non-Mol density is weighted with (n - 1) / n and the Mol density
    with 1 / n, where n is the number of players in the prediction data; a prediction is the Mol share of
    the sum of both.

    Arguments:
        extractor (AppearanceExtractor): The extractor which delivers the prediction data.
        non_mol_kde (gaussian_kde): The Kernel Density Estimator for non-Mol appearance values.
        mol_kde (gaussian_kde): The Kernel Density Estimator for Mol appearance values.
        predict_season (int): For which season we make the prediction.

    Returns:
        A dictionary with as key the players that participated in the prediction season and as value a
        MultiLayerResult which contains the predictions. If the extractor delivers no prediction data, the
        result of EmptyMultiLayer is returned instead.
    """
    predict_data = extractor.get_predict_data()
    if not predict_data:
        return EmptyMultiLayer().predict(predict_season, 0, set())

    num_players = len(predict_data)
    half_cutoff = self.__cdf_cutoff / 2
    lower_bound = self.get_boundary(non_mol_kde, mol_kde, num_players, half_cutoff, self.MIN_VALUE,
                                    self.MAX_VALUE)
    upper_bound = self.get_boundary(non_mol_kde, mol_kde, num_players, 1 - half_cutoff, self.MIN_VALUE,
                                    self.MAX_VALUE)

    def mol_share(value):
        # Values outside [lower_bound, upper_bound] are evaluated at the nearest bound.
        clamped = min(max(value, lower_bound), upper_bound)
        non_mol_part = non_mol_kde.pdf(clamped)[0] * (num_players - 1) / num_players
        mol_part = mol_kde.pdf(clamped)[0] / num_players
        return mol_part / (non_mol_part + mol_part)

    results = dict()
    for player in get_players_in_season(predict_season):
        if player not in predict_data:
            # No data for this player: empty predictions with the flag set to True.
            results[player] = MultiLayerResult(np.array([]), True)
        else:
            likelihoods = [mol_share(value) for value in predict_data[player]]
            results[player] = MultiLayerResult(np.array(likelihoods), False)
    return results
def __get_players_with_episodes(self, season: int,
                                parsed_videos: Dict[int, ParsedVideo]) -> Dict[Player, Set[int]]:
    """ Collect for every player of a season the episodes in which that player is among the alive players.

    Parameters:
        season (int): The season for which we compute this.
        parsed_videos (Dict[int, ParsedVideo]): All the parsed videos from the episodes of that season,
            with the episode as key.

    Returns:
        A dictionary with as key the players of that season and as value the set of episodes whose parsed
        video lists the player in its alive players.
    """
    player_episodes = dict()
    for player in get_players_in_season(season):
        appeared_in = set()
        for episode, video in parsed_videos.items():
            if player in video.alive_players:
                appeared_in.add(episode)
        player_episodes[player] = appeared_in
    return player_episodes
def compute_distribution(self, predict_season: int, latest_episode: int,
                         train_seasons: Set[int]) -> Dict[Player, float]:
    """ Give every player of the predict season the same likelihood.

    Arguments:
        predict_season (int): The season for which the distribution is computed.
        latest_episode (int): Not used by this layer.
        train_seasons (Set[int]): Not used by this layer.

    Returns:
        A dictionary with as key the players of the predict season and as value one divided by the number
        of players in that season. The dictionary is empty if the season has no players.
    """
    season_players = get_players_in_season(predict_season)
    num_players = len(season_players)
    if num_players == 0:
        # Without players there is nothing to distribute; avoid dividing by zero.
        return dict()
    likelihood = 1 / num_players
    return {player: likelihood for player in season_players}
from Layers.Special.MemoryLayer import MemoryLayer
from scipy.stats import pearsonr, kendalltau
import math

# Seasons passed to both layers as train seasons.
TRAIN_SEASONS = {
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
}
# Seasons for which the distributions of both layers are computed.
TEST_SEASONS = {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}

# The two layers whose distributions are compared.
layer1 = MemoryLayer("Wikipedia Stacker")
layer2 = MemoryLayer("Appearance Stacker")
# NOTE(review): `pairs` is not filled in the visible part of this script; presumably the remainder of
# the script appends to it and uses pearsonr / kendalltau / math - confirm.
pairs = []
for season in TEST_SEASONS:
    # Only the players of the season for which get_is_mol is falsy.
    players = {
        player
        for player in get_players_in_season(season) if not get_is_mol(player)
    }
    # Every episode from 0 up to and including the last episode of the season.
    for episode in range(get_last_episode(season) + 1):
        prediction1 = layer1.compute_distribution(season, episode,
                                                  TRAIN_SEASONS)
        prediction2 = layer2.compute_distribution(season, episode,
                                                  TRAIN_SEASONS)
        # A player is excluded as soon as one of both layers gives a probability of exactly 0.0.
        excluded = {
            player
            for player, prob in prediction1.items() if prob == 0.0
        }
        excluded.update(
            {player for player, prob in prediction2.items() if prob == 0.0})
        # The remaining players that neither layer excluded.
        included = players.difference(excluded)
def predict(self, predict_season: int, latest_episode: int,
            train_seasons: Set[int]) -> Dict[Player, MultiLayerResult]:
    """ Return for every player of the predict season a result without any predictions.

    Arguments:
        predict_season (int): The season for which the predictions are made.
        latest_episode (int): Not used by this method itself.
        train_seasons (Set[int]): Not used by this method itself.

    Returns:
        A dictionary with as key the players of the predict season and as value a MultiLayerResult with an
        empty prediction array and, as second argument, whether this layer considers the player excluded.
    """
    results = dict()
    for player in get_players_in_season(predict_season):
        results[player] = MultiLayerResult(np.array([]), self.__is_excluded_player(player))
    return results
def compute_distribution(self, predict_season: int, latest_episode: int,
                         train_seasons: Set[int]) -> Dict[Player, float]:
    """ Give a likelihood of 1.0 to the players for which get_is_mol holds and 0.0 to all other players.

    Arguments:
        predict_season (int): The season for which the distribution is computed.
        latest_episode (int): Not used by this layer.
        train_seasons (Set[int]): Not used by this layer.

    Returns:
        A dictionary with as key the players of the predict season and as value 1.0 or 0.0.
    """
    distribution = dict()
    for player in get_players_in_season(predict_season):
        distribution[player] = 1.0 if get_is_mol(player) else 0.0
    return distribution