def __get_train_data(self, train_seasons: Set[int], alives_group: Set[int]) -> Tuple[np.array, np.array]:
    """ Collect the train cases on which the stacking algorithm is fitted.

    Every episode (0 up to and including the last episode) of every train season is predicted
    by the train layer, which receives all train seasons except the season being predicted.
    An episode only contributes train cases if its number of non-excluded players is contained
    in alives_group; in that case every non-excluded player yields one train case.

    Arguments:
        train_seasons (Set[int]): All seasons used for training the stacking algorithm.
        alives_group (Set[int]): Only episodes with any of these numbers of players that could
            still potentially be the mol are used as training data.

    Returns:
        The train input, an array with one encoded row per train case, and the train output,
        an array holding 1.0 if the player of the corresponding row was the mol and 0.0 otherwise.
    """
    encoded_rows = []
    mol_labels = []
    for season in train_seasons:
        for episode in range(get_last_episode(season) + 1):
            # The season itself is left out of the seasons handed to the train layer.
            prediction = self.__train_layer.predict(season, episode, train_seasons.difference({season}))
            candidates = [(player, result) for player, result in prediction.items() if not result.exclusion]
            num_alive = len(candidates)
            if num_alive not in alives_group:
                continue

            for player, result in candidates:
                encoded_rows.append(self.__input_encoding(result, num_alive))
                mol_labels.append(1.0 if get_is_mol(player) else 0.0)

    return np.array(encoded_rows), np.array(mol_labels)
def filter_prediction_episode_num(
        distributions: Dict[Tuple[int, int], Dict[Player, float]],
        episode_num: int,
        episode_groups: int) -> Dict[Tuple[int, int], Dict[Player, float]]:
    """ Keep one prediction per season: the one belonging to a given episode group.

    For every season that occurs in the keys of distributions, the selected episode is
    get_last_episode(season) * episode_num / episode_groups, rounded to an integer.

    Arguments:
        distributions (Dict[Tuple[int, int], Dict[Player, float]]): All the predictions, keyed
            by (season, episode).
        episode_num (int): The episode number which get selected.
        episode_groups (int): The number of episode groups in which we group all predictions
            based on episode number.

    Returns:
        The prediction filtered on episode number, with the same (season, episode) keys as in
        distributions.

    Raises:
        KeyError: If distributions has no entry for the selected episode of a season.
    """
    def selected_key(season):
        # Multiply before dividing so the rounding input matches the established computation.
        episode = int(round(get_last_episode(season) * episode_num / episode_groups))
        return season, episode

    season_ids = {key[0] for key in distributions}
    wanted_keys = (selected_key(season) for season in season_ids)
    return {key: distributions[key] for key in wanted_keys}
from Layers.Special.MemoryLayer import MemoryLayer
from Layers.Special.PotentialMolLayer import PotentialMolLayer
from Layers.Wikipedia.WikipediaLayer import WikipediaLayer
from Validators.PieChartCreator import PieChartCreator
from Validators.Precomputer import Precomputer
from Validators.TotalLogLoss import TotalLogLoss
from Validators.ValidationMetrics import ValidationMetrics

# Seed of the random generator that is created below.
RANDOM_SEED = 949019755
# Seasons for which a distribution is computed for every episode.
VALIDATE_SEASONS = {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}
# Seasons offered as training data; the season being validated is removed first.
TRAIN_SEASONS = {5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}

distributions = {}
random_generator = RandomState(RANDOM_SEED)
moldel = PotentialMolLayer()

# One task per (season, episode) pair, counting episode 0 up to and including the last episode.
total_tasks = sum(get_last_episode(season) + 1 for season in VALIDATE_SEASONS)
progress_bar = Bar("Distributions Computed:", max=total_tasks)

for season in VALIDATE_SEASONS:
    # Never train on the season that is currently being validated.
    train_seasons = TRAIN_SEASONS - {season}
    for episode in range(get_last_episode(season) + 1):
        distributions[season, episode] = moldel.compute_distribution(season, episode, train_seasons)
        progress_bar.next()
progress_bar.finish()

# Alternative validators; exactly one `validator = ...` line is meant to be active:
# validator = Precomputer("Full Moldel Predictions")
# validator = ValidationMetrics(9, [10, 9, 8, 7, 6, 5, 4, 3, 2])
# validator = TotalLogLoss()
validator = PieChartCreator("Uniform (9-21)")
validator.validate(distributions)
# Script that walks over every episode of the test seasons and, per episode, queries two
# memory layers and derives the number of players that neither layer has excluded.
# NOTE(review): the visible part ends inside the episode loop; `math`, `pairs` and `uniform`
# are not used here, so the script presumably continues beyond this point — confirm.
import math

# Seasons passed to both layers as training seasons.
TRAIN_SEASONS = {
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
}
# Seasons whose episodes are iterated over below.
TEST_SEASONS = {9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}

layer1 = MemoryLayer("Wikipedia Stacker")
layer2 = MemoryLayer("Appearance Stacker")
pairs = []
for season in TEST_SEASONS:
    # All players of the season for which get_is_mol is false.
    players = {
        player
        for player in get_players_in_season(season) if not get_is_mol(player)
    }
    # Episodes run from 0 up to and including get_last_episode(season).
    for episode in range(get_last_episode(season) + 1):
        prediction1 = layer1.compute_distribution(season, episode,
                                                  TRAIN_SEASONS)
        prediction2 = layer2.compute_distribution(season, episode,
                                                  TRAIN_SEASONS)
        # A player counts as excluded when at least one of the two layers gives probability 0.0.
        excluded = {
            player
            for player, prob in prediction1.items() if prob == 0.0
        }
        excluded.update(
            {player
             for player, prob in prediction2.items() if prob == 0.0})
        included = players.difference(excluded)
        # `players` leaves out the mol, so the + 1 presumably adds the mol back — TODO confirm.
        num_players = len(included) + 1
        # Probability of each remaining player under a uniform distribution.
        uniform = 1 / num_players