def __prediction(self, extractor: AppearanceExtractor, classifier: Classifier,
                 predict_season: int) -> Dict[Player, MultiLayerResult]:
    """ Run the prediction phase of the Appearance Layer.

    Arguments:
        extractor (AppearanceExtractor): Supplies the prediction data through get_predict_data(); the data is
            looked up per player and iterated per data point.
        classifier (Classifier): The classifier whose predict_proba is called once for every data point.
        predict_season (int): The season for which the prediction is made.

    Returns:
        A dictionary with an entry for every player returned by get_players_in_season(predict_season). Players
        with prediction data map to a MultiLayerResult wrapping an array of their classifier outputs (second
        argument False); players without data map to a MultiLayerResult wrapping an empty array (second argument
        True). If there is no prediction data at all, the outcome of EmptyMultiLayer is returned instead.
    """
    predict_data = extractor.get_predict_data()
    if not predict_data:
        return EmptyMultiLayer().predict(predict_season, 0, set())

    all_predictions = dict()
    for player in get_players_in_season(predict_season):
        if player not in predict_data:
            # No appearance data for this player: store an empty result.
            # NOTE(review): the True flag presumably marks the result as excluded - confirm in MultiLayerResult.
            all_predictions[player] = MultiLayerResult(np.array([]), True)
            continue

        # Each data point is classified on its own, wrapped in a single-item batch.
        probabilities = [classifier.predict_proba([sample]) for sample in predict_data[player]]
        all_predictions[player] = MultiLayerResult(np.array(probabilities), False)
    return all_predictions
def predict(self, predict_season: int, latest_episode: int, train_seasons: Set[int]) -> Dict[Player, np.array]:
    """ Train on the usable seasons and predict for the given season.

    Arguments:
        predict_season (int): The season for which the prediction is made.
        latest_episode (int): The latest episode that may be used; the episode actually used is determined by
            the layer's own latest-available-episode lookup.
        train_seasons (Set[int]): Candidate training seasons; seasons before the layer's first season are dropped.

    Returns:
        The per-player prediction produced by the prediction phase, or the outcome of EmptyMultiLayer when no
        episode is available or the prediction season lies before the layer's first season.
        NOTE(review): the annotation says np.array values while the prediction phase is annotated as returning
        MultiLayerResult values - confirm which one is intended.
    """
    # Only seasons from the first supported season onwards can be used for training.
    usable_seasons = {season for season in train_seasons if season >= self.__first_season}
    max_episode = self.__latest_available_episode(predict_season, latest_episode)

    # Fall back to the empty layer when there is nothing to base a prediction on.
    if max_episode == 0 or predict_season < self.__first_season:
        return EmptyMultiLayer().predict(predict_season, latest_episode, usable_seasons)

    extractor = AppearanceExtractor(predict_season, max_episode, usable_seasons,
                                    self.__aug_num_cuts, self.__aug_min_cuts_on)
    trained = self.__training(extractor)
    return self.__prediction(extractor, trained, predict_season)
def __training(self, extractor: AppearanceExtractor) -> Classifier:
    """ Run the training phase of the Appearance Layer.

    Arguments:
        extractor (AppearanceExtractor): Supplies the training input and output through get_train_data().

    Returns:
        A NaiveKDEClassifier, configured with this layer's cdf cutoff, that has been trained on the
        extractor's training data.
    """
    features, labels = extractor.get_train_data()
    kde_classifier = NaiveKDEClassifier(cdf_cutoff=self.__cdf_cutoff)
    kde_classifier.train(features, labels)
    return kde_classifier
def __prediction(self, extractor: AppearanceExtractor, non_mol_kde: gaussian_kde, mol_kde: gaussian_kde,
                 predict_season: int) -> Dict[Player, MultiLayerResult]:
    """ Run the prediction phase of the Appearance Layer.

    Arguments:
        extractor (AppearanceExtractor): Supplies the prediction data through get_predict_data(); the data is
            looked up per player and iterated per value.
        non_mol_kde (gaussian_kde): The Kernel Density Estimator for non-Mol appearance values.
        mol_kde (gaussian_kde): The Kernel Density Estimator for Mol appearance values.
        predict_season (int): The season for which the prediction is made.

    Returns:
        A dictionary with an entry for every player returned by get_players_in_season(predict_season). Players
        with prediction data map to a MultiLayerResult wrapping an array with one Mol likelihood per value
        (second argument False); players without data map to a MultiLayerResult wrapping an empty array (second
        argument True). If there is no prediction data at all, the outcome of EmptyMultiLayer is returned instead.
    """
    predict_data = extractor.get_predict_data()
    if not predict_data:
        return EmptyMultiLayer().predict(predict_season, 0, set())

    # The number of players with prediction data acts as the prior: 1 of them is weighted as Mol, the rest not.
    num_candidates = len(predict_data)

    # Values are clamped between the boundaries found for the lower and upper tail of the cdf cutoff.
    tail = self.__cdf_cutoff / 2
    min_value = self.get_boundary(non_mol_kde, mol_kde, num_candidates, tail,
                                  self.MIN_VALUE, self.MAX_VALUE)
    max_value = self.get_boundary(non_mol_kde, mol_kde, num_candidates, 1 - tail,
                                  self.MIN_VALUE, self.MAX_VALUE)

    def mol_share(value):
        # Weighted Mol density divided by the total weighted density at the clamped value.
        clamped = min(max(value, min_value), max_value)
        non_mol_likelihood = non_mol_kde.pdf(clamped)[0] * (num_candidates - 1) / num_candidates
        mol_likelihood = mol_kde.pdf(clamped)[0] / num_candidates
        return mol_likelihood / (non_mol_likelihood + mol_likelihood)

    all_predictions = dict()
    for player in get_players_in_season(predict_season):
        if player not in predict_data:
            # No appearance data for this player: store an empty result.
            # NOTE(review): the True flag presumably marks the result as excluded - confirm in MultiLayerResult.
            all_predictions[player] = MultiLayerResult(np.array([]), True)
            continue

        likelihoods = [mol_share(value) for value in predict_data[player]]
        all_predictions[player] = MultiLayerResult(np.array(likelihoods), False)
    return all_predictions
def __training(self, extractor: AppearanceExtractor) -> Tuple[gaussian_kde, gaussian_kde]:
    """ Run the training phase of the Appearance Layer.

    Arguments:
        extractor (AppearanceExtractor): Supplies the training input and output through get_train_data().

    Returns:
        A tuple (non_mol_kde, mol_kde): first the kernel density estimator fitted on the values labelled 0.0
        (non-Mol), then the one fitted on the values labelled 1.0 (Mol).
    """
    train_input, train_output = extractor.get_train_data()

    def values_with_label(label):
        # Only the first feature of every training sample with the requested label is used.
        return np.array([features[0] for features, target in zip(train_input, train_output)
                         if target == label])

    non_mol_input = values_with_label(0.0)
    mol_input = values_with_label(1.0)
    return self.kernel_density_estimation(non_mol_input), self.kernel_density_estimation(mol_input)
from Layers.Appearance.AppearanceExtractor import AppearanceExtractor
from scipy.stats import norm
import matplotlib.pyplot as plt
import numpy as np

TEST_SEASONS = {13, 14, 15, 16, 17, 18, 19, 20}

# Collect the training samples of the test seasons.
extractor = AppearanceExtractor(0, 0, TEST_SEASONS, 1, 1, 0.0)
train_input, train_output = extractor.get_train_data()

# Split the first feature of every sample by its label: 0.0 is non-Mol, 1.0 is Mol.
non_mol = [features[0] for features, target in zip(train_input, train_output) if target == 0.0]
mol = [features[0] for features, target in zip(train_input, train_output) if target == 1.0]

# Set up the figure and its axes.
plt.figure(figsize=(12, 3))
plt.xlabel("Relative Appearance")
plt.ylabel("Is 'mol'")
plt.yticks(np.linspace(0.0, 1.0, 11))
plt.gcf().subplots_adjust(bottom=0.15)

# Fit a normal distribution on the Mol values and draw its density in red.
mol_norm = norm.fit(mol)
mol_loc, mol_scale = mol_norm[0], mol_norm[1]
X = np.linspace(-1.5, 1.0, 500)
mol_Y = [norm.pdf(point, loc=mol_loc, scale=mol_scale) for point in X]
plt.plot(X, mol_Y, color='r')

# Fit a normal distribution on the non-Mol values and evaluate its density on the same grid.
non_mol_norm = norm.fit(non_mol)
non_mol_loc, non_mol_scale = non_mol_norm[0], non_mol_norm[1]
non_mol_Y = [norm.pdf(point, loc=non_mol_loc, scale=non_mol_scale) for point in X]
from Layers.Appearance.AppearanceLayer import InnerAppearanceLayer from Layers.Appearance.AppearanceExtractor import AppearanceExtractor from scipy.stats import mannwhitneyu import matplotlib.pyplot as plt import numpy as np TEST_SEASONS = {13, 14, 15, 16, 17, 18, 19, 20} AUGMENTATION_CUTS = 4 AUGMENTATION_MIN_CUTS_ON = 2 OUTLIER_CUTOFF = 0.01 extractor = AppearanceExtractor(0, 0, TEST_SEASONS, AUGMENTATION_CUTS, AUGMENTATION_MIN_CUTS_ON, OUTLIER_CUTOFF) train_input, train_output = extractor.get_train_data() non_mol = [ data[0] for data, label in zip(train_input, train_output) if label == 0.0 ] mol = [ data[0] for data, label in zip(train_input, train_output) if label == 1.0 ] plt.figure(figsize=(12, 3)) plt.xlabel("Relative Appearance") plt.ylabel("Is 'mol'") plt.yticks(np.linspace(0.0, 1.0, 11)) plt.gcf().subplots_adjust(bottom=0.15) non_mol_kde = InnerAppearanceLayer.kernel_density_estimation(non_mol) mol_kde = InnerAppearanceLayer.kernel_density_estimation(mol) x = InnerAppearanceLayer.get_boundary(non_mol_kde, mol_kde, 10, 0.005, InnerAppearanceLayer.MIN_VALUE,
from Layers.Appearance.VideoParser import VideoParser, ParsedVideo
from scipy.stats import pearsonr, kendalltau
import itertools

TEST_SEASONS = {13, 14, 15, 16, 17, 18, 19, 20}

# Gather, for every non-Mol player, the relative occurrence in each episode in which the player is alive.
appearances = dict()
for season in TEST_SEASONS:
    # Episodes are numbered from 1; the first episode without a parsed video ends the season.
    for episode in itertools.count(1):
        parsed_video = VideoParser.load_parsed_video(season, episode)
        if parsed_video is None:
            break

        for player in parsed_video.alive_players:
            if get_is_mol(player):
                continue
            appearance = AppearanceExtractor.get_relative_occurrence(player, parsed_video, [True])
            appearances.setdefault(player, []).append(appearance)

# Pair up every two appearance values that belong to the same player.
# NOTE(review): `input` shadows the builtin; both names are kept unchanged in case code outside this excerpt
# refers to them.
pairs = [pair for features in appearances.values() for pair in itertools.combinations(features, 2)]
input = [first for first, _ in pairs]
output = [second for _, second in pairs]

# Report the Pearson correlation between the paired values.
r, p_value = pearsonr(input, output)
print("Pearson Test (Between):")
print("R value: " + str(r))
print("R-squared value: " + str(r ** 2))
print("p-value: " + str(p_value))
print()
from Layers.Appearance.AppearanceExtractor import AppearanceExtractor
import matplotlib.pyplot as plt
import numpy as np

TEST_SEASONS = {13, 14, 15, 16, 17, 18, 19, 20, 21}
AUGMENTATION_CUTS = 1
AUGMENTATION_MIN_CUTS_ON = 1
# NOTE(review): OUTLIER_CUTOFF is defined but not passed to the extractor below - confirm whether that is intended.
OUTLIER_CUTOFF = 0.00

# Collect the training input of the test seasons; the training output is not needed here.
extractor = AppearanceExtractor(0, 0, TEST_SEASONS, AUGMENTATION_CUTS, AUGMENTATION_MIN_CUTS_ON)
train_input, _ = extractor.get_train_data()

# Exponentiate and subtract the small log addition, then drop the second axis.
# NOTE(review): presumably this undoes a log transform applied by the extractor - confirm in AppearanceExtractor.
absolute_appearance = np.exp(train_input) - AppearanceExtractor.SMALL_LOG_ADDITION
train_input = np.squeeze(absolute_appearance, axis=1)

# Show the distribution of the resulting values as a histogram.
plt.hist(train_input, bins=10, edgecolor='black')
plt.xlabel("Absolute Appearance")
plt.ylabel("#Occurrences")
plt.show()