示例#1
0
    def __training(self, train_seasons: Set[int]) -> Tuple[gaussian_kde, gaussian_kde]:
        """ Fit age densities for the non-Mol and the Mol players of the training seasons.

        Arguments:
             train_seasons (Set[int]): The seasons whose players are used as training data.

        Returns:
            Two kernel density estimators over player ages: first the one fitted on the ages of
            the non-Mol players, then the one fitted on the ages of the Mol players.
        """
        # Split the ages of all training players by whether the player was the Mol.
        non_mol_ages, mol_ages = [], []
        for player in Player:
            if get_season(player) not in train_seasons:
                continue
            age = float(get_age(player))
            (mol_ages if get_is_mol(player) else non_mol_ages).append(age)

        estimate = InnerAppearanceLayer.kernel_density_estimation
        return estimate(np.array(non_mol_ages)), estimate(np.array(mol_ages))
示例#2
0
 def seasons_with_data() -> Set[int]:
     """ Get all seasons that have Wikipedia data.

     Returns:
         The distinct values of get_season over every player found when iterating LINKER.
     """
     return set(map(get_season, LINKER))
示例#3
0
# Count all words for these seasons
from collections import Counter
from Data.Player import Player
from Data.PlayerData import get_season
from Layers.Wikipedia.WikipediaParser import WikipediaParser
from nltk.corpus import stopwords

# Seasons (9 up to and including 20) whose players are included in the word count.
SEASONS = set(range(9, 21))

# Sum the parsed word counts of every player that belongs to one of the selected seasons.
pure_counter = Counter()
for player in Player:
    if get_season(player) not in SEASONS:
        continue
    pure_counter += WikipediaParser.wiki_file_parse(player)

# Credit the occurrences of each counted word to all of its sub words (as reported by
# get_all_sub_words for the standard dictionary), leaving out the Dutch stop words.
dictionary = WikipediaParser.get_standard_dictionary()
stop_words = set(stopwords.words('dutch'))
compound_counter = Counter()
for word, count in pure_counter.items():
    sub_words = WikipediaParser.get_all_sub_words(word, dictionary, WikipediaParser.MIN_LENGTH_COMPOUND_WORD)
    sub_words.difference_update(stop_words)
    # Every remaining sub word receives the full count of the word it was found in.
    compound_counter.update(dict.fromkeys(sub_words, count))

print(compound_counter)
示例#4
0
                   mol_points: int) -> float:
    u = (non_mol_kde.pdf(x)[0] - mol_kde.pdf(x)[0])**4
    r = 1 / (2 * math.sqrt(math.pi))
    non_mol_term = non_mol_kde.pdf(x)[0] / (non_mol_points *
                                            math.sqrt(non_mol_bandwidth))
    mol_term = mol_kde.pdf(x)[0] / (mol_points * math.sqrt(mol_bandwidth))
    sigma = (r * (non_mol_term + mol_term))**2
    return u / sigma


# Seasons (1 up to and including 21) whose players supply the age samples used below.
TEST_SEASONS = set(range(1, 22))
# NOTE(review): presumably the significance level of a test performed further down the
# script; it is not used in the lines visible here - confirm.
ALPHA = 0.05

# Age and Mol label (1.0 = Mol, 0.0 = non-Mol) of every player in the selected seasons.
players = [candidate for candidate in Player if get_season(candidate) in TEST_SEASONS]
train_input = [float(get_age(candidate)) for candidate in players]
train_output = [float(bool(get_is_mol(candidate))) for candidate in players]

# Partition the ages by label with a boolean mask over the label list.
non_mol = np.array(train_input)[np.array(train_output) == 0.0]
mol = np.array(train_input)[np.array(train_output) == 1.0]

# For each group: the density estimate, the result of silverman_bandwidth on a copy of the
# samples, and the number of samples.
non_mol_kde = InnerAppearanceLayer.kernel_density_estimation(non_mol)
non_mol_bandwidth = silverman_bandwidth(non_mol.copy())
non_mol_points = non_mol.shape[0]
mol_kde = InnerAppearanceLayer.kernel_density_estimation(mol)
mol_bandwidth = silverman_bandwidth(mol.copy())
mol_points = mol.shape[0]

# The whole ages 20.0 up to and including 58.0.
ages = list(map(float, range(20, 59)))