Пример #1
0
import logging
from copy import copy
from os.path import basename, splitext

from sklearn.decomposition import FastICA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer

from code_.domain.games_info import SeasonFirstHalfAggregator
from code_.domain.performance_analyzer import PerformanceAnalyzer

import settings as stg

# File name of this script without its directory part; reused for the log
# file name and for the start-up message so it is computed only once.
# `basename` / `splitext` come from `os.path` (imported at the top of the file).
script_name = basename(__file__)

if __name__ == '__main__':
    # Only configure logging when run as a script. The log file is named
    # after the script itself: '<script name without extension>.log'.
    stg.enable_logging(log_filename='{}.log'.format(splitext(script_name)[0]),
                       logging_level=logging.DEBUG)

logging.info('Start of script {}'.format(script_name))

# Build the players statistics dataset from the aggregated-stats file
# referenced in settings.
logging.info('Load data ..')
sfha = SeasonFirstHalfAggregator(saved_filename=stg.FILENAME_STATS_AGGREGATED)
df = sfha.build_players_stats_dataset(
    sliding_interval_min=5,
    list_events_number=stg.EVENTS_COMPUTE_NUMBER,
    list_events_with_success_rate=stg.EVENTS_COMPUTE_SUCCESS_RATE,
)
logging.info('.. Done')

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
train, test = train_test_split(df, test_size=0.3, random_state=42)

# Separate the feature columns from the target column(s) for each subset.
X_train, y_train = train[stg.PLAYER_FEATURES], train[stg.PLAYER_TARGET]
X_test, y_test = test[stg.PLAYER_FEATURES], test[stg.PLAYER_TARGET]

logging.info('Impute missing values with median ..')
Пример #2
0
import logging
import numpy as np
import os
import pandas as pd

from lxml import etree
from os.path import join

from code_.infrastructure.game import Game
import settings as stg

# When this module is executed directly (rather than imported), set up logging
# through settings.enable_logging with 'players.log' as the log filename.
if __name__ == '__main__':
    stg.enable_logging(log_filename='players.log')


class Players():
    """Soccer players.

    Attributes
    ----------
    all_players: pandas.DataFrame
    transfered_players: pandas.DataFrame
    players_more_800_min: pandas.DataFrame
        Players having played more than 800 minutes in first half of season

    """

    def __init__(self, xml_filename=stg.FILENAME_ALL_PLAYERS,
                 saved_csv_filename=stg.FILENAME_PLAYERS_MORE_800):
        """Init class.