Пример #1
0
def test_model(model, league, params, csv_test_file_path, num_of_last_games_list, cv=5):
    """Grid-search ``model`` on the league's training data and score it on test data.

    Args:
        model: estimator handed to ``GridSearchCV``.
        league: league passed to ``SoccerPredictModel``.
        params: parameter grid handed to ``GridSearchCV``.
        csv_test_file_path: path forwarded to ``get_test_data``.
        num_of_last_games_list: values to evaluate; a separate test set is
            built and scored for each one.
        cv: cross-validation setting forwarded to ``GridSearchCV``.

    Returns:
        A ``(result, best_estimator)`` tuple where ``result`` is a list of
        ``[num, score]`` pairs, one per entry of ``num_of_last_games_list``.
    """
    # The wrapper is built around ``fake_model``: in this function it is only
    # used as a data source (training vectors and parsed data), never fitted.
    sp_model = SoccerPredictModel(fake_model, redwoodParser, data_manger, league, data_predict_org)
    searcher = GridSearchCV(estimator=model, param_grid=params, n_jobs=-1, cv=cv)

    # Fit the search on the training vectors.
    features, winners = get_train_data(sp_model)
    print('train the model : ')
    searcher.fit(numpy.array(features), numpy.array(winners))

    print('grid_search : {}'.format(searcher))
    print('grid_search, best_estimator_ : {}'.format(searcher.best_estimator_))
    print('grid_search, best_params_ : {}'.format(searcher.best_params_))
    print('grid_search, best_score_ : {}'.format(searcher.best_score_))

    scores = []
    for n_last_games in num_of_last_games_list:
        # Build the test set for this value, then score the fitted search on it.
        test_features, test_winners = get_test_data(
            csv_test_file_path, sp_model.get_data_after_parse(), n_last_games)
        score = searcher.score(numpy.array(test_features), numpy.array(test_winners))
        print('score for num of last games: {} score : {}'.format(n_last_games, score))
        scores.append([n_last_games, score])

    print('grid_search.cv_results_ : {}'.format(searcher.cv_results_))
    return scores, searcher.best_estimator_
Пример #2
0
    def get(self, league, home_team, away_team):
        """Predict ``home_team`` vs ``away_team`` with the stored model of ``league``.

        Returns:
            A ``(body, status)`` tuple: the prediction and ``200`` for a
            supported league, otherwise an explanatory message and ``404``.
        """
        print('get : {}/{}/{}'.format(league, home_team, away_team))

        # Only these leagues have a model file to load.
        supported_leagues = ['England', 'Spain', 'Italy', 'Germany']
        if league not in supported_leagues:
            return "league : {} is not supported".format(league), 404

        sp_model_file_name = 'modelForProd/' + league.lower() + '_sp_model.joblib'
        print('load {}'.format(sp_model_file_name))
        stored_model = load(sp_model_file_name)

        connector = mongo_API()
        manager = FileDataManger(connector)
        parser = RedWoodParser()
        organizer = Data_Predict_Organizer()
        sp_model = SoccerPredictModel(stored_model, parser, manager, league,
                                      organizer)

        # Translate the requested team names through ``mapper`` before predicting.
        home = mapper.map(home_team)
        print(home)
        away = mapper.map(away_team)
        print(away)

        prediction = sp_model.predict(home, away, 6)
        print(prediction)

        return prediction, 200
Пример #3
0
    def test_basic_model_english_random_forest(self):
        """Train an England model on a default ``RandomForestClassifier`` and run ``test``."""
        connector = mongo_API()
        manager = DataManger(connector)
        parser = RedWoodParser()
        classifier = RandomForestClassifier()
        organizer = Data_Predict_Organizer()

        england_model = SoccerPredictModel(
            classifier, parser, manager, 'England', organizer)
        england_model.train(7, 0.1)

        # Second argument is presumably the number of last games - TODO confirm.
        test(england_model, 10, self.english_csv_file)
Пример #4
0
    def test_basic_model_italy(self):
        """Train an Italy model on a linear-booster ``XGBClassifier`` and run ``test``."""
        connector = mongo_API()
        manager = DataManger(connector)
        parser = RedWoodParser()
        classifier = XGBClassifier(max_depth=3, booster='gblinear')
        organizer = Data_Predict_Organizer()

        italy_model = SoccerPredictModel(
            classifier, parser, manager, 'Italy', organizer)
        italy_model.train(7, 0.1)

        # Second argument is presumably the number of last games - TODO confirm.
        test(italy_model, 7, self.italy_csv_file)
Пример #5
0
    def test_basic_model_english_KNN(self):
        """Train an England model on a 3-neighbour ``KNeighborsClassifier`` and run ``test``."""
        connector = mongo_API()
        manager = DataManger(connector)
        parser = RedWoodParser()
        classifier = KNeighborsClassifier(n_neighbors=3)
        organizer = Data_Predict_Organizer()

        england_model = SoccerPredictModel(
            classifier, parser, manager, 'England', organizer)
        england_model.train(7, 0.1)

        # Second argument is presumably the number of last games - TODO confirm.
        test(england_model, 6, self.english_csv_file)
Пример #6
0
    def test_basic_model_english_xg(self):
        """Train an England ``XGBClassifier`` model with an ignore list set on the parser."""
        connector = mongo_API()
        manager = DataManger(connector)

        # Names handed to the parser's ignore list before the model is built.
        parser = RedWoodParser()
        parser.set_ignore_list([
            'Date',
            'TournamentName',
            'SeasonName',
            'RoundId',
            'HomeTeamName',
            'AwayTeamName',
        ])

        classifier = XGBClassifier(max_depth=5, booster='gblinear')
        organizer = Data_Predict_Organizer()

        england_model = SoccerPredictModel(
            classifier, parser, manager, 'England', organizer)
        england_model.train(7, 0.1)

        # Second argument is presumably the number of last games - TODO confirm.
        test(england_model, 6, self.english_csv_file)
Пример #7
0
def search_cat_boost(league, test_data):
    """Try every CatBoost parameter combination and save each new best model.

    For each combination a ``SoccerPredictModel`` is trained, then for every
    value in ``num_of_last_games`` the share of ``test_data`` games whose
    predicted winner equals ``game[real_winner]`` is computed. Whenever that
    share strictly beats the best seen so far, the model is written with
    ``dump``.

    Args:
        league: league passed to ``SoccerPredictModel`` and used in the save path.
        test_data: sized collection of games, each indexable by the
            ``home_team``, ``away_team`` and ``real_winner`` keys.

    Returns:
        ``(results, final_most_acc)``: ``results`` holds one
        ``[iterations, l_rate, depth, b_type, n_games, accuracy]`` row per
        evaluation; ``final_most_acc`` is the best row joined with commas, or
        ``''`` when no evaluation scored above zero.
    """
    results = []
    best_accuracy = 0
    final_most_acc = ''

    # Lazy grid in the order
    # [iterations, learning_rate, depth_cat_boost, boosting_type_cat_boost];
    # the last parameter varies fastest.
    grid = (
        (n_iterations, l_rate, dep, b_type)
        for n_iterations in parameters_cat_boost[iterations]
        for l_rate in parameters_cat_boost[learning_rate]
        for dep in parameters_cat_boost[depth_cat_boost]
        for b_type in parameters_cat_boost[boosting_type_cat_boost]
    )

    for n_iterations, l_rate, dep, b_type in grid:
        classifier = CatBoostClassifier(iterations=n_iterations,
                                        learning_rate=l_rate,
                                        depth=dep,
                                        boosting_type=b_type,
                                        loss_function='MultiClass',
                                        silent=True)
        sp_model_cat_boost = SoccerPredictModel(
            classifier, redwoodParser, data_manger, league, data_predict_org)
        sp_model_cat_boost.train(7, 0.1)

        for n_games in num_of_last_games:
            # Count the games whose predicted winner matches the real one.
            hits = sum(
                1 for game in test_data
                if get_winner_from_prob(
                    sp_model_cat_boost.predict(
                        game[home_team], game[away_team], n_games)
                ) == game[real_winner])
            accuracy = hits / len(test_data)

            row = [n_iterations, l_rate, dep, b_type, n_games, accuracy]
            results.append(row)
            print(
                'iter : {}, l_rate : {}, dep : {}, obj : {}, n_games : {}, acc : {} '
                .format(*row))

            if accuracy <= best_accuracy:
                continue

            # New best: remember it and persist the underlying model.
            best_accuracy = accuracy
            final_most_acc = '{},{},{},{},{},{}'.format(*row)
            print('save model : {}'.format(final_most_acc))
            dump(
                sp_model_cat_boost.get_model(),
                joblib_save_path.format(
                    league, 'CatBoostClassifier',
                    str(accuracy).replace('.', ',')))

    return results, final_most_acc
Пример #8
0
def search_lgbm(league, test_data):
    """Try every LightGBM parameter combination and save each new best model.

    For each combination a ``SoccerPredictModel`` is trained, then for every
    value in ``num_of_last_games`` the share of ``test_data`` games whose
    predicted winner equals ``game[real_winner]`` is computed. Whenever that
    share strictly beats the best seen so far, the model is written with
    ``dump``.

    Args:
        league: league passed to ``SoccerPredictModel`` and used in the save path.
        test_data: sized collection of games, each indexable by the
            ``home_team``, ``away_team`` and ``real_winner`` keys.

    Returns:
        ``(results, final_most_acc)``: ``results`` holds one
        ``[b_type, l_rate, n_est, obj, n_games, accuracy]`` row per
        evaluation; ``final_most_acc`` is the best row joined with commas, or
        ``''`` when no evaluation scored above zero.
    """
    results = []
    best_accuracy = 0
    final_most_acc = ''

    # Lazy grid; the last parameter (objective) varies fastest.
    grid = (
        (b_type, l_rate, n_est, obj)
        for b_type in parameters_LGBM[boosting_type]
        for l_rate in parameters_LGBM[learning_rate]
        for n_est in parameters_LGBM[n_estimators]
        for obj in parameters_LGBM[objective_lgbm]
    )

    for b_type, l_rate, n_est, obj in grid:
        classifier = LGBMClassifier(boosting_type=b_type,
                                    learning_rate=l_rate,
                                    n_estimators=n_est,
                                    objective=obj)
        sp_model_LGBM = SoccerPredictModel(classifier, redwoodParser,
                                           data_manger, league,
                                           data_predict_org)
        sp_model_LGBM.train(7, 0.1)

        for n_games in num_of_last_games:
            # Count the games whose predicted winner matches the real one.
            hits = sum(
                1 for game in test_data
                if get_winner_from_prob(
                    sp_model_LGBM.predict(
                        game[home_team], game[away_team], n_games)
                ) == game[real_winner])
            accuracy = hits / len(test_data)

            row = [b_type, l_rate, n_est, obj, n_games, accuracy]
            results.append(row)
            print(
                'b_type : {}, l_rate : {}, n_est : {}, obj : {}, n_games : {}, acc : {} '
                .format(*row))

            if accuracy <= best_accuracy:
                continue

            # New best: remember it and persist the underlying model.
            best_accuracy = accuracy
            final_most_acc = '{},{},{},{},{},{}'.format(*row)
            print('save model : {}'.format(final_most_acc))
            dump(
                sp_model_LGBM.get_model(),
                joblib_save_path.format(
                    league, 'LGBMClassifier',
                    str(accuracy).replace('.', ',')))

    return results, final_most_acc
Пример #9
0
def search_RF(league, test_data):
    """Try every random-forest parameter combination and save each new best model.

    For each combination a ``SoccerPredictModel`` is trained, then for every
    value in ``num_of_last_games`` the share of ``test_data`` games whose
    predicted winner equals ``game[real_winner]`` is computed. Whenever that
    share strictly beats the best seen so far, the model is written with
    ``dump``.

    Args:
        league: league passed to ``SoccerPredictModel`` and used in the save path.
        test_data: sized collection of games, each indexable by the
            ``home_team``, ``away_team`` and ``real_winner`` keys.

    Returns:
        ``(results, final_most_acc)``: ``results`` holds one
        ``[n_est, crit, bootstrap_param, warm_s, n_games, accuracy]`` row per
        evaluation; ``final_most_acc`` is the best row joined with commas, or
        ``''`` when no evaluation scored above zero.
    """
    results = []
    best_accuracy = 0
    final_most_acc = ''

    # Lazy grid; the last parameter (warm_start) varies fastest.
    grid = (
        (n_est, crit, bootstrap_param, warm_s)
        for n_est in parameters_random_forest[n_estimators_RF]
        for crit in parameters_random_forest[criterion]
        for bootstrap_param in parameters_random_forest[bootstrap]
        for warm_s in parameters_random_forest[warm_start]
    )

    for n_est, crit, bootstrap_param, warm_s in grid:
        classifier = RandomForestClassifier(n_estimators=n_est,
                                            criterion=crit,
                                            bootstrap=bootstrap_param,
                                            warm_start=warm_s)
        sp_model_rf = SoccerPredictModel(classifier, redwoodParser,
                                         data_manger, league,
                                         data_predict_org)
        sp_model_rf.train(7, 0.1)

        for n_games in num_of_last_games:
            # Count the games whose predicted winner matches the real one.
            hits = sum(
                1 for game in test_data
                if get_winner_from_prob(
                    sp_model_rf.predict(
                        game[home_team], game[away_team], n_games)
                ) == game[real_winner])
            accuracy = hits / len(test_data)

            row = [n_est, crit, bootstrap_param, warm_s, n_games, accuracy]
            results.append(row)
            print(
                'n_est : {}, crit : {}, bootstrap_param : {}, warm_s : {}, n_games : {}, acc : {} '
                .format(*row))

            if accuracy <= best_accuracy:
                continue

            # New best: remember it and persist the underlying model.
            best_accuracy = accuracy
            final_most_acc = '{},{},{},{},{},{}'.format(*row)
            print('save model : {}'.format(final_most_acc))
            dump(
                sp_model_rf.get_model(),
                joblib_save_path.format(
                    league, 'RandomForestClassifier',
                    str(accuracy).replace('.', ',')))

    return results, final_most_acc
Пример #10
0
def search_KNN(league, test_data):
    """Try every k-nearest-neighbours parameter combination and save each new best model.

    For each combination a ``SoccerPredictModel`` is trained, then for every
    value in ``num_of_last_games`` the share of ``test_data`` games whose
    predicted winner equals ``game[real_winner]`` is computed. Whenever that
    share strictly beats the best seen so far, the model is written with
    ``dump``.

    Args:
        league: league passed to ``SoccerPredictModel`` and used in the save path.
        test_data: sized collection of games, each indexable by the
            ``home_team``, ``away_team`` and ``real_winner`` keys.

    Returns:
        ``(results, final_most_acc)``: ``results`` holds one
        ``[n_neighbor, weight, algo, power, n_games, accuracy]`` row per
        evaluation; ``final_most_acc`` is the best row joined with commas, or
        ``''`` when no evaluation scored above zero.
    """
    results = []
    best_accuracy = 0
    final_most_acc = ''

    # Lazy grid; the last parameter (p) varies fastest.
    grid = (
        (n_neighbor, weight, algo, power)
        for n_neighbor in parameters_KNN[n_neighbors]
        for weight in parameters_KNN[weights]
        for algo in parameters_KNN[algorithm]
        for power in parameters_KNN[p]
    )

    for n_neighbor, weight, algo, power in grid:
        classifier = KNeighborsClassifier(n_neighbors=n_neighbor,
                                          weights=weight,
                                          algorithm=algo,
                                          p=power)
        sp_model_knn = SoccerPredictModel(classifier, redwoodParser,
                                          data_manger, league,
                                          data_predict_org)
        sp_model_knn.train(7, 0.1)

        for n_games in num_of_last_games:
            # Count the games whose predicted winner matches the real one.
            hits = sum(
                1 for game in test_data
                if get_winner_from_prob(
                    sp_model_knn.predict(
                        game[home_team], game[away_team], n_games)
                ) == game[real_winner])
            accuracy = hits / len(test_data)

            row = [n_neighbor, weight, algo, power, n_games, accuracy]
            results.append(row)
            print(
                'n_neighbor : {}, weight : {}, algo : {}, power : {}, n_games : {}, acc : {} '
                .format(*row))

            if accuracy <= best_accuracy:
                continue

            # New best: remember it and persist the underlying model.
            best_accuracy = accuracy
            final_most_acc = '{},{},{},{},{},{}'.format(*row)
            print('save model : {}'.format(final_most_acc))
            dump(
                sp_model_knn.get_model(),
                joblib_save_path.format(
                    league, 'KNeighborsClassifier',
                    str(accuracy).replace('.', ',')))

    return results, final_most_acc
Пример #11
0
def search_XGBClassifier(league, test_data, silent_param=True):
    """Try every XGBoost parameter combination and save each new best model.

    For each combination a ``SoccerPredictModel`` is trained, then for every
    value in ``num_of_last_games`` the share of ``test_data`` games whose
    predicted winner equals ``game[real_winner]`` is computed. Whenever that
    share strictly beats the best seen so far, the model is written with
    ``dump``.

    Args:
        league: league passed to ``SoccerPredictModel`` and used in the save path.
        test_data: sized collection of games, each indexable by the
            ``home_team``, ``away_team`` and ``real_winner`` keys.
        silent_param: forwarded to ``XGBClassifier`` as ``silent``.

    Returns:
        ``(results, final_most_acc)``: ``results`` holds one
        ``[estimator, m_depth, boost, obj, l_rate, b_score, n_games, accuracy]``
        row per evaluation; ``final_most_acc`` is the best row joined with
        commas (plus a trailing space), or ``''`` when no evaluation scored
        above zero.
    """
    import itertools

    results = []
    current_max_accurate = 0
    final_most_acc = ''

    # Same iteration order as six nested loops: the last axis varies fastest.
    grid = itertools.product(
        parameters_XGB[n_estimators],
        parameters_XGB[max_depth],
        parameters_XGB[booster],
        parameters_XGB[objective],
        parameters_XGB[learning_rate],
        parameters_XGB[base_score],
    )

    for estimator, m_depth, boost, obj, l_rate, b_score in grid:
        # The grid value must be passed as ``n_estimators``; under the name
        # ``estimator`` it is not a constructor parameter, so the number of
        # boosting rounds never followed the grid.
        model = XGBClassifier(max_depth=m_depth,
                              n_estimators=estimator,
                              booster=boost,
                              objective=obj,
                              learning_rate=l_rate,
                              silent=silent_param,
                              base_score=b_score)
        sp_model = SoccerPredictModel(model, redwoodParser, data_manger,
                                      league, data_predict_org)
        sp_model.train(7, 0.1)

        for n_games in num_of_last_games:
            accurate_result = 0
            for game in test_data:
                res__proba = sp_model.predict(game[home_team],
                                              game[away_team], n_games)
                res = get_winner_from_prob(res__proba)

                # -1 is reported as an error value from get_winner_from_prob.
                if res == -1:
                    print('error -1')

                if res == game[real_winner]:
                    accurate_result += 1

            accurate_percentage = accurate_result / len(test_data)
            row = [
                estimator, m_depth, boost, obj, l_rate, b_score, n_games,
                accurate_percentage
            ]
            results.append(row)
            print(
                'estimator : {}, m_depth : {}, boost : {}, obj : {}, l_rate : {}, b_score : {}, n_games : {}, acc : {} '
                .format(*row))

            if accurate_percentage > current_max_accurate:
                # New best: remember it and persist the underlying model.
                current_max_accurate = accurate_percentage
                final_most_acc = '{},{},{},{},{},{},{},{} '.format(*row)
                print('save model : {}'.format(final_most_acc))
                dump(
                    sp_model.get_model(),
                    joblib_save_path.format(
                        league, 'XGBClassifier',
                        str(accurate_percentage).replace('.', ',')))

    return results, final_most_acc
Пример #12
0
#from lightgbm import LGBMClassifier
# from catboost import CatBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV

# Shared collaborators for the England model.
db_connector = mongo_API()
data_manger = DataManger(db_connector)
redwoodParser = RedWoodParser()
# Other estimators previously tried here: RandomForestClassifier(),
# CatBoostClassifier(), LGBMClassifier(), KNeighborsClassifier().
model = XGBClassifier(max_depth=3, booster='gblinear')
data_predict_org = Data_Predict_Organizer()

sp_model_english = SoccerPredictModel(
    model, redwoodParser, data_manger, 'England', data_predict_org)
sp_model_english.train(7, 0.1)

number_of_last_game = 6


def _england_proba(home, away):
    """Return the England model's ``predict_proba`` result for one fixture."""
    return sp_model_english.predict_proba(home, away, number_of_last_game)


# Variable names presumably encode the fixture date (d_m_yy) and an index.
res_eng_12_4_19_1 = _england_proba('Leicester', 'Newcastle')

res_eng_13_4_19_1 = _england_proba('Tottenham', 'Huddersfield')
res_eng_13_4_19_2 = _england_proba('Burnley', 'Cardiff')
res_eng_13_4_19_3 = _england_proba('Brighton', 'Bournemouth')
res_eng_13_4_19_4 = _england_proba('Fulham', 'Everton')
Пример #13
0
    def test_grid_search_english_model_xgboost(self):
        """Run an exhaustive XGBoost parameter sweep for England and save the results as CSV.

        Every parameter combination is trained through ``SoccerPredictModel``
        and evaluated with ``test`` for each value in ``num_of_games``. One
        row per evaluation (the parameters, ``num_of_games`` and ``res``) is
        collected, printed, and written to ``self.english_xg_result`` with a
        header line.

        NOTE(review): the full Cartesian product below is on the order of
        10^12 combinations, so this sweep is not expected to finish as
        written; narrow the candidate lists before running it.
        """
        import csv
        import itertools

        # general things:
        db_connector = mongo_API()
        data_manger = DataManger(db_connector)
        redwoodParser = RedWoodParser()
        data_predict_org = Data_Predict_Organizer()

        # model params: (XGBClassifier keyword, candidate values), listed in
        # sweep order - the last entry varies fastest.
        param_grid = [
            ('max_depth', range(1, 22, 1)),
            ('learning_rate', [x / 20 for x in range(1, 21)]),
            ('n_estimators', range(50, 400, 50)),
            # "binary:logistic" used to appear twice, which only repeated
            # identical runs; each objective is now listed once.
            ('objective', [
                "binary:logistic", "reg:linear", "reg:logistic",
                "binary:logitraw", "count:poisson", "multi:softmax",
                "multi:softprob", "rank:pairwise"
            ]),
            ('booster', ['gbtree', 'gblinear']),
            ('gamma', [x / 20 for x in range(0, 105, 5)]),
            ('min_child_weight', [x / 2 for x in range(0, 21)]),
            ('max_delta_step', range(0, 10, 1)),
            ('subsample', [x / 20 for x in range(0, 21)]),
            ('colsample_bytree', [x / 20 for x in range(0, 21)]),
            # reg_alpha = 0 and reg_lambda = 1 are not swept.
            ('base_score', [x / 20 for x in range(1, 21)]),
        ]
        num_of_games = range(0, 15)

        param_names = [name for name, _ in param_grid]
        candidate_lists = [candidates for _, candidates in param_grid]

        test_results = []

        for values in itertools.product(*candidate_lists):
            params = dict(zip(param_names, values))
            model = XGBClassifier(**params)
            sp_model = SoccerPredictModel(model, redwoodParser, data_manger,
                                          'England', data_predict_org)
            sp_model.train(7, 0.1)
            for num in num_of_games:
                res = test(sp_model, num, self.english_csv_file)
                # Record ``num`` too, otherwise rows that differ only in the
                # number of games cannot be told apart.
                test_results.append(dict(params, num_of_games=num, res=res))

        print(test_results)

        # Open for writing (the default mode is read-only) and let the csv
        # module stringify numeric values and terminate each row.
        fieldnames = param_names + ['num_of_games', 'res']
        with open(self.english_xg_result, 'w', newline='') as res_file:
            writer = csv.DictWriter(res_file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(test_results)