Пример #1
0
# Training data: the label is split off from the feature columns.
# 'result_type' encodes the match result (win=0, draw=1, loss=2).
train_Y = any_football_data['result_type']
train_X = any_football_data.drop(label_columns, axis=1)

# Feature processing: standard-scale the numeric columns and one-hot encode
# the categorical ones.
feature_pipeline = ColumnTransformer(transformers=[
    ("num", StandardScaler(), num_attribs),
    ("cat", OneHotEncoder(), cat_attribs_with_camp),
])
prepared_train = feature_pipeline.fit_transform(train_X)
print(f'prepared_train data_set shape: {prepared_train.shape}')

# Model training; estimator construction and fitting are both inside the
# timed window.
start_prepared = time.time()
knn_clf = KNeighborsClassifier(n_neighbors=19, weights='distance')
knn_clf.fit(prepared_train, train_Y)
end_prepared = time.time()
print(f'elapsed time: {end_prepared - start_prepared} sec \n')

# Bundle the already-fitted preprocessing and classifier into one pipeline
# object so they are saved together.
full_pipeline = Pipeline(steps=[
    ("preparation", feature_pipeline),
    ("predictor", knn_clf),
])

# Save under a timestamped name of the form knn_YYYYmmdd-HHMMSS.
dir_name = datetime.today().strftime("knn_%Y%m%d-%H%M%S")
utility.save_sklearn_model(full_pipeline, dir_name, "knn_clf_result_any.pkl")
Пример #2
0
# Class probabilities from the SGD base model for the home and away rows,
# rounded to three decimals.
sgd_home_pred = np.round(sgd_clf_4_5_any.predict_proba(home_train), decimals=3)
sgd_away_pred = np.round(sgd_clf_4_5_any.predict_proba(away_train), decimals=3)

# Build the second-stage feature matrices column-wise: a (1, 0) indicator pair
# for home rows and (0, 1) for away rows, followed by the KNN, rft and SGD
# base-model predictions. The knn_*/rft_* predictions are computed before this
# fragment (rft is presumably a random forest -- confirm).
home_prepared_train = np.c_[np.ones(len(home_train)),
                            np.zeros(len(home_train)), knn_home_pred,
                            rft_home_pred, sgd_home_pred]
away_prepared_train = np.c_[np.zeros(len(away_train)),
                            np.ones(len(away_train)), knn_away_pred,
                            rft_away_pred, sgd_away_pred]

# Home rows first, then away rows; the labels are concatenated in the same order.
train_X = np.r_[home_prepared_train, away_prepared_train]
train_Y = np.r_[home_result_4_5_over_Y, away_result_4_5_over_Y]

# KNeighborsClassifier training (4.5 labels)
start_prepared = time.time()

knn_clf_4_5 = KNeighborsClassifier(weights='uniform', n_neighbors=100)
knn_clf_4_5.fit(train_X, train_Y)

end_prepared = time.time()

print(
    f'4.5 knn predictor elapsed time: {end_prepared - start_prepared} sec \n')

# Save
# All four models are saved under the same timestamped name (fin_YYYYmmdd-HHMMSS).
# knn_clf_1_5 / knn_clf_2_5 / knn_clf_3_5 are defined before this fragment.
# NOTE(review): presumably dir_name is the output directory and the third
# argument the file name -- confirm against utility.save_sklearn_model.
dir_name = datetime.today().strftime("fin_%Y%m%d-%H%M%S")
utility.save_sklearn_model(knn_clf_1_5, dir_name, "fin_knn_clf_1_5.pkl")
utility.save_sklearn_model(knn_clf_2_5, dir_name, "fin_knn_clf_2_5.pkl")
utility.save_sklearn_model(knn_clf_3_5, dir_name, "fin_knn_clf_3_5.pkl")
utility.save_sklearn_model(knn_clf_4_5, dir_name, "fin_knn_clf_4_5.pkl")
Пример #3
0
# Linear-regression base-model predictions for the away rows, rounded to three
# decimals (the home counterpart, lin_home_pred, is computed before this
# fragment).
lin_away_pred = np.round(lin_reg_any.predict(away_train), decimals=3)

# prepare data
# league_cat = utility.LeagueTypeCategorize()
# leagues = league_cat.transform(home_train.copy())
# Second-stage features, stacked column-wise: a (1, 0) indicator pair for home
# rows and (0, 1) for away rows, followed by the KNN and linear base-model
# predictions.
home_prepared_train = np.c_[np.ones(len(home_train)),
                            np.zeros(len(home_train)), knn_home_pred,
                            lin_home_pred]
away_prepared_train = np.c_[np.zeros(len(away_train)),
                            np.ones(len(away_train)), knn_away_pred,
                            lin_away_pred]

# Home rows first, then away rows; the targets are concatenated in the same order.
train_X = np.r_[home_prepared_train, away_prepared_train]
train_Y = np.r_[home_score_Y, away_score_Y]

# LinearRegression training
start_prepared = time.time()

fin_lin_reg = LinearRegression()
fin_lin_reg.fit(train_X, train_Y)  # fit

end_prepared = time.time()

print(
    f'lin score predictor elapsed time: {end_prepared - start_prepared} sec \n'
)

# Save
# Stored under a timestamped name of the form fin_YYYYmmdd-HHMMSS.
dir_name = datetime.today().strftime("fin_%Y%m%d-%H%M%S")
utility.save_sklearn_model(fin_lin_reg, dir_name, "fin_lin_reg_score.pkl")
Пример #4
0
# Second-stage training set: home rows first, then away rows, with the result
# labels concatenated in the same order. The *_prepared_train matrices are
# built before this fragment.
train_X = np.r_[home_prepared_train, away_prepared_train]
train_Y = np.r_[home_result_Y, away_result_Y]

# SGDClassifier training (disabled)
# NOTE(review): if this is re-enabled on scikit-learn >= 1.1, loss='log' has to
# be spelled 'log_loss'.
# start_prepared = time.time()
#
# sgd_clf = SGDClassifier(max_iter=3000, tol=1e-3, random_state=42, alpha=0.01, loss='log')
# sgd_clf.fit(train_X, train_Y)
#
# end_prepared = time.time()
#
# print(f'sgd result predictor elapsed time: {end_prepared - start_prepared} sec \n')

# KNeighborsClassifier training
start_prepared = time.time()

knn_clf = KNeighborsClassifier(weights='uniform', n_neighbors=100)
knn_clf.fit(train_X, train_Y)

end_prepared = time.time()

print(
    f'knn result predictor elapsed time: {end_prepared - start_prepared} sec \n'
)

# Save
# Stored under a timestamped name of the form fin_YYYYmmdd-HHMMSS.
dir_name = datetime.today().strftime("fin_%Y%m%d-%H%M%S")
# utility.save_sklearn_model(sgd_clf, dir_name, "fin_sgd_clf_result.pkl")
utility.save_sklearn_model(knn_clf, dir_name, "fin_knn_clf_result.pkl")
Пример #5
0
# Training data: the regression target is the 'score' column; the label
# columns are dropped from the features.
train_Y = any_football_data['score']
train_X = any_football_data.drop(label_columns, axis=1)

# Feature processing: standard-scale the numeric columns and one-hot encode
# the categorical ones.
feature_pipeline = ColumnTransformer(transformers=[
    ("num", StandardScaler(), num_attribs),
    ("cat", OneHotEncoder(), cat_attribs_with_camp),
])
prepared_train = feature_pipeline.fit_transform(train_X)
print(f'prepared_train data_set shape: {prepared_train.shape}')

# Model training; estimator construction and fitting are both inside the
# timed window.
start_prepared = time.time()
knn_reg = KNeighborsRegressor(weights="distance", n_neighbors=19)
knn_reg.fit(prepared_train, train_Y)
end_prepared = time.time()
print(f'elapsed time: {end_prepared - start_prepared} sec \n')

# Bundle the already-fitted preprocessing and regressor into one pipeline
# object so they are saved together.
full_pipeline = Pipeline(steps=[
    ("preparation", feature_pipeline),
    ("predictor", knn_reg),
])

# Save under a timestamped name of the form knn_YYYYmmdd-HHMMSS.
dir_name = datetime.today().strftime("knn_%Y%m%d-%H%M%S")
utility.save_sklearn_model(full_pipeline, dir_name, "knn_reg_score_any.pkl")
Пример #6
0
# 3.5 classifier. The matching start_prepared timestamp is taken before this
# fragment begins.
knn_clf_3_5 = KNeighborsClassifier(weights='distance', n_neighbors=19)
knn_clf_3_5.fit(prepared_train, train_3_5_Y)

end_prepared = time.time()

print(f'3.5 predictor elapsed time: {end_prepared - start_prepared} sec \n')

# Each saved pipeline pairs the same feature_pipeline object with its own
# fitted classifier.
full_pipeline_3_5 = Pipeline([("preparation", feature_pipeline),
                              ("predictor", knn_clf_3_5)])

# Model training (4.5)
start_prepared = time.time()

knn_clf_4_5 = KNeighborsClassifier(weights='distance', n_neighbors=19)
knn_clf_4_5.fit(prepared_train, train_4_5_Y)

end_prepared = time.time()

print(f'4.5 predictor elapsed time: {end_prepared - start_prepared} sec \n')

full_pipeline_4_5 = Pipeline([("preparation", feature_pipeline),
                              ("predictor", knn_clf_4_5)])

# Save
# All four pipelines are stored under the same timestamped name
# (knn_YYYYmmdd-HHMMSS); full_pipeline_1_5 and full_pipeline_2_5 are built
# before this fragment.
dir_name = datetime.today().strftime("knn_%Y%m%d-%H%M%S")
utility.save_sklearn_model(full_pipeline_1_5, dir_name, "knn_clf_1_5_any.pkl")
utility.save_sklearn_model(full_pipeline_2_5, dir_name, "knn_clf_2_5_any.pkl")
utility.save_sklearn_model(full_pipeline_3_5, dir_name, "knn_clf_3_5_any.pkl")
utility.save_sklearn_model(full_pipeline_4_5, dir_name, "knn_clf_4_5_any.pkl")
Пример #7
0
    ("cat", OneHotEncoder(), cat_attribs_with_camp),
])

# Single-step preprocessing pipeline around columns_pipeline_home (defined just
# above this fragment).
feature_pipeline = Pipeline([
    ('columns', columns_pipeline_home),
])

# Model training
prepared_train = feature_pipeline.fit_transform(train_X)
print(f'prepared_train data_set shape: {prepared_train.shape}')

start_prepared = time.time()

# Logistic-regression loss. It is spelled 'log_loss' because the former 'log'
# alias was deprecated in scikit-learn 1.1 and removed in 1.3, where passing it
# raises a parameter-validation error. Requires scikit-learn >= 1.1.
sgd_clf = SGDClassifier(max_iter=2000,
                        tol=1e-3,
                        random_state=42,
                        alpha=0.01,
                        loss='log_loss')
sgd_clf.fit(prepared_train, train_Y)

end_prepared = time.time()

print(f'elapsed time: {end_prepared - start_prepared} sec \n')

# Bundle the already-fitted preprocessing and classifier into one pipeline
# object so they are saved together.
full_pipeline = Pipeline([("preparation", feature_pipeline),
                          ("predictor", sgd_clf)])

# Save under a timestamped name of the form sgd_YYYYmmdd-HHMMSS.
dir_name = datetime.today().strftime("sgd_%Y%m%d-%H%M%S")
utility.save_sklearn_model(full_pipeline, dir_name, "sgd_clf_bts_any.pkl")