# Train an XGBoost random-forest classifier on an 80/20 split, persist it with
# joblib, then reload it and re-evaluate to confirm the save/load round trip.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8)

model = XGBRFClassifier(n_estimators=1000, learning_rate=0.1)
# eval_metric options: rmse, mae, logloss, error, auc
model.fit(x_train, y_train,
          verbose=True,
          eval_metric="error",
          eval_set=[(x_train, y_train), (x_test, y_test)])

results = model.evals_result()
print("eval:", results)

y_pred = model.predict(x_test)
acc = accuracy_score(y_test, y_pred)
print("acc:", acc)

# import pickle
# pickle.dump(model, open("./model/sample/xgb_save/cancer.pickle.dat", "wb"))
import joblib

joblib.dump(model, "./model/sample/xgb_save/cancer.joblib.dat")
print("저장됨.")

model2 = joblib.load("./model/sample/xgb_save/cancer.joblib.dat")
print("불러오깅")

# BUG FIX: predict with the *loaded* model (model2), not the in-memory one —
# otherwise the round trip above is never actually verified. Also print the
# resulting accuracy, which was previously computed and discarded.
y_pred = model2.predict(x_test)
acc = accuracy_score(y_test, y_pred)
print("acc2:", acc)
# Train an XGBoost random-forest classifier, report error/auc on train and
# test sets, then walk the sorted feature importances with SelectFromModel,
# producing progressively smaller feature subsets.
model = XGBRFClassifier(
    n_estimators=300,   # number of boosting rounds shown by verbose, analogous to epochs
    learning_rate=0.1)

model.fit(x_train, y_train,
          verbose=True,
          eval_metric=['error', 'auc'],
          eval_set=[(x_train, y_train), (x_test, y_test)])
# early_stopping_rounds = 100)
# eval_metric options: rmse, mae, logloss, error (error of 0.2 means accuracy
# of 0.8), auc (area under the ROC curve — a companion metric to accuracy)

results = model.evals_result()
print("eval's result : ", results)

y_pred = model.predict(x_test)
acc = accuracy_score(y_test, y_pred)
# print("r2 Score : %.2f%%" %(r2 * 100))
print("acc : ", acc)

# Importances sorted ascending: iterating them as thresholds drops the least
# important columns one by one.
thresholds = np.sort(model.feature_importances_)
import pickle
print(thresholds)

for thresh in thresholds:
    # Drop the unimportant columns one at a time: keep only features whose
    # importance is >= thresh. prefit=True reuses the already-fitted model.
    selection = SelectFromModel(model, threshold=thresh, prefit=True)

    selection_x_train = selection.transform(x_train)
    selection_x_test = selection.transform(x_test)
if test: y = None X = d.values else: y = np.ravel(d[['Survived']].values) X = d.drop(columns=['Survived']).values X = preprocessing.scale(X) return (X, y) (Xtrain, ytrain) = for_model_input(trainset) knn_imputer = KNNImputer() Xtrain = knn_imputer.fit_transform(Xtrain) boosted_model = XGBRFClassifier() boosted_model.fit(Xtrain, ytrain) boosted_scores = cross_val_score(boosted_model, Xtrain, ytrain, cv=5) print("Gradient-Boosting Model CV scores:\n", boosted_scores, np.mean(boosted_scores)) (Xtest, _) = for_model_input(testset, test=True) Xtest = knn_imputer.fit_transform(Xtest) predictions_boosted = boosted_model.predict(Xtest) # + 1) / 2 predictions_boosted = predictions_boosted.astype('int64') pred_boosted_df = pandas.DataFrame(predictions_boosted, columns=['Survived']) fin_ans_boosted = pandas.DataFrame( testset['PassengerId']).join(pred_boosted_df) with open('predictions_xgboost_rf.csv', 'w') as f: f.write((fin_ans_boosted.to_csv(index=False)))
# Standardize the features, split train/test, fit an XGBoost random-forest
# classifier, report test accuracy, then plot actual vs. predicted labels.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
print(f"\nBefore Standard Scaler, x.head() :- \n{ x.head() }")
# NOTE(review): fit_transform returns a numpy array, so x stops being a
# DataFrame (no .head()) after this line.
x = sc.fit_transform(x)
print(f"\nAfter Standard Scaler, x :- \n{ x }")

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

from xgboost import XGBRFClassifier

xgboost = XGBRFClassifier()
xgboost.fit(x_train, y_train)
y_pred = xgboost.predict(x_test)
print(
    f"xgboost.score( x_test, y_test ) = { xgboost.score( x_test, y_test ) * 100 }%"
)

import matplotlib.pyplot as plt

# Scatter of the actual test labels (linestyle='' suppresses connecting lines).
plt.plot(x_test, y_test, label='Actual', marker='*', color='blue', linestyle='')
# NOTE(review): this call is truncated here — it continues beyond this chunk.
plt.plot(x_test, y_pred,
# Fit an XGBoost random-forest classifier, report MCC / precision / recall and
# an ROC curve on the validation split, then restrict both splits to a
# hand-picked set of 20 feature columns.

# (Optional) majority-class downsampling, kept for reference:
# neg_downsampled = resample(negative,
#                            replace=True,              # sample with replacement
#                            n_samples=len(positive),   # match minority class count
#                            random_state=27)           # reproducible results
# downsampled = pd.concat([positive, neg_downsampled]).dropna()
# X_train = pd.DataFrame(downsampled.drop(columns="target"), index=downsampled.index)
# y_train = pd.Series(downsampled["target"], index=downsampled.index)

my_model = XGBRFClassifier(random_state=1).fit(X_train, y_train)
predictions = my_model.predict(X_val)

# Matthews correlation is symmetric in its arguments, so the order is harmless.
print("Matthews Correlation Coefficient: " +
      str(matthews_corrcoef(predictions, y_val)))
# BUG FIX: sklearn's signature is (y_true, y_pred); with the arguments passed
# as (predictions, y_val), precision and recall were silently exchanged.
print("Precision Score: " + str(precision_score(y_val, predictions)))
print("Recall Score: " + str(recall_score(y_val, predictions)))
ROC_curve(y_val, predictions)

# Keep only the 20 hand-picked most-informative feature columns (the list was
# previously duplicated verbatim for train and validation).
_SELECTED_COLUMNS = [
    173, 141, 530, 683, 661, 498, 48, 183, 206, 716,
    697, 185, 211, 624, 671, 623, 67, 111, 118, 129,
]
X_train_filtered = pd.DataFrame(X_train).iloc[:, _SELECTED_COLUMNS]
X_val_filtered = pd.DataFrame(X_val).iloc[:, _SELECTED_COLUMNS]