Example #1
File: mit_model.py Project: cfong32/lpp
def predict(course_code, user_id):
    filename = get_path(course_code, '%s_model.xgb' % course_code)

    X, y = load_data(course_code)

    user_X = X.loc[user_id]

    # Normalization
    if course_code not in data_transformer:
        scaler = MinMaxScaler()
        scaler.fit(X)
        data_transformer[course_code] = scaler
    scaler = data_transformer[course_code]

    if course_code not in model_cache:
        model = XGBRegressor()
        if os.path.isfile(filename):
            model.load_model(filename)
        else:
            # Fit on the scaled features without overwriting X,
            # otherwise X would be scaled a second time below.
            model.fit(scaler.transform(X), y)
            model.save_model(filename)
        model_cache[course_code] = model

    model = model_cache[course_code]
    X = scaler.transform(X)

    y_ = model.predict(X)
    hist, bin_edges = np.histogram(y_, bins=10, range=[0, 1])

    return {
        "classFinalExamDistribution": hist.tolist(),
        "myChapterScore": get_user_chapter_grades(course_code, user_id),
        "myPredictedFinalExamScore": float(model.predict(user_X)[0])
    }
Example #2
    def __init__(self, json_file):
        name = "_".join(json_file.split("/")[-4:])
        name = name.replace("/", "_")
        name = name.replace('.json', '')
        name = "XGB_" + name
        name = name.replace("-", "_")
        self.name = name

        # load json and create model
        loaded_model = XGBRegressor()
        loaded_model.load_model(json_file)

        print("Loaded XGBRegressor model from disk:")
        print("\t{}".format(json_file))

        self.model = loaded_model

        # load list of inputs for the model
        sys.path.insert(0, json_file.rstrip(json_file.split('/')[-1]))
        import inputs_for_models_in_this_dir
        reload(inputs_for_models_in_this_dir)  # avoid being stuck with previous versions
        this_model_inputs = inputs_for_models_in_this_dir.inputs
        this_model_inputs = [
            i if i not in var_names_at_KIT.keys() else var_names_at_KIT[i]
            for i in this_model_inputs
        ]
        self.inputs = this_model_inputs
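The snippet relies on a sibling module named inputs_for_models_in_this_dir sitting next to the JSON file, and on Python 3 reload must come from importlib. A minimal sketch of what that module and the lookup table might look like (the variable names and the var_names_at_KIT mapping are illustrative assumptions, not taken from the original project):

# inputs_for_models_in_this_dir.py -- hypothetical contents: a single list named `inputs`
inputs = ["T_2m", "wind_speed_10m", "global_radiation"]

# In the module defining the class above (assumed surrounding imports):
import sys
from importlib import reload  # Python 3: reload is no longer a builtin
from xgboost import XGBRegressor

# Illustrative mapping from generic variable names to the names used at KIT
var_names_at_KIT = {"T_2m": "TT_KIT", "wind_speed_10m": "FF_KIT"}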
Example #3
def main():
    print("Loading data...")
    # The training data is used to train your model how to predict the targets.
    training_data = read_csv("numerai_training_data.csv")
    # The tournament data is the data that Numerai uses to evaluate your model.
    tournament_data = read_csv("numerai_tournament_data.csv")

    feature_names = [
        f for f in training_data.columns if f.startswith("feature")
    ]
    print(f"Loaded {len(feature_names)} features")

    # This is the model that generates the included example predictions file.
    # Taking too long? Set learning_rate=0.1 and n_estimators=200 to make this run faster.
    # Remember to delete example_model.xgb if you change any of the parameters below.
    model = XGBRegressor(max_depth=5,
                         learning_rate=0.01,
                         n_estimators=2000,
                         n_jobs=-1,
                         colsample_bytree=0.1)
    if MODEL_FILE.is_file():
        print("Loading pre-trained model...")
        model.load_model(MODEL_FILE)
    else:
        print("Training model...")
        model.fit(training_data[feature_names], training_data[TARGET_NAME])
        model.save_model(MODEL_FILE)

    # Generate predictions on both training and tournament data
    print("Generating predictions...")
    training_data[PREDICTION_NAME] = model.predict(
        training_data[feature_names])
    tournament_data[PREDICTION_NAME] = model.predict(
        tournament_data[feature_names])

    # Check the per-era correlations on the training set (in sample)
    train_correlations = training_data.groupby("era").apply(score)
    print(
        f"On training the correlation has mean {train_correlations.mean()} and std {train_correlations.std()}"
    )
    print(
        f"On training the average per-era payout is {payout(train_correlations).mean()}"
    )

    # Check the per-era correlations on the validation set (out of sample)
    validation_data = tournament_data[tournament_data.data_type ==
                                      "validation"]
    validation_correlations = validation_data.groupby("era").apply(score)
    print(
        f"On validation the correlation has mean {validation_correlations.mean()} and "
        f"std {validation_correlations.std()}")
    print(
        f"On validation the average per-era payout is {payout(validation_correlations).mean()}"
    )

    # Save predictions as a CSV and upload to https://numer.ai
    tournament_data[PREDICTION_NAME].to_csv(TOURNAMENT_NAME +
                                            "_submission.csv")
Example #4
class GDPGrowthPredictor:
    """Gbm class"""
    def __init__(self, *args, **kwargs):
        """Create model with given parameters"""
        self.model = XGBRegressor(*args, **kwargs)

    def train(self, filename, split, previous_year, plot, *args, **kwargs):
        """Train model, and plot results"""
        X_train, X_test, y_train, y_test, features = _io.retrieve_training_dataset(
            split, previous_year)
        self.model.fit(X_train, y_train, *args, **kwargs)
        self.save(filename)

        if split != 0:
            self.test(X_test, y_test, features, split, plot)

    def test(self, X_test, y_test, features, split, plot):
        """Test model"""
        model_y_pred = self.model.predict(X_test)

        results_df = X_test
        results_df = results_df.drop(columns=features)
        results_df["y_real"] = y_test
        results_df["y_pred"] = model_y_pred
        results_df["err"] = np.absolute(results_df["y_real"] -
                                        results_df["y_pred"])
        results_df["%_err"] = ((results_df["err"]) /
                               (np.absolute(results_df["y_real"])) * 100)

        logging.info("Test results with %s split:", split)
        logging.info("\t RMSE: %.3f",
                     mean_squared_error(y_test, model_y_pred)**0.5)
        logging.info("\t R^2: %.3f", r2_score(y_test, model_y_pred))

        if plot:
            logging.info("Generating plots")
            plots.plot_performance_results(y_test, model_y_pred)
            plots.plot_shap_results(X_test, features, self.model)

    def predict(self, filename, previous_year, year, *args, **kwargs):
        """Make predictions for next year GDP growth,
        returns a pandas df"""
        self.load(filename)
        predictions, X_predict = _io.retrieve_predict_dataset(
            previous_year, year)
        predictions["Value"] = self.model.predict(X_predict, *args, **kwargs)
        return predictions

    def save(self, filename):
        """ Save model to file"""
        self.model.save_model(filename)
        logging.info("Model saved")

    def load(self, filename):
        """ Load model from file"""
        self.model.load_model(filename)
        logging.info("Model loaded")
Example #5
def get_regressors():

    a = XGBRegressor()
    a_model_path = os.path.join(STORAGE, "modela.xgb")
    a.load_model(a_model_path)
    b = XGBRegressor()
    b_model_path = os.path.join(STORAGE, "modelb.xgb")
    b.load_model(b_model_path)

    return a, b
Example #6
    def pred_psm(self, path, main_df_col, historical_postal_code_area,
                 area_centroids, sch_gdf, train_gdf, police_centre_gdf,
                 avg_cases_by_npc):
        '''
        :param path: takes in path where model weights and scalers are stored
        :param main_df_col: list of training dataset column names so that prediction df tallies
        :param historical_postal_code_area: used to look up the planning area/region of a postal code already present in our dataset, instead of relying on distance measures (the areas returned can differ for some postal codes)
        :param area_centroids: to get the planning area property is in
        :param sch_gdf: to get nearest school distance
        :param train_gdf: to get nearest stations/lines
        :param police_centre_gdf: to get nearest police centre
        :param avg_cases_by_npc: to get avg crime cases per year for nearest police centre
        :return: predicted price per sqm
        '''
        property_df = self.convert_to_df(main_df_col,
                                         historical_postal_code_area,
                                         area_centroids, sch_gdf, train_gdf,
                                         police_centre_gdf, avg_cases_by_npc)
        s_scaler = joblib.load(path + 'standard_scaler.bin')
        mm_scaler = joblib.load(path + 'mm_scaler.bin')
        standardScale_vars = [
            'Area (SQM)', 'Floor Number', 'PPI', 'Average Cases Per Year',
            'Nearest Primary School', 'nearest_station_distance'
        ]
        minMax_vars = ['Remaining Lease']
        s_scaled = pd.DataFrame(s_scaler.transform(
            property_df.loc[:, standardScale_vars].copy()),
                                columns=standardScale_vars)
        mm_scaled = pd.DataFrame(mm_scaler.transform(
            property_df.loc[:, minMax_vars].copy()),
                                 columns=minMax_vars)

        property_df_scaled = pd.concat([
            s_scaled, mm_scaled,
            property_df.loc[:, 'Ang Mo Kio':'Executive Condominium'].copy()
        ],
                                       axis=1)

        # Initialize model
        model = XGBRegressor()

        # Load model
        model.load_model(path + 'model_xgboost.bin')

        # Use the loaded model to make predictions
        prediction = model.predict(property_df_scaled)[0]

        # Convert the prediction from price per SQM to price per SQFT
        prediction = prediction / 10.7639

        return prediction
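pred_psm assumes that standard_scaler.bin, mm_scaler.bin and model_xgboost.bin already exist under path. A minimal sketch of how the training side might have persisted them so the loader above works (train_df, X_train_scaled, y_train and the column lists are assumed to be defined by the training pipeline):

import joblib
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from xgboost import XGBRegressor

# Fit and persist the scalers under the same file names the loader expects
s_scaler = StandardScaler().fit(train_df[standardScale_vars])
mm_scaler = MinMaxScaler().fit(train_df[minMax_vars])
joblib.dump(s_scaler, path + 'standard_scaler.bin')
joblib.dump(mm_scaler, path + 'mm_scaler.bin')

# Train and persist the regressor
model = XGBRegressor()
model.fit(X_train_scaled, y_train)
model.save_model(path + 'model_xgboost.bin')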
Example #7
class XGBModel(GenericModel):
    def __init__(self, name, version=1, classifier=True, xgb_kwargs=None):
        super().__init__(name, version)
        self.xgb_kwargs = xgb_kwargs or {}  # guard against None before unpacking
        if classifier:
            self.model = XGBClassifier(**self.xgb_kwargs)
        else:
            self.model = XGBRegressor(**self.xgb_kwargs)

    def train(self):
        print(
            'No custom train method implemented. Instead call self.model.fit(...)'
        )

    def save_model(self,
                   notes=None,
                   update_version=False,
                   config=None,
                   save_attributes=True):
        if update_version:
            self.version += 1

        try:
            model_path = self.model_dir / Path(f'v{self.version}.json')
            self.model.save_model(model_path.as_posix())
        except Exception as e:
            print('Error saving model')
            print(e)
            raise

        if save_attributes:
            self._save_attributes()

        if notes is not None:
            self._save_notes(notes)

        if config is not None:
            self._save_config(config)

    def load_model(self, version, load_attributes=True):
        self.version = version  # load the requested version rather than the current one

        # First load the xgb_kwargs so that we can create a new instance of XGB
        self._load_attributes(self.attr_dir)
        if hasattr(self, 'xgb_kwargs'):
            self.model = type(self.model)(**self.xgb_kwargs)

        # Next load the model
        model_path = self.model_dir / Path(f'v{self.version}.json')
        assert model_path.exists(), f'No model exists at {model_path.as_posix()}'
        self.model.load_model(model_path)
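A hypothetical usage sketch for this wrapper, assuming the GenericModel base class provides model_dir and attr_dir, and that training is done directly on the wrapped estimator (X_train, y_train, X_valid are placeholders):

m = XGBModel('demand_forecast', classifier=False, xgb_kwargs={'max_depth': 4})
m.model.fit(X_train, y_train)                # no custom train(); fit the wrapped regressor
m.save_model(notes='baseline run', update_version=True)
m.load_model(version=m.version)              # reloads the persisted v{n}.json
preds = m.model.predict(X_valid)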
Example #8
def model_predict(s):

    param = {
        'colsample_bytree': 0.8,
        'subsample': 0.75,
        'eta': 0.02,
        'n_estimators': 1100,
        'max_depth': 7,
        'min_child_weight': 1
    }
    model = XGBRegressor(**param)
    try:
        model.load_model("./models/xgbmodelprime")
    except:
        model.load_model("./models/xgbmodel")
    y_pred = model.predict(s[[
        "date_block_num", "shop_id", "item_id", "id_struct", "item_category",
        "Price_agg", "keyz", "item_cnt_month_lag1", "item_cnt_month_lag2",
        "item_cnt_month_lag3", "item_cnt_month_lag4", "item_cnt_month_lag5",
        "item_cnt_month_lag6", "item_cnt_month_lag7", "Price_agg_lag1",
        "Price_agg_lag2"
    ]])
    #create current preds file
    s["predictions"] = y_pred
    c_pred = s[["Date", "shop_id", "item_id", "predictions"]]
    c_pred["Date"] = c_pred["Date"].astype("str")
    try:
        h_preds = pd.read_csv("data/prediction/h_predictions.csv")
    except:
        hp_df = pd.DataFrame({
            'Date': pd.Series([], dtype='str'),
            'shop_id': pd.Series([], dtype='int'),
            'item_id': pd.Series([], dtype='int'),
            'predictions': pd.Series([], dtype='float')
        })
        hp_df["Date"] = pd.to_datetime(hp_df["Date"])
        hp_df.to_csv("data/prediction/h_predictions.csv", index=False)
        h_preds = pd.read_csv("data/prediction/h_predictions.csv")
    new_dff = pd.concat(
        [h_preds[["Date", "shop_id", "item_id", "predictions"]], c_pred],
        axis=0,
        sort=False)
    new_dff1 = new_dff.drop_duplicates(
        ["Date", "shop_id", "item_id"]).reset_index(drop=True)
    new_dff1.to_csv("data/prediction/h_predictions.csv", index=False)
    return s[["Date", "shop_id", "item_id", "predictions"]]
Example #9
File: mit_model.py Project: cfong32/lpp
def main():
    course = 'VJx__VJx_2__3T2016'
    filename = 'model.xgb'
    X, y = load_data(course)

    # Normalization
    scaler = MinMaxScaler()
    scaler.fit(X)
    X = scaler.transform(X)

    model = XGBRegressor()
    if os.path.isfile(filename):
        model.load_model(filename)
    else:
        model.fit(X, y)
        model.save_model(filename)
    y_ = model.predict(X)
    print(y_)
Example #10
class XGBModel(Model):
    def Build(self):
        self.model = XGBRegressor(max_depth=10, n_estimators=1000, objective='reg:squarederror', seed=config.random_state, nthread=12, tree_method='gpu_hist')

    def Load(self, fileName):
        self.Build()
        self.model.load_model(fileName + '.xgb')

    def Save(self, fileName):
        self.model.save_model(fileName + '.xgb')

    def Fit(self, X_trn, y_trn, X_tst, y_tst, plot=False):
        self.model.fit(X_trn, y_trn, eval_metric='rmse', eval_set=[(X_trn, y_trn), (X_tst, y_tst)], verbose=True, early_stopping_rounds=50)
        if plot: 
            results = self.model.evals_result()
            loss = results['validation_0']['rmse']
            val_loss = results['validation_1']['rmse']
            plot_loss(loss, val_loss)

    def Predict(self, X):
        return self.model.predict(X).reshape(-1,1)
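plot_loss is not defined in this excerpt; a plausible matplotlib sketch that matches how it is called above, where both arguments are per-round RMSE curves taken from evals_result():

import matplotlib.pyplot as plt

def plot_loss(loss, val_loss):
    # loss / val_loss: RMSE per boosting round on the train and test eval sets
    plt.plot(loss, label='train rmse')
    plt.plot(val_loss, label='validation rmse')
    plt.xlabel('boosting round')
    plt.ylabel('rmse')
    plt.legend()
    plt.show()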
Example #11
def bulid_models(x_train, y_train, x_test, y_test, best_grida, best_gridb):
    root_folder = lib.features.STORAGE
    file_patha = os.path.join(root_folder, "modela.xgb")
    file_pathb = os.path.join(root_folder, "modelb.xgb")

    modela = XGBRegressor()
    modela.load_model(file_patha)
    y_preda = modela.predict(x_test)
    base_scorea = mean_absolute_error(y_test[:, 0], y_preda)

    modelb = XGBRegressor()
    modelb.load_model(file_pathb)
    y_predb = modelb.predict(x_test)
    base_scoreb = mean_absolute_error(y_test[:, 1], y_predb)

    modela = XGBRegressor(**best_grida)
    modela = modela.fit(x_train,
                        y_train[:, 0],
                        eval_set=[(x_test, y_test[:, 0])],
                        early_stopping_rounds=100,
                        verbose=False)
    y_preda = modela.predict(x_test)
    scorea = mean_absolute_error(y_test[:, 0], y_preda)
    print("score A : {} vs {}".format(scorea, base_scorea))
    if scorea <= base_scorea:
        modela.save_model(file_patha)
        print("model A saved !")

    modelb = XGBRegressor(**best_gridb)
    modelb = modelb.fit(x_train,
                        y_train[:, 1],
                        eval_set=[(x_test, y_test[:, 1])],
                        early_stopping_rounds=100,
                        verbose=False)
    y_predb = modelb.predict(x_test)
    scoreb = mean_absolute_error(y_test[:, 1], y_predb)
    print("score B : {} vs {}".format(scoreb, base_scoreb))
    if scoreb <= base_scoreb:
        modelb.save_model(file_pathb)
        print("model B saved !")
Example #12
# The loss value for each training run is returned
aaa = model.score(x_test, y_test)
# print("model.score : ", aaa)

y_pred = model.predict(x_test)
r2 = r2_score(y_test, y_pred)
print("r2 : ", r2)

# aaa :  0.9329663244922279
# r2  :  0.9329663244922279
# print("===========================")
# result = model.evals_result()
# print(result)

# Save
import pickle
# pickle.dump(model, open('../data/xgb_save/m39.pickle.dat','wb'))
import joblib
# joblib.dump(model,'../data/xgb_save/m39.joblib.dat')
# model.save_model('../data/xgb_save/m39.xgb.model')
# print('Save complete')

# Load
# model2 = pickle.load(open('../data/xgb_save/m39.pickle.dat','rb'))
# model2 = joblib.load('../data/xgb_save/m39.pickle.dat')
model2 = XGBRegressor()
model2.load_model('../data/xgb_save/m39.xgb.model')
print('Loaded')

r22 = model2.score(x_test, y_test)
print('r22 : ', r22)
Example #13
    XGboost_model = XGBoosting(
        0.7,  # subsample
        20,  # max_depth
        5,  # min_samples_split
        0.09,  # learning_rate
        'mae',  # eval_metric
        1,  # num_parallel_tree
        15)  # number of trees

    XGboost_model.fit(X=x_train, y=y_train)
    XGboost_model.save_model('xgboost_model')


if __name__ == '__main__':
    test_data = pd.read_csv('./x_test.csv')
    test_labels = pd.read_csv('./y_test.csv')
    # train_data = pd.read_csv('./x_train.csv')
    # train_labels = pd.read_csv('./y_train.csv')

    # build_models(train_data, train_labels)
    ''' Load the models from their files '''
    XGboost_model = XGBRegressor()
    XGboost_model.load_model('xgboost_model')
    # boost_RF_model = XGBRegressor()
    # boost_RF_model.load_model('RF_model')
    #
    ''' Initiate score check on the XGBoost model '''
    predictions = XGboost_model.predict(test_data)
    print(test_labels.values)
    labels_arr = test_labels.to_numpy().reshape(-1)
    print(explained_variance_score(labels_arr, predictions))
Example #14
from xgboost import XGBRegressor

import flask
import locale
import pandas as pd

from df_schema import df_dict

model = XGBRegressor()
model.load_model('model/best_model.json')

app = flask.Flask(__name__, template_folder='templates')


def brl(value):
    locale.setlocale(locale.LC_ALL, 'pt_BR.UTF-8')
    return 'R$ {}'.format(locale.currency(value, grouping=True, symbol=False))


@app.route('/', methods=['GET', 'POST'])
def main():
    if flask.request.method == 'GET':
        return flask.render_template('main.html')

    if flask.request.method == 'POST':
        type_ = flask.request.form['type']
        neighborhood = flask.request.form['neighborhood']

        if type_ != 'Apartamento':
            df_dict[type_] = 1.
Example #15
class Regressor:

    # for initializing train and test sets, classifier and accuracy score
    # Change method to gpu_hist if you want xgboost to run on a GPU
    def __init__(self,
                 params={
                     'objective': 'reg:squarederror',
                     'verbosity': 0
                 }):
        self.X_train = []
        self.X_labels = []
        self.test = []
        self.test_labels = []
        self.model = XGBRegressor(**params)
        self.prediction = 0
        self.error = 0

    def size(self):
        if isinstance(self.X_train, np.ndarray):
            return self.X_train.size
        return len(self.X_train)

    # adding the data points
    def input_train(self, features, feature):
        if isinstance(self.X_train, np.ndarray) and self.X_train.size > 0:
            self.X_train = self.X_train.tolist()
            self.X_labels = self.X_labels.tolist()
        self.X_train.append(features)
        self.X_labels.append(feature)

    # train the data
    def train(self):
        self.X_train = np.asarray(self.X_train)
        self.X_labels = np.asarray(self.X_labels)
        self.model.fit(self.X_train, self.X_labels)

    def train_eval(self, metric='error'):
        self.X_train = np.asarray(self.X_train)
        self.X_labels = np.asarray(self.X_labels)
        X_train, X_test, y_train, y_test = train_test_split(self.X_train,
                                                            self.X_labels,
                                                            test_size=0.33)
        self.model.fit(X_train,
                       y_train,
                       eval_set=[(X_train, y_train), (X_test, y_test)],
                       eval_metric=metric)
        evals_result = self.model.evals_result()
        if metric == 'error':
            validations = []
            for val in evals_result.values():
                lst = val.get("error")
                validations.append(sum(lst) / len(lst))
            return 1 - (sum(validations) / len(validations))
        else:
            validations = []
            for val in evals_result.values():
                lst = val.get(metric)
                validations.append(lst[-1])
            return validations

    # input test labels if you want to check accuracy
    def label(self, label):
        self.test_labels.append(label)

    def input_test(self, features):
        if isinstance(self.test, np.ndarray) and self.test.size > 0:
            self.test = self.test.tolist()
        self.test.append(features)

    # test data
    def predict(self):
        if not isinstance(self.test, np.ndarray):
            self.test = np.asarray(self.test)
        self.prediction = self.model.predict(self.test)
        return self.prediction

    # if you have the test labels you can check the error rate (you want error close to 0)
    def check_error(self):
        self.test_labels = np.asarray(self.test_labels)
        self.error = metrics.mean_absolute_error(self.test_labels,
                                                 self.prediction)
        return self.error

    # save classifier
    def save_classifier(self, file):
        self.model.save_model(file)

    # open saved classifier
    def open_classifier(self, file):
        self.model.load_model(file)

    # removes all training data
    def clean_train(self):
        self.X_train = []
        self.X_labels = []

    # removes all testing data
    def clean_test(self):
        self.test = []
        self.test_labels = []
Example #16
def load(name):
    model = XGBRegressor()
    model.load_model(MODEL_DIRECTORY + name + ".json")
    return model
Example #17
y_pred = model.predict(x_test)
r2 = r2_score(y_test, y_pred)
print('r2 : ', r2)

result = model.evals_result()
# print(result)

# Save the model
import pickle
import joblib

# pickle.dump(model, open('../data/xgb_save/m_39.pickle.dat','wb'))  # dump: same as save
# print('Saved')

# joblib.dump(model,('../data/xgb_save/m_40.jolib.dat'))
# print('Saved')

# model.save_model('../data/xgb_save/m_41.xgb.dat')
# print('Saved')

# print('============================================================')
# # Load the model
# # model2 = pickle.load(open('../data/xgb_save/m_39.pickle.dat','rb'))
# model2 = joblib.load('../data/xgb_save/m_40.jolib.dat')
model2 = XGBRegressor()
model2.load_model('../data/xgb_save/m_41.xgb.dat')

print('Loaded')
r22 = model2.score(x_test, y_test)
print(r22)
Example #18
def load_model(model_name):
    model = XGBRegressor()
    model.load_model(f"resources/{model_name}.json")
    return model
Example #19
plt.xticks(rotation=90, fontsize=12)
plt.yticks(fontsize=12)
plt.grid(alpha=0.6)
plt.ylim(0, 0.6)
plt.xlabel('Feature', fontsize=20)
plt.ylabel('Importance', fontsize=20)
plt.gca().set_axisbelow(True)  # use the current axes rather than creating a new one
plt.savefig(
    f'ModelComparison/plots/PROACT_{model_name}_feature_imprtance_last_30.png',
    quality=100,
    bbox_inches='tight')
plt.show()

i = 1
model = XGBRegressor()
model.load_model(
    f'C:/Users/Ben/Desktop/Results/PROACT/XGB/models/model_{i}.model')

model.feature_importances_

PROACT_feature_importances = pd.DataFrame(
    columns=['feature', 'importance', 'iter'])
for i in range(60):
    try:
        temp = pd.DataFrame(
            dict(feature=columns, importance=a.feature_importances_, iter=i))
        PROACT_feature_importances = pd.concat(
            [PROACT_feature_importances, temp])
    except:
        pass

aa = PROACT_feature_importances.groupby(
Example #20
class BostonHouseFeatures(BaseModel):
    crim: float  # per capita crime rate by town
    zn: float  # proportion of residential land zoned for lots over 25,000 sq.ft.
    indus: float  # proportion of non-retail business acres per town
    chas: float  # Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
    nox: float  # nitric oxides concentration (parts per 10 million)
    rm: float  # average number of rooms per dwelling
    age: float  # proportion of owner-occupied units built prior to 1940
    dis: float  # weighted distances to five Boston employment centres
    rad: float  # index of accessibility to radial highways
    tax: float  # full-value property-tax rate per $10,000
    ptratio: float  # pupil-teacher ratio by town
    b: float  # 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
    lstat: float  # % lower status of the population


# uvicorn boston_inference:app --reload
app = FastAPI()

xgb = XGBRegressor()
xgb.load_model("xgbregressor_boston.json")


@app.post("/predict")
async def predict_house_price(features: BostonHouseFeatures):

    X = np.array(list(features.dict().values()))
    y = float(xgb.predict(np.expand_dims(X, axis=0))[0])
    return {"price": y}
Example #21
# importing packages
import flask
from flask import Flask
import pandas as pd
import pickle
import xgboost
from xgboost import XGBRegressor

print(pickle.format_version)
# importing model and features
ml = XGBRegressor()
#with open('model/xgb_new.pkl', 'rb') as file:
#    modelo_simples = pickle.load(file)
ml.load_model('model/xgb_new.pkl')
with open('model/features.names', 'rb') as file:
    features = pickle.load(file)

app = Flask(__name__, template_folder='templates')


@app.route('/', methods=['GET', 'POST'])
def main():
    if flask.request.method == 'GET':
        return flask.render_template('airbnb.html')

    if flask.request.method == 'POST':
        user_inputs = {
            'Latitude': flask.request.form['latitude'],
            'Longitude': flask.request.form['longitude'],
            'Minimum Nights': flask.request.form['minimum_nights'],
            'Available Days In A Year': flask.request.form['availability_365'],
Example #22
col_list = list(template.columns)
def complete_cols(userinp):
    for i in range(0,len(col_list)):
        if userinp.lower() in str(col_list[i]):
            colname = str(col_list[i])
            template[colname] = 1 

#converting the binary input
bininput(quilts,'has_quilts')
bininput(logo,'has_logo')
bininput(chain,'has_chain')
bininput(otherdef,'other_defects')
bininput(odor,'has_smell')
complete_cols(material)
complete_cols(color)
complete_cols(style)
complete_cols(size)
complete_cols(year)
complete_cols(cond)
template['acc_included']=acc

#predict value
loaded_model = XGBRegressor(random_state=1)
loaded_model.load_model("xgb1_tunedCHANEL_alldata.model")
prediction = float(loaded_model.predict(template)[0])

#FRONT-END: OUTPUT
st.markdown("---")
st.header("The current resale value of this bag is:")
st.title(f'$%10.f'%prediction)
Example #23
from preprocessing import load_data
from feature_engineering import *
from xgboost import XGBRegressor
import pandas as pd

prediction_path = "../Case_material/predictions/predictions.csv"

(X_test, _) = load_data(train=False)

xgb_model = XGBRegressor(n_jobs=8)
xgb_model.load_model("models/xgb_handin.model")

predictions = xgb_model.predict(X_test)
pd.DataFrame({
    "Prediction": predictions
}).to_csv(prediction_path, sep=",", index=False)

print("First five predictions:", predictions[:5])
print("The predictions were place in:", prediction_path)
Example #24
thresholds = np.sort(model.feature_importances_)
print(thresholds)


for thresh in thresholds: 
    selection = SelectFromModel(model, threshold=thresh, prefit=True)

    select_x_train = selection.transform(x_train)

    selection_model = XGBRegressor()
    selection_model.fit(select_x_train, y_train)

    select_x_test = selection.transform(x_test)
    y_pred = selection_model.predict(select_x_test)
    
    score = r2_score(y_test, y_pred)

    print("thresh=%.3f, n = %d, R2 : %2.f%%" %(thresh, select_x_train.shape[1], score*100.0))

model.save_model('./model/xgb_save/boston_rmse')
print("저장 됬다.")

model2=XGBRegressor()
model2.load_model('./model/xgb_save/boston_rmse')
print("불러왔다.")


y_pred = model2.predict(x_test)
score = r2_score(y_test, y_pred)

print("score : ", score)
Example #25
def linear_model(x_train, y_train):

    reg = LinearRegression().fit(x_train, y_train)
    print(reg.score(x_train, y_train))

    return reg

if __name__ == '__main__':
    ''' build the models and put them into files '''
    train_data = pd.read_csv('./x_train.csv')
    train_labels = pd.read_csv('./y_train.csv')
    # build_models(train_data, train_labels)

    ''' Load the models from their files '''
    XGboost_model = XGBRegressor()
    XGboost_model.load_model('xgboost_model')
    boost_RF_model = XGBRegressor()
    boost_RF_model.load_model('RF_model')


    ''' Initiate score check on the XGBoost model '''
    test_data = pd.read_csv('./x_test.csv')
    test_labels = pd.read_csv('./y_test.csv')

    # Put test_labels into an array
    y_test_num = pd.Series(test_labels.iloc[:, 0]).tolist()
    y_test_num = [round(value) for value in y_test_num]

    ''' XGBoost Predictor '''
    predictions = XGboost_model.predict(test_data)
    predictions = [round(value) for value in predictions]
Example #26
class FeatureRegressorXGB():
    def __init__(self, modelfile='featureregressor_xgb.bin'):
        pwd = os.path.dirname(__file__)
        self.model = XGBRegressor()
        self.model.load_model(pwd + '/models/' + modelfile)

    def check_errors(self, sim):
        if sim.N_real < 4:
            raise AttributeError(
                "SPOCK Error: SPOCK only applicable to systems with 3 or more planets"
            )

    def predict(self, sim):
        """
        Predict instability time (log10(T)) of passed simulation

        Parameters:

        sim (rebound.Simulation): Orbital configuration to test

        Returns:

        float:  Estimated instability log10(time)
                

        """
        triofeatures, stable = self.generate_features(sim)
        if stable == False:
            return 4.0

        triovals = self.predict_from_features(triofeatures)
        return triovals.min()  # minimum time among all trios tested

    def generate_features(self, sim):
        """
        Generates the set of summary features used by the feature classifier for prediction. 

        Parameters:

        sim (rebound.Simulation): Orbital configuration to test

        Returns:

        List of OrderedDicts:   A list of sets of features for each adjacent trio of planets in system.
                                Each set of features is an ordered dictionary of 10 summary features. See paper.
       
        stable (int):           An integer for whether the N-body integration survived the 10^4 orbits (1) or 
                                went unstable (0).
        """
        sim = sim.copy()
        init_sim_parameters(sim)
        self.check_errors(sim)

        trios = [[i, i + 1, i + 2]
                 for i in range(1, sim.N_real - 2)]  # list of adjacent trios
        featureargs = [10000, 80, trios]
        triofeatures, stable = features(
            sim, featureargs)  # stable will be 0 if an orbit is hyperbolic
        # sim.dt = nan in init_sim_parameters

        return triofeatures, stable

    def predict_from_features(self, triofeatures):
        """
        Estimate probability of stability from the list of features created by FeatureClassifier.generate_features.

        Parameters:

        triofeatures (List of Ordered Dicts):   Sets of features for each adjacent planet trio
                                                (returned from FeatureClassifier.generate_features)

        Returns:

        list (float): Estimated probabilities of stability for set of features passed (for each adjacent trio of planets).
        """

        # xgboost model expects a 2D array of shape (Npred, Nfeatures) where Npred is number of samples to predict, Nfeatures is # of features per sample
        expected_features = [
            "EMcrossnear", "MMRstrengthnear", "MMRstrengthfar", "EPstdnear",
            "EMfracstdfar", "EMfracstdnear", "EMcrossfar", "EPstdfar",
            "MEGNOstd", "MEGNO"
        ]
        featurevals = np.array([[obj[feat] for feat in expected_features]
                                for obj in triofeatures])
        predictions = self.model.predict(featurevals)

        return predictions
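A hedged usage sketch for this regressor, assuming a REBOUND simulation with at least three planets (the masses and periods below are made up):

import rebound

sim = rebound.Simulation()
sim.add(m=1.0)           # central star
sim.add(m=3e-6, P=1.0)   # three roughly Earth-mass planets
sim.add(m=3e-6, P=1.6)
sim.add(m=3e-6, P=2.5)

regressor = FeatureRegressorXGB()
print(regressor.predict(sim))  # estimated log10(instability time)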
Example #27
class RaceStrategyModel(object):
    def __init__(self, year: int, verbose=False, n_cores=1):
        print("XGB using {} threads".format(n_cores))
        self.regular_model = XGBRegressor(n_jobs=n_cores)
        self.pit_model = XGBRegressor(n_jobs=n_cores)
        self.safety_model = XGBRegressor(n_jobs=n_cores)
        self.test_race = None
        self.scaler = None
        self.test_race_pit_model = None
        self.dummy_columns = None
        self.n_cores = n_cores
        # self.start_lap = start_lap

        if year == 2014:
            year = "year_1"
        elif year == 2015:
            year = "year_2"
        elif year == 2016:
            year = "year_3"
        elif year == 2017:
            year = "year_4"
        elif year == 2018:
            year = "year_5"
        elif year == 2019:
            year = "year_6"
        else:
            raise ValueError("No race available for year " + str(year))

        self.year = year
        self.verbose = verbose

    def split_train_test(self, df: pd.DataFrame, split_fraction: float):
        """ Split the dataset randomly but keeping whole races together """
        test_data = pd.DataFrame(columns=df.columns)

        races = df[df[self.year] == 1]['raceId'].unique()

        if split_fraction != 0:
            split_size = int(round(split_fraction * len(races)))
        else:
            # Leave only one race out from the training
            split_size = 1

        test_races = np.random.choice(races, size=split_size, replace=False)  # sample distinct races
        for race in test_races:
            race_laps = df.loc[df['raceId'] == race]
            test_data = test_data.append(race_laps)
            df = df[df.raceId != race]

        return df, test_data

    def normalize_dataset(self, df):
        """ Normalize integer-valued columns of the dataset """
        data = df.copy()
        # print(df.columns)
        # Remove columns not to be normalized
        zero_one = [
            'battle', 'drs', "circuitId_1", "circuitId_2", "circuitId_3",
            "circuitId_4", "circuitId_6", "circuitId_7", "circuitId_9",
            "circuitId_10", "circuitId_11", "circuitId_13", "circuitId_14",
            "circuitId_15", "circuitId_17", "circuitId_18", "circuitId_22",
            "circuitId_24", "circuitId_32", "circuitId_34", "circuitId_69",
            "circuitId_70", "circuitId_71", "circuitId_73", "tyre_1", "tyre_2",
            "tyre_3", "tyre_4", "tyre_5", "tyre_6", "year_1", "year_2",
            "year_3", "year_4", "year_5", "year_6", "nextLap", 'pit', 'safety',
            "unnorm_lap"
        ]
        #'milliseconds',
        #'cumulative', 'unnorm_lap']

        temp_df = data[zero_one].copy()
        data.drop(zero_one, axis=1, inplace=True)

        # if self.columns is not None and len(data.columns) != len(self.columns):
        #     print(set(data.columns).difference(set(self.columns)))
        #     exit(-1)

        if not self.scaler:
            self.scaler = MinMaxScaler(feature_range=(-1, 1))
            self.scaler.fit(data)
            scaled = data
        else:
            scaled = self.scaler.transform(data)
        data.loc[:, :] = scaled
        data = data.join(temp_df)

        del temp_df
        return data

    def __process_dataset(self, dataset):
        """ Pre-process the dataset to obtain training data and its labels"""

        # Discard wet and suspended races
        old_races = len(dataset['raceId'].unique())
        dataset = discard_wet(dataset)
        dataset = discard_suspended_races(dataset)
        new_races = len(dataset['raceId'].unique())
        if self.verbose:
            print(
                "{} wet and suspended races were discarded".format(old_races -
                                                                   new_races))

        # Eliminate the last lap from the training data, as it has 0 target
        dataset = dataset[dataset['nextLap'] > 0]

        # Express the next lap target as a delta to the pole lap
        dataset['nextLap'] = (dataset['nextLap'] - dataset['pole'])

        # Duplicate columns to use them after normalization
        dataset['base'] = dataset['pole'].astype(int)
        dataset['true'] = dataset['milliseconds'].astype(int)
        dataset['true_cumulative'] = dataset['cumulative'].astype(int)

        # Normalize the dataset, but normalize the lap time and cumulative time individually, in order to be able to
        # normalize them at runtime

        # Remove the duplicated unnormalized columns from the train data
        dataset = dataset.drop(columns=['base', 'true', 'true_cumulative'])
        dataset = self.normalize_dataset(dataset)

        _, self.test_race = self.split_train_test(dataset, split_fraction=0)
        self.__compute_pitstop_model(dataset)

        self.dummy_columns = dataset.columns
        train_data = self.normalize_dataset(dataset)

        # train_data = train_data[train_data['unnorm_lap'] > self.start_lap]  # Take laps after a threshold

        # Remove columns used only to identify the laps in testing
        train_data = train_data.drop(
            columns=['unnorm_lap', "raceId", "driverId", "race_length"])

        # Split the dataset into three separate datasets, one per each model to be trained
        train_pit = deepcopy(train_data.loc[train_data['pit'] != 0])
        train_safety = deepcopy(train_data.loc[(train_data['safety'] != 0)
                                               & (train_data['pit'] == 0)])
        train_regular = deepcopy(train_data.loc[(train_data['pit'] == 0)
                                                & (train_data['safety'] == 0)])

        # Remove features related to pit and safety in the "regular" laps model
        train_regular = train_regular.drop(
            columns=['safety', 'pit', 'pit-cost', 'pitstop-milliseconds'])

        # Extract the target labels
        labels_pit = train_pit.pop('nextLap')
        labels_safety = train_safety.pop('nextLap')
        labels_regular = train_regular.pop('nextLap')

        train_data = {
            'regular': train_regular,
            'safety': train_safety,
            'pit': train_pit
        }
        labels = {
            'regular': labels_regular,
            'safety': labels_safety,
            'pit': labels_pit
        }

        return train_data, labels

    def __compute_pitstop_model(self, full_dataset: pd.DataFrame):
        """Compute a normal distribution's parameters for each driver's pit-stop times"""

        circuit = get_current_circuit(self.test_race)

        pits = []
        pits_safety = []

        stop_laps = full_dataset[(full_dataset['pitstop-milliseconds'] > 0) & (
            full_dataset[circuit] == 1)].sort_values('lap')

        pit_times = stop_laps[stop_laps['safety'] ==
                              0]['pitstop-milliseconds'].values
        pit_safety_times = stop_laps[
            stop_laps['safety'] > 0]['pitstop-milliseconds'].values
        pits.extend(pit_times.tolist())
        pits_safety.extend(pit_safety_times.tolist())

        safety_mean = np.mean(
            pit_safety_times) if len(pit_safety_times) > 0 else 0
        safety_std = np.std(
            pit_safety_times) if len(pit_safety_times) > 0 else 0

        mean = np.mean(pit_times) if len(pit_times) > 0 else 0
        std = np.std(pit_times) if len(pit_times) > 0 else 0

        self.test_race_pit_model = {
            'regular': (mean, std),
            'safety': (safety_mean, safety_std)
        }

    def train(self):
        """ Train the regression models """
        if self.verbose:
            print('Training models...')
        self.scaler = None
        if self.verbose:
            print("Model uses {} cores".format(self.n_cores))
        # self.regular_model = XGBRegressor(n_jobs=self.n_cores)
        # self.pit_model = XGBRegressor(n_jobs=self.n_cores)
        # self.safety_model = XGBRegressor(n_jobs=self.n_cores)

        dataset = load_dataset()
        datasets, labels = self.__process_dataset(dataset)

        self.regular_model.fit(datasets['regular'], labels['regular'])
        self.pit_model.fit(datasets['pit'], labels['pit'])
        self.safety_model.fit(datasets['safety'], labels['safety'])

        if self.verbose:
            print('Done!\n')

    def resplit(self):
        # TODO fix the invalidation of scaler to avoid the normalization of test races
        self.scaler = None
        dataset = load_dataset()
        self.__process_dataset(dataset)
        self.test_race = fix_data_types(self.test_race)
        self.laps_database = defaultdict(lambda: None)
        self.race_id = self.test_race["raceId"].values[0]

        for i in range(self.test_race["lap"].count()):
            row = self.test_race.iloc[[i]]
            self.laps_database[(row["driverId"].values[0],
                                row["lap"].values[0])] = row

    def load(self):
        """ Restore prediction models from previously pickled files to avoid retraining """

        if self.verbose:
            print("Loading prediction models from pickled files...")
        if not os.path.isfile(
                "./envs/race_strategy_model/pickled_models/regular.model"):
            print("ERROR: regular.model is missing")
            exit(-1)
        else:
            self.regular_model.load_model(
                './envs/race_strategy_model/pickled_models/regular.model')

        if not os.path.isfile(
                "./envs/race_strategy_model/pickled_models/safety.model"):
            print("ERROR: safety.model is missing")
            exit(-1)
        else:
            self.safety_model.load_model(
                './envs/race_strategy_model/pickled_models/safety.model')

        if not os.path.isfile(
                "./envs/race_strategy_model/pickled_models/pit.model"):
            print("ERROR: pit.model is missing")
            exit(-1)
        else:
            self.pit_model.load_model(
                './envs/race_strategy_model/pickled_models/pit.model')

        if not os.path.isfile(
                "./envs/race_strategy_model/pickled_models/scaler.pickle"):
            print("ERROR: scaler.pickle is missing")
            exit(-1)
        else:
            with open(
                    './envs/race_strategy_model/pickled_models/scaler.pickle',
                    'rb') as scaler_file:
                self.scaler = pickle.load(scaler_file)
                scaler_file.close()

        # if not os.path.isfile("pickled_models/test_race.pickle"):
        #     print("ERROR: test_race.pickle is missing")
        #     exit(-1)
        # else:
        #     with open('pickled_models/test_race.pickle', 'rb') as pit_file:
        #         self.pit_model = pickle.load(pit_file)
        #         pit_file.close()

        if self.verbose:
            print("Done!\n")

        # self.regular_model.set_params(**{"n_jobs": self.n_cores})
        # self.safety_model.set_params(**{"n_jobs": self.n_cores})
        # self.pit_model.set_params(**{"n_jobs": self.n_cores})
        print(self.regular_model.get_params())

    def save(self):
        """ Pickle the model objects to avoid retraining """

        for model, name in zip(
            [self.regular_model, self.safety_model, self.pit_model],
            ['regular', 'safety', 'pit']):
            model.save_model(
                './envs/race_strategy_model/pickled_models/{}.model'.format(
                    name))

        with open('./envs/race_strategy_model/pickled_models/scaler.pickle',
                  'wb') as savefile:
            pickle.dump(self.scaler, savefile)
            savefile.close()
        #self.test_race.to_csv(".envs/race_strategy_model/dataset/test_race.csv")

    def predict(self, state, lap_type):
        if lap_type == 'regular':
            # drop() returns a copy, so reassign to actually remove the columns
            state = state.drop(
                columns=['safety', 'pit', 'pit-cost', 'pitstop-milliseconds'])
            return self.regular_model.predict(state)
        elif lap_type == 'pit':
            return self.pit_model.predict(state)
        else:
            return self.safety_model.predict(state)

    def get_prediction_model(self, state: str):
        if state == 'regular':
            return self.regular_model
        if state == 'safety':
            return self.safety_model
        if state == 'pit':
            return self.pit_model
        else:
            raise ValueError(
                "The specified state is not valid, allowed model states are 'regular', 'safety' and 'pit'"
            )
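A hypothetical end-to-end usage of RaceStrategyModel, assuming the dataset loaders and pickled-model paths used inside the class are available:

model = RaceStrategyModel(year=2019, verbose=True, n_cores=4)
model.train()    # fits the regular/pit/safety regressors
model.save()     # writes the three .model files plus scaler.pickle

predictor = RaceStrategyModel(year=2019)
predictor.load()  # restores the pickled models instead of retraining
# lap_delta = predictor.predict(state, lap_type='regular')  # state: one pre-processed lap row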
Example #28
import os
import json
import numpy as np

from xgboost import XGBRegressor
from utils import (
    add_handler,
    init_logger
)


ROOT = os.path.abspath(os.path.dirname(__file__))
PATH = 'xgb_trained.bin'

# Load trained model
xgb_trained = XGBRegressor()
xgb_trained.load_model(os.path.join(ROOT, PATH))

def handler(event, context): 

    # Initialize Logger
    log = init_logger()
    log = add_handler(log)

    input_data = json.loads(event['body'])
    log.info(f"Input data: {input_data}")

    # Retrieve inputs
    input_X = input_data['input_X']

    # Process input data
    log.info("Processing input data")
Example #29
def main():
    print("Loading data...")
    # The training data is used to train your model how to predict the targets.
    training_data = read_csv("numerai_training_data.csv")
    # The tournament data is the data that Numerai uses to evaluate your model.
    tournament_data = read_csv("numerai_tournament_data.csv")

    feature_names = [
        f for f in training_data.columns if f.startswith("feature")
    ]
    print(f"Loaded {len(feature_names)} features")

    # This is the model that generates the included example predictions file.
    # Taking too long? Set learning_rate=0.1 and n_estimators=200 to make this run faster.
    # Remember to delete example_model.xgb if you change any of the parameters below.
    model = XGBRegressor(max_depth=5,
                         learning_rate=0.01,
                         n_estimators=2000,
                         n_jobs=-1,
                         colsample_bytree=0.1)
    if MODEL_FILE.is_file():
        print("Loading pre-trained model...")
        model.load_model(MODEL_FILE)
    else:
        print("Training model...")
        model.fit(training_data[feature_names], training_data[TARGET_NAME])
        model.save_model(MODEL_FILE)

    # Generate predictions on both training and tournament data
    print("Generating predictions...")
    training_data[PREDICTION_NAME] = model.predict(
        training_data[feature_names])
    tournament_data[PREDICTION_NAME] = model.predict(
        tournament_data[feature_names])

    # Check the per-era correlations on the training set (in sample)
    train_correlations = training_data.groupby("era").apply(score)
    print(
        f"On training the correlation has mean {train_correlations.mean()} and std {train_correlations.std()}"
    )
    print(
        f"On training the average per-era payout is {payout(train_correlations).mean()}"
    )
    """Validation Metrics"""
    # Check the per-era correlations on the validation set (out of sample)
    validation_data = tournament_data[tournament_data.data_type ==
                                      "validation"]
    validation_correlations = validation_data.groupby("era").apply(score)
    print(
        f"On validation the correlation has mean {validation_correlations.mean()} and "
        f"std {validation_correlations.std(ddof=0)}")
    print(
        f"On validation the average per-era payout is {payout(validation_correlations).mean()}"
    )

    # Check the "sharpe" ratio on the validation set
    validation_sharpe = validation_correlations.mean(
    ) / validation_correlations.std(ddof=0)
    print(f"Validation Sharpe: {validation_sharpe}")

    print("checking max drawdown...")
    rolling_max = (validation_correlations + 1).cumprod().rolling(
        window=100, min_periods=1).max()
    daily_value = (validation_correlations + 1).cumprod()
    max_drawdown = -(rolling_max - daily_value).max()
    print(f"max drawdown: {max_drawdown}")

    # Check the feature exposure of your validation predictions
    feature_exposures = validation_data[feature_names].apply(
        lambda d: correlation(validation_data[PREDICTION_NAME], d), axis=0)
    max_per_era = validation_data.groupby("era").apply(
        lambda d: d[feature_names].corrwith(d[PREDICTION_NAME]).abs().max())
    max_feature_exposure = max_per_era.mean()
    print(f"Max Feature Exposure: {max_feature_exposure}")

    # Check feature neutral mean
    print("Calculating feature neutral mean...")
    feature_neutral_mean = get_feature_neutral_mean(validation_data)
    print(f"Feature Neutral Mean is {feature_neutral_mean}")

    # Load example preds to get MMC metrics
    example_preds = pd.read_csv("example_predictions.csv").set_index(
        "id")["prediction"]
    validation_example_preds = example_preds.loc[validation_data.index]
    validation_data["ExamplePreds"] = validation_example_preds

    print("calculating MMC stats...")
    # MMC over validation
    mmc_scores = []
    corr_scores = []
    for _, x in validation_data.groupby("era"):
        series = neutralize_series(pd.Series(unif(x[PREDICTION_NAME])),
                                   pd.Series(unif(x["ExamplePreds"])))
        mmc_scores.append(np.cov(series, x[TARGET_NAME])[0, 1] / (0.29**2))
        corr_scores.append(
            correlation(unif(x[PREDICTION_NAME]), x[TARGET_NAME]))

    val_mmc_mean = np.mean(mmc_scores)
    val_mmc_std = np.std(mmc_scores)
    val_mmc_sharpe = val_mmc_mean / val_mmc_std
    corr_plus_mmcs = [c + m for c, m in zip(corr_scores, mmc_scores)]
    corr_plus_mmc_sharpe = np.mean(corr_plus_mmcs) / np.std(corr_plus_mmcs)
    corr_plus_mmc_mean = np.mean(corr_plus_mmcs)
    corr_plus_mmc_sharpe_diff = corr_plus_mmc_sharpe - validation_sharpe

    print(f"MMC Mean: {val_mmc_mean}\n"
          f"Corr Plus MMC Sharpe:{corr_plus_mmc_sharpe}\n"
          f"Corr Plus MMC Diff:{corr_plus_mmc_sharpe_diff}")

    # Check correlation with example predictions
    full_df = pd.concat([
        validation_example_preds, validation_data[PREDICTION_NAME],
        validation_data["era"]
    ],
                        axis=1)
    full_df.columns = ["example_preds", "prediction", "era"]
    per_era_corrs = full_df.groupby('era').apply(
        lambda d: correlation(unif(d["prediction"]), unif(d["example_preds"])))
    corr_with_example_preds = per_era_corrs.mean()
    print(f"Corr with example preds: {corr_with_example_preds}")

    # Save predictions as a CSV and upload to https://numer.ai
    tournament_data[PREDICTION_NAME].to_csv("submission.csv", header=True)
Example #30
### Method 1 ###
# functionality built into Python itself
import pickle
pickle.dump(model, open('../data/xgb_save/m39.pickle.dat', 'wb')) # dump == save, write binary
print('pickle save complete')

model_pic = pickle.load(open('../data/xgb_save/m39.pickle.dat', 'rb'))
print('pickle load complete')
r2_pic = model_pic.score(x_test, y_test)
print('r2 pickle :', r2_pic)

### Method 2 ###
import joblib
joblib.dump(model, '../data/xgb_save/m40.joblib.dat') # unlike pickle, just pass a path, no open() needed
print('joblib save complete')

model_job = joblib.load('../data/xgb_save/m40.joblib.dat')
print('joblib load complete')
r2_job = model_job.score(x_test, y_test)
print('r2 joblib :', r2_job)

### Method 3 ###
# xgboost's own save/load API
model.save_model("../data/xgb_save/m41.xgb.model")
print('xgb model save complete')

model_xgb = XGBRegressor()
model_xgb.load_model('../data/xgb_save/m41.xgb.model')
print('xgb model load complete')
r2_xgb = model_xgb.score(x_test, y_test)
print('r2 xgb model : ', r2_xgb)