import numpy as np from functools import partial import xgboost as xgb seed = 1 np.random.seed(seed) from million import tools, data, model_params from million._config import NULL_VALUE from million.experiments.try_stack import n_folds, n_models from kfuncs import tools as ktools import seamless as ss from scipy.optimize import minimize cache_dir = tools.cache_dir() LOG_FILE = tools.experiments() + 'millions_try_stackcomp.txt' def optimise_weights(preds, targets, init_weights, minimise=True): constraints = ({'type': 'eq', 'fun': lambda w: 1 - sum(w)}, ) bounds = [(-1, 1)] * len(preds) func = partial(optim_func, preds=preds, targets=targets) result = minimize(func, init_weights, method='SLSQP', bounds=bounds, constraints=constraints) return result def optim_func(weights, preds, targets):
import numpy as np
from sklearn.metrics import mean_squared_error

from million import data, features, tools
from million._config import NULL_VALUE, test_columns, test_dates
from million import model_params

# Experiment configuration.
cv_flag = True          # presumably toggles a CV/holdout run — TODO confirm against usage below
seed = 1                # RNG seed applied in __main__
cv_split_ratio = 0.8    # presumably the train fraction of the CV split — TODO confirm
n_bags = 1              # number of bagged runs

LOG_FILE = tools.experiments() + 'millions_try_xgb_.txt'
logger = tools.get_logger(LOG_FILE)

epochs = 30
batch_size = 64

if __name__ == '__main__':
    np.random.seed(seed)

    # Load raw train/test frames and merge them so that cleaning and label
    # encoding are applied consistently to both.
    df_train, df_test = data.load_data(cache=True)
    df = data.create_fulldf(df_train, df_test)

    df = df.fillna(NULL_VALUE)
    df = data.clean_data(df)
    df = data.encode_labels(df)
    #df = features.add_features(df)

    # Targets are the Zillow log-error column; feature selection happens last
    # so 'logerror' is captured before columns are dropped.
    logerror = df['logerror'].values
    targets = logerror
    df = data.select_features(df)