import numpy as np
from functools import partial
import xgboost as xgb

# Global RNG seed so stacking runs are reproducible.
seed = 1
np.random.seed(seed)

from million import tools, data, model_params
from million._config import NULL_VALUE
from million.experiments.try_stack import n_folds, n_models
from kfuncs import tools as ktools
import seamless as ss
from scipy.optimize import minimize

# Project cache directory and the log file for this stacking experiment.
cache_dir = tools.cache_dir()
LOG_FILE = tools.experiments() + 'millions_try_stackcomp.txt'


def optimise_weights(preds, targets, init_weights, minimise=True):
    """Search for blend weights over `preds` that minimise `optim_func`.

    Runs SLSQP from `init_weights` with each weight bounded to [-1, 1]
    and an equality constraint forcing the weights to sum to 1.

    NOTE(review): the `minimise` flag is accepted but never used — the
    search always minimises.

    Returns the full scipy ``OptimizeResult``.
    """
    objective = partial(optim_func, preds=preds, targets=targets)
    # Equality constraint: the blend weights must sum to exactly 1.
    sum_to_one = ({'type': 'eq', 'fun': lambda w: 1 - sum(w)},)
    per_weight_bounds = [(-1, 1) for _ in preds]
    return minimize(
        objective,
        init_weights,
        method='SLSQP',
        bounds=per_weight_bounds,
        constraints=sum_to_one,
    )


def optim_func(weights, preds, targets):
    """Objective for `optimise_weights`: error of the weighted blend.

    NOTE(review): the original body was lost to extraction garbage in the
    scraped source (an "Example #2" header and a vote count replaced it);
    reconstructed here as the mean absolute error of the weighted
    combination of `preds` — the conventional objective for an SLSQP
    sum-to-one ensemble-weight search. Confirm against project history.

    weights -- 1-D array-like, one coefficient per prediction vector.
    preds   -- sequence of equal-length prediction arrays.
    targets -- ground-truth array of the same length.
    Returns a scalar float error (lower is better).
    """
    blend = np.zeros_like(np.asarray(targets, dtype=float))
    for w, p in zip(weights, preds):
        blend += w * np.asarray(p, dtype=float)
    return float(np.mean(np.abs(blend - targets)))
import numpy as np
from sklearn.metrics import mean_squared_error

from million import data, features, tools
from million._config import NULL_VALUE, test_columns, test_dates
from million import model_params

# Experiment configuration (usages live further down the file / in helpers).
cv_flag = True          # presumably toggles cross-validation — confirm at use site
seed = 1                # RNG seed for reproducibility
cv_split_ratio = 0.8    # presumably the train fraction of the CV split — confirm
n_bags = 1              # number of bagged model fits

# Log file for this experiment; tools.get_logger wires a logger to it.
LOG_FILE = tools.experiments() + 'millions_try_xgb_.txt'
logger = tools.get_logger(LOG_FILE)

epochs = 30             # NOTE(review): epochs/batch_size suggest a NN stage — confirm
batch_size = 64

if __name__ == '__main__':
    np.random.seed(seed)
    # Load cached train/test frames and stack them into one frame so that
    # cleaning and label encoding are applied consistently to both.
    df_train, df_test = data.load_data(cache=True)
    df = data.create_fulldf(df_train, df_test)

    # Replace NaNs with the project-wide sentinel before cleaning/encoding.
    df = df.fillna(NULL_VALUE)
    df = data.clean_data(df)
    df = data.encode_labels(df)
    #df = features.add_features(df)

    # Regression target is the logerror column; keep it before the feature
    # selection step narrows the frame's columns.
    logerror = df['logerror'].values
    targets = logerror
    df = data.select_features(df)