Exemplo n.º 1
0
 def f(mi, s, md, r, c):
     """Objective for hyper-parameter search.

     Fits a BoostedTreesClassifier (max_iterations=mi, step_size=s,
     max_depth=md, row/column subsample rates r and c) on the validation
     split, then returns its accuracy score on the training split.
     Relies on module-level X, y, valid_idx, train_idx.
     """
     model = BoostedTreesClassifier(
         max_iterations=int(mi) if False else mi,  # NOTE(review): mi may be float from the optimiser — TODO confirm
         step_size=s,
         max_depth=md,
         row_subsample=r,
         column_subsample=c,
         verbose=0,
     )
     model.fit(X[valid_idx], y[valid_idx])
     # Score on the held-out (training) indices; higher is better.
     return model.score(X[train_idx], y[train_idx])
Exemplo n.º 2
0
def OptBTC(step_size=.5,
           max_iterations=100,
           row_subsample=.9,
           column_subsample=.9,
           max_depth=8):
    """Build a BoostedTreesClassifier with the given hyper-parameters,
    log them, and return the cross-validated performance report for the
    'original' feature set."""
    hyper_params = dict(step_size=step_size,
                        max_iterations=max_iterations,
                        row_subsample=row_subsample,
                        column_subsample=column_subsample,
                        max_depth=max_depth)
    model = BoostedTreesClassifier(**hyper_params)
    # Lazy %-formatting so get_params() is only rendered when logged.
    logger.info("Params: %s", model.get_params())
    return ReportPerfCV(model, "original", y)
Exemplo n.º 3
0
 def f(params):
     """Hyperopt-style objective.

     ``params`` maps short keys to hyper-parameters: 'mi' (max
     iterations), 'md' (max depth), 's' (step size), 'r'/'c'
     (row/column subsample).  Fits on the validation split and
     returns the log-loss on the training split (to be minimised).
     Relies on module-level X, y, valid_idx, train_idx.
     """
     # Tree counts / depths arrive as floats from the sampler.
     iterations = int(params['mi'])
     depth = int(params['md'])
     clf = BoostedTreesClassifier(max_iterations=iterations,
                                  step_size=params['s'],
                                  max_depth=depth,
                                  row_subsample=params['r'],
                                  column_subsample=params['c'],
                                  verbose=0)
     clf.fit(X[valid_idx], y[valid_idx])
     probs = clf.predict_proba(X[train_idx])
     return log_loss(y[train_idx], probs)
Exemplo n.º 4
0
     'row_subsample': Uniform(.3, 1.),
     'column_subsample': Uniform(.3, 1.),
     'min_child_weight': LogUniform(.01, 100),
     'min_loss_reduction': Uniform(0.0001, 10)
 }
 from gl import BoostedTreesClassifier
 # Silence GraphLab's own logger so only critical messages appear.
 logger2 = logging.getLogger('graphlab')
 logger2.setLevel(logging.CRITICAL)
 # Level-1 model predictions used as features for the ensemble stage.
 CONFIG['ensemble_list'] = [
     'btc', 'btc2', 'btc3', 'btc4', 'svc', 'svc2', 'svc3', 'nn', 'nn2',
     'nic', 'mpc', 'knc', 'etc', 'cccv', 'log', 'crfcbag', 'cetcbag',
     'keras'
 ]
 X, Xtest = GetDataset('ensemble', ensemble_list=CONFIG['ensemble_list'])
 print "lkjr"
 # Gaussian-process-guided hyper-parameter search over
 # param_distributions, scored by log-loss with 5-fold CV and a
 # 24-hour wall-clock budget.
 clf = GaussianProcessCV(estimator=BoostedTreesClassifier(verbose=False),
                         param_distributions=param_distributions,
                         kernel=DoubleExponential,
                         scoring=LogLoss,
                         mu_prior=-1.,
                         sigma_prior=.30,
                         sig=.01,
                         cv=5,
                         max_iter=55,
                         random_state=1,
                         time_budget=24 * 3600)
 clf.fit(X, y)
 clf = RandomSearchCV(estimator=BoostedTreesClassifier(verbose=False),
                      param_distributions=param_distributions,
                      cv=5,
                      max_iter=100,
Exemplo n.º 5
0
        res_cv   = res_cv   + yhat_cv
        res_test = res_test + yhat_test
        print i, log_loss(y, yhat_cv)
    res_cv = res_cv/n_bag
    res_test = res_test/n_bag
    np.savez_compressed('../Submission/yhat_cetcbag_full.npz', yhat = res_cv)
    np.savez_compressed('../Submission/yhat_cetcbag_test.npz', yhat = res_test)
if False:
    # Bagged BoostedTreesClassifier run: average predictions over up to
    # n_bag refits (each GetYhat call differs via the RNG), with a
    # 9-hour wall-clock budget.  Disabled by the `if False` guard.
    X, y, Xtest = LoadData()
    from gl import BoostedTreesClassifier
    import json
    import time
    # Reload the best hyper-parameters found by an earlier search.
    params = json.load(open('../Params/Best/BTC:original_saved_params.json'))
    params = params['BTC:original']
    np.random.seed(1)
    clf = BoostedTreesClassifier(**params)
    # Accumulators start at scalar 0 and become arrays on first add.
    res_cv, res_test = 0, 0
    n_bag = 10

    time_before = time.time()
    for i in xrange(n_bag):
        yhat_cv, yhat_test = GetYhat(clf, 'original', y)
        res_cv   = res_cv   + yhat_cv
        res_test = res_test + yhat_test
        print i, log_loss(y, yhat_cv)
        # Stop early if the 9-hour budget is exhausted.
        if time.time() - time_before > 9*3600: break
    # Divide by the number of bags actually completed (i+1), not n_bag.
    res_cv = res_cv/(i+1)
    res_test = res_test/(i+1)
    np.savez_compressed('../Submission/yhat_btcbag_full.npz', yhat = res_cv)
    np.savez_compressed('../Submission/yhat_btcbag_test.npz', yhat = res_test)
if True:
Exemplo n.º 6
0
import numpy as np
import itertools
from scipy.stats import uniform
from scipy.stats import randint
import logging 

from ml import *


# Load the 'original' feature matrix and the matching labels.
X, _ = GetDataset('original')
_, y, _ = LoadData()

from gl import BoostedTreesClassifier
clf = BoostedTreesClassifier(verbose = 0)
# Warm-up fit on a tiny slice (presumably to trigger lazy GraphLab
# initialisation before timing/search begins — TODO confirm).
clf.fit(X[:10], y[:10])
np.random.seed(1)
if False:
    Cs = [.001, .01, .1, 1., 10.]
    gammas = [.001, .01, .1, 1., 10.]
    res = []; i = 0
    for (C, gamma) in itertools.product(Cs, gammas):
        print i, C, gamma; i += 1
        clf = SVC(C = C, gamma = gamma)
        clf.fit(X[train], y[train])
        res.append(clf.score(X[valid], y[valid]))

    res2 = []
    for i in xrange(len(res)*100):
        C     = 10**uniform(-3.5,5).rvs()
        gamma = 10**uniform(-3.5,5).rvs()
        print i, C, gamma