import os

import lightgbm as lgb

# Project-local helpers assumed available in this module: trains, human,
# redirect, lgbooster, the shared objective helper check(), the model
# trainer model(), and a module-level logger.


def check_min_data(trial, params, **args):
    # Tune the minimal number of samples per leaf.
    min_data = trial.suggest_int("min_data", 5, 100)
    params = dict(params, min_data=min_data)
    score = check(trial, params, **args)
    acc = human.humanacc(trial.user_attrs["acc"])
    logger.debug("- min_data trial %d: %s [min_data=%s]" %
                 (trial.number, acc, params["min_data"]))
    return score
def check_regular(trial, params, **args):
    # Tune the L1/L2 regularization strengths.
    lambda_l1 = trial.suggest_float("lambda_l1", 1e-8, 10.0)
    lambda_l2 = trial.suggest_float("lambda_l2", 1e-8, 10.0)
    params = dict(params, lambda_l1=lambda_l1, lambda_l2=lambda_l2)
    score = check(trial, params, **args)
    acc = human.humanacc(trial.user_attrs["acc"])
    logger.debug("- regular trial %d: %s [l1=%s, l2=%s]" %
                 (trial.number, acc, params["lambda_l1"], params["lambda_l2"]))
    return score
def train(f_train, f_test, d_tmp="optuna-tmp", phases="l:b:m:r", iters=100,
          timeout=None, init_params=None, usebar=True, min_leaves=256,
          max_leaves=32768):
    (xs, ys) = trains.load(f_train)
    dtrain = lgb.Dataset(xs, label=ys)
    testd = trains.load(f_test) if f_test != f_train else (xs, ys)
    os.makedirs(d_tmp, exist_ok=True)
    redirect.module("optuna", os.path.join(d_tmp, "optuna.log"))
    params = dict(lgbooster.DEFAULTS)
    if init_params:
        params.update(init_params)
    pos = sum(ys)
    neg = len(ys) - pos
    #params["scale_pos_weight"] = neg / pos
    params["is_unbalance"] = "true" if neg != pos else "false"
    phases = phases.split(":")
    if "m" in phases:
        # LightGBM requires feature_pre_filter=false when min_data changes
        # between trials on the same Dataset.
        params["feature_pre_filter"] = "false"
    # Split the total budget evenly among the tuning phases.
    timeout = timeout / len(phases) if timeout else None
    iters = iters // len(phases) if iters else None
    args = dict(dtrain=dtrain, testd=testd, d_tmp=d_tmp, iters=iters,
                timeout=timeout, usebar=usebar, min_leaves=min_leaves,
                max_leaves=max_leaves)
    if init_params is not None:
        # Train a baseline model first so tuned models must beat it.
        f_mod = os.path.join(d_tmp, "init.lgb")
        (score, acc, dur) = model(params, dtrain, testd, f_mod,
                                  "[init]" if usebar else None)
        best = (score, acc, f_mod, dur)
        logger.debug("- initial model: %s" % human.humanacc(acc))
    else:
        best = (-1, None, None, None)
    for phase in phases:
        # Each phase runner returns its best Optuna trial (see the PHASES
        # sketch below).
        trial = PHASES[phase](params=params, **args)
        if trial.user_attrs["score"] > best[0]:
            best = tuple(trial.user_attrs[x]
                         for x in ["score", "acc", "model", "time"])
        params.update(trial.params)
    #if "num_leaves_base" in params:
    #    params["num_leaves"] = round(2**(params["num_leaves_base"]/2))
    #    del params["num_leaves_base"]
    return best + (params, pos, neg)
def check_leaves(trial, params, min_leaves, max_leaves, **args):
    # Tune the tree size.
    #num_leaves_base = trial.suggest_int('num_leaves_base', 16, 31)
    #num_leaves = round(2**(num_leaves_base/2))
    num_leaves = trial.suggest_int('num_leaves', min_leaves, max_leaves)
    params = dict(params, num_leaves=num_leaves)
    score = check(trial, params, **args)
    acc = human.humanacc(trial.user_attrs["acc"])
    logger.debug("- leaves trial %d: %s [num_leaves=%s]" %
                 (trial.number, acc, params["num_leaves"]))
    return score
def check_bagging(trial, params, **args):
    # Tune bagging; the epsilon/min() dance keeps 1.0 itself reachable
    # while guarding against floating-point overshoot.
    bagging_freq = trial.suggest_int("bagging_freq", 1, 7)
    bagging_fraction = min(
        trial.suggest_float("bagging_fraction", 0.4, 1.0 + 1e-12), 1.0)
    params = dict(params, bagging_freq=bagging_freq,
                  bagging_fraction=bagging_fraction)
    score = check(trial, params, **args)
    acc = human.humanacc(trial.user_attrs["acc"])
    logger.debug("- bagging trial %d: %s [freq=%s, frac=%s]" %
                 (trial.number, acc, params["bagging_freq"],
                  params["bagging_fraction"]))
    return score
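# The PHASES table used by train() is not shown in this section. Below is a
# minimal sketch of what it plausibly looks like: each phase runner wraps one
# of the check_* objectives in its own Optuna study and returns the study's
# best trial, whose user_attrs carry score/acc/model/time as read by train().
# The helper make_phase() and the study settings here are assumptions for
# illustration, not the project's confirmed implementation.
import functools

import optuna


def make_phase(objective):
    def run(params, iters, timeout, **args):
        study = optuna.create_study(direction="maximize")
        study.optimize(functools.partial(objective, params=params, **args),
                       n_trials=iters, timeout=timeout)
        return study.best_trial
    return run

PHASES = {
    "l": make_phase(check_leaves),    # num_leaves
    "b": make_phase(check_bagging),   # bagging_freq, bagging_fraction
    "m": make_phase(check_min_data),  # min_data
    "r": make_phase(check_regular),   # lambda_l1, lambda_l2
}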
def lgbtune(f_train, f_test, d_tmp="optuna-tmp", phases="l:b:m:r", iters=None,
            timeout=3600, init_params={}, min_leaves=256, max_leaves=32768):
    # Note: init_params defaults to {} (not None) on purpose; any non-None
    # value makes train() build an initial baseline model first.
    (_, acc, f_mod, _, params, _, _) = train(f_train, f_test, d_tmp, phases,
                                             iters, timeout, init_params,
                                             True, min_leaves, max_leaves)
    logger.info("")
    logger.info("Best model params: %s" % str(params))
    logger.info("Best model accuracy: %s" % human.humanacc(acc))
    logger.info("Best model file: %s" % f_mod)
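# Example invocation: the data paths below are hypothetical and assume
# whatever feature-vector format trains.load() expects. With a one-hour
# default timeout and four phases, each phase gets roughly 15 minutes.
if __name__ == "__main__":
    lgbtune("data/train.in", "data/test.in", d_tmp="optuna-tmp",
            phases="l:b:m:r", timeout=600)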