def regression():
    """Run a small multi-output regression demo on uniformly random data.

    Builds a ``GBDTMulti`` booster with the ``b"mse"`` loss, taking
    ``max_depth`` and ``lr`` from the module-level ``args`` (``args.depth``
    and ``args.lr``). Training and validation sets are each 10000 rows of
    10 random features with 5 random targets. After ``train(20)`` the
    booster's ``dump`` method is called with ``b"regression.txt"``.
    """
    n_rows = 10000
    n_features, n_targets = 10, 5

    settings = dict(max_depth=args.depth, lr=args.lr, loss=b"mse")
    model = GBDTMulti(LIB, out_dim=n_targets, params=settings)

    # The draws are kept in this order (train x, train y, valid x, valid y)
    # so that a seeded global NumPy RNG yields the same arrays as before.
    train_x = np.random.rand(n_rows, n_features)
    train_y = np.random.rand(n_rows, n_targets)
    valid_x = np.random.rand(n_rows, n_features)
    valid_y = np.random.rand(n_rows, n_targets)

    model.set_data((train_x, train_y), (valid_x, valid_y))
    model.train(20)
    model.dump(b"regression.txt")
def classification():
    """Run a small multi-class classification demo on random data.

    Builds a ``GBDTMulti`` booster with the ``b"ce"`` loss, taking
    ``max_depth`` and ``lr`` from the module-level ``args`` (``args.depth``
    and ``args.lr``). Features are 10000 x 10 uniform random values; labels
    are int32 integers drawn from ``[0, 5)``. After ``train(20)`` the
    booster's ``dump`` method is called with ``b"classification.txt"``.
    """
    n_rows = 10000
    n_features, n_classes = 10, 5

    settings = dict(max_depth=args.depth, lr=args.lr, loss=b"ce")
    model = GBDTMulti(LIB, out_dim=n_classes, params=settings)

    # The draws are kept in this order (train x, train y, valid x, valid y)
    # so that a seeded global NumPy RNG yields the same arrays as before.
    train_x = np.random.rand(n_rows, n_features)
    train_y = np.random.randint(0, n_classes, size=(n_rows, ))
    train_y = train_y.astype("int32")
    valid_x = np.random.rand(n_rows, n_features)
    valid_y = np.random.randint(0, n_classes, size=(n_rows, ))
    valid_y = valid_y.astype("int32")

    model.set_data((train_x, train_y), (valid_x, valid_y))
    model.train(20)
    model.dump(b"classification.txt")
def classification(data, meta, depth, lr, k, one_side):
    """Train a cross-entropy ``GBDTMulti`` model and print how long it took.

    Args:
        data: Sequence unpacked as ``(x_train, y_train, x_test, y_test)``.
        meta: Mapping read at ``'bin'`` (passed as ``max_bins``) and
            ``'out'`` (passed as ``out_dim``).
        depth: Passed as ``max_depth``; also sets ``max_leaves`` to
            ``int(0.75 * 2**depth)``.
        lr: Passed as ``lr``.
        k: Passed as ``topk``.
        one_side: Passed as ``one_side``.

    Prints the configuration first. After ``train(ROUND)`` it prints the
    elapsed time divided by ``ROUND`` when ``args.time == 1``, otherwise the
    total elapsed time. Relies on the module-level ``LIB``, ``ROUND`` and
    ``args``.
    """
    print("depth: {}, lr: {}, k: {}, one_side: {}".format(
        depth, lr, k, one_side))

    params = {
        'max_depth': depth,
        'max_leaves': int(0.75 * 2**depth),
        'topk': k,
        'loss': b"ce",
        'gamma': 1e-3,
        'num_threads': 8,
        'max_bins': meta['bin'],
        'lr': lr,
        'reg_l2': 1.0,
        'early_stop': 25,
        'one_side': one_side,
        'verbose': False,
        'min_samples': 16,
    }
    model = GBDTMulti(LIB, out_dim=meta['out'], params=params)

    x_train, y_train, x_test, y_test = data
    model.set_data((x_train, y_train), (x_test, y_test))

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the interval.
    start = time.perf_counter()
    model.train(ROUND)
    elapsed = time.perf_counter() - start

    if args.time == 1:
        print("Average time: {:.3f}".format(elapsed / ROUND))
    else:
        print("Total time: {:.3f}".format(elapsed))

    # Drop the booster reference explicitly, as the original code did.
    del model
def regression_multi(data, meta, depth, lr):
    """Train an MSE ``GBDTMulti`` model and return the training time.

    Args:
        data: Sequence of exactly four items unpacked as
            ``(x_train, y_train, x_test, y_test)``; only the training pair
            is handed to the booster.
        meta: Mapping read at ``'bin'`` (passed as ``max_bins``) and
            ``'out'`` (passed as ``out_dim``).
        depth: Passed as ``max_depth``; also sets ``max_leaves`` to
            ``int(0.75 * 2**depth)``.
        lr: Passed as ``lr``.

    Returns:
        Elapsed seconds (float) spent inside ``train(ROUND)``.

    Relies on the module-level ``LIB``, ``ROUND`` and ``args.workers``.
    """
    params = {
        'max_depth': depth,
        'max_leaves': int(0.75 * 2**depth),
        'topk': 0,
        'loss': b"mse",
        'gamma': 1e-6,
        'num_threads': args.workers,
        'max_bins': meta['bin'],
        'lr': lr,
        'reg_l2': 1.0,
        'early_stop': 0,
        'one_side': True,
        'verbose': False,
        'hist_cache': 48,
        'min_samples': 4,
    }
    model = GBDTMulti(LIB, out_dim=meta['out'], params=params)

    # The test split is unused here; the 4-way unpack is kept so that a
    # malformed ``data`` still fails in the same way as before.
    x_train, y_train, _x_test, _y_test = data
    model.set_data((x_train, y_train))

    # perf_counter() is monotonic and high-resolution; time.time() can jump
    # when the system clock is adjusted, which would corrupt the benchmark.
    start = time.perf_counter()
    model.train(ROUND)
    elapsed = time.perf_counter() - start

    # Drop the booster reference explicitly, as the original code did.
    del model
    return elapsed
def train_gbdt_multi(data, meta):
    """Train an MSE ``GBDTMulti`` model and save its test-set predictions.

    Args:
        data: Sequence unpacked as ``(x_train, y_train, x_test, y_test)``.
        meta: Mapping read at ``'bin'`` (passed as ``max_bins``) and
            ``'out'`` (passed as ``out_dim``).

    Depth and learning rate are looked up in ``cfg.Depth`` and
    ``cfg.Learning_rate`` under ``[args.mode][args.data]``. The output of
    ``predict(x_test)`` is written with ``np.save`` to ``result/gbdtm``
    (NumPy appends ``.npy``); the ``result`` directory is created if it is
    missing. Relies on the module-level ``LIB``, ``ROUND``, ``GAMMA``,
    ``cfg`` and ``args``.
    """
    import os

    depth = cfg.Depth[args.mode][args.data]
    lr = cfg.Learning_rate[args.mode][args.data]

    params = {
        'max_depth': depth,
        'max_leaves': int(0.75 * 2 ** depth),
        'topk': 0,
        'loss': b"mse",
        'gamma': GAMMA,
        'num_threads': 8,
        'max_bins': meta['bin'],
        'lr': lr,
        'reg_l2': 1.0,
        'early_stop': 25,
        'one_side': True,
        'verbose': False,
        'hist_cache': 48,
        'min_samples': 8,
    }
    model = GBDTMulti(LIB, out_dim=meta['out'], params=params)

    x_train, y_train, x_test, y_test = data
    model.set_data((x_train, y_train), (x_test, y_test))
    model.train(ROUND)
    preds = model.predict(x_test)

    # Drop the booster reference before writing, as the original code did.
    del model

    # np.save does not create parent directories; without this a missing
    # "result" folder would throw away the whole training run at the end.
    os.makedirs("result", exist_ok=True)
    np.save("result/gbdtm", preds)
def train_gbdt_multi(data, meta):
    """Train a cross-entropy ``GBDTMulti`` model with a validation split.

    Args:
        data: Sequence unpacked as ``(x_train, y_train, x_test, y_test)``.
        meta: Mapping read at ``'bin'`` (passed as ``max_bins``) and
            ``'out'`` (passed as ``out_dim``).

    Depth and learning rate are looked up in ``cfg.Depth`` and
    ``cfg.Learning_rate`` under ``[args.mode][args.data]``. ``topk`` comes
    from ``args.k``; ``GAMMA``, ``num_threads``, ``min_samples``, ``LIB``
    and ``ROUND`` are taken from the enclosing module. Nothing is returned.
    """
    tree_depth = cfg.Depth[args.mode][args.data]
    step_size = cfg.Learning_rate[args.mode][args.data]

    settings = dict(
        max_depth=tree_depth,
        max_leaves=int(0.75 * 2 ** tree_depth),
        topk=args.k,
        loss=b"ce",
        gamma=GAMMA,
        num_threads=num_threads,
        max_bins=meta['bin'],
        lr=step_size,
        reg_l2=1.0,
        early_stop=25,
        one_side=True,
        verbose=True,
        min_samples=min_samples,
    )
    model = GBDTMulti(LIB, out_dim=meta['out'], params=settings)

    train_x, train_y, test_x, test_y = data
    model.set_data((train_x, train_y), (test_x, test_y))
    model.train(ROUND)