# ---- tail of a LightGBM-style hyperparameter dict ----
# NOTE(review): the opening of this dict (e.g. `param = {`) lies before this
# chunk — confirm against the full file.
'min_child_weight': 10,
'min_data_in_leaf': 150,
'reg_lambda': 0.5, # L2 regularization term on weights.
'reg_alpha': 0.5, # L1 regularization term on weights.
'colsample_bytree': 0.9,
'subsample': 0.9,
# 'nthread': 32,
'nthread': cpu_count(),  # use every available core
'bagging_freq': 1,
'verbose':-1,  # silence LightGBM logging
'seed': SEED
}

# Two pre-built feature sets, named after their CV / leaderboard scores.
loader805 = utils_best.Loader('CV805_LB803')
loader804 = utils_best.Loader('LB804')

# =============================================================================
# load
# =============================================================================
X_805 = loader805.train()
X_804 = loader804.train()

# Take only the LB804 columns that the CV805 set does not already contain,
# then join them column-wise so no feature name is duplicated in X.
col = X_804.columns.difference(X_805.columns)
X = pd.concat([X_805, X_804[col]], axis=1)
# Target column read from the pickled label frame.
y = utils.read_pickles('../data/label').TARGET

#col = [c for c in X.columns if new_feature in c]
# ---- tail of a LightGBM-style hyperparameter dict ----
# NOTE(review): the opening of this dict (e.g. `param = {`) lies before this
# chunk — confirm against the full file.
'min_child_weight': 10,
'min_data_in_leaf': 150,
'reg_lambda': 0.5, # L2 regularization term on weights.
'reg_alpha': 0.5, # L1 regularization term on weights.
'colsample_bytree': 0.9,
'subsample': 0.9,
# 'nthread': 32,
'nthread': cpu_count(),  # use every available core
'bagging_freq': 1,
'verbose': -1,
# 'seed': SEED
}

# Seed NumPy globally; note the 'seed' dict entry above is commented out in
# this variant of the script.
np.random.seed(SEED)

loader = utils_best.Loader('LB804')

# =============================================================================
# load
# =============================================================================
# train
X_train = loader.train()
y_train = utils.read_pickles('../data/label').TARGET

# Feather files holding the new features; the True flag presumably selects
# train-side files — verify against utils.get_use_files.
files_tr = utils.get_use_files(new_features, True)

# Read every feather file (progress via tqdm, refreshed at most once per
# minute) and join the new features column-wise onto the base train matrix.
X_ = pd.concat([pd.read_feather(f) for f in tqdm(files_tr, mininterval=60)], axis=1)
X_train = pd.concat([X_train, X_], axis=1)

# Guard against duplicated column names after the concat; the handling body
# continues beyond this chunk.
if X_train.columns.duplicated().sum() > 0:
# ---- tail of a LightGBM-style hyperparameter dict ----
# NOTE(review): the opening of this dict (e.g. `param = {`) lies before this
# chunk — confirm against the full file.
'num_leaves': 63,
'max_bin': 255,
'min_child_weight': 10,
'min_data_in_leaf': 150,
'reg_lambda': 0.5, # L2 regularization term on weights.
'reg_alpha': 0.5, # L1 regularization term on weights.
'colsample_bytree': 0.9,
'subsample': 0.9,
# 'nthread': 32,
'nthread': cpu_count(),  # use every available core
'bagging_freq': 1,
'verbose': -1,
'seed': SEED
}

# Single pre-built feature set, named after its CV / leaderboard score.
loader = utils_best.Loader('CV805_LB803')

# =============================================================================
# load
# =============================================================================
X = loader.train()
# Target column read from the pickled label frame.
y = utils.read_pickles('../data/label').TARGET

# Fail fast if the feature matrix ended up with duplicated column names.
if X.columns.duplicated().sum() > 0:
    raise Exception(f'duplicated!: { X.columns[X.columns.duplicated()] }')
print('no dup :) ')
print(f'X.shape {X.shape}')

gc.collect()

# Categorical feature names: columns of X that the loader also declares as
# categorical.
CAT = list(set(X.columns) & set(loader.category()))