def model_train_pred(fold):
    # NOTE: this function references self.LEARNING_RATE, self.EPOCHS and
    # self.BATCH_SIZE, so it is expected to live inside a method or closure
    # that provides `self`.
    model_path = os.path.join(model_dir, model_file_name + f"_FOLD{fold}.pth")
    tabnet_params = dict(
        n_d=64,
        n_a=128,
        n_steps=1,
        gamma=1.3,
        lambda_sparse=0,
        n_independent=2,
        n_shared=1,
        optimizer_fn=optim.Adam,
        optimizer_params=dict(lr=self.LEARNING_RATE, weight_decay=1e-5),
        mask_type="entmax",
        scheduler_params=dict(mode="min", patience=10, min_lr=1e-5, factor=0.9),
        scheduler_fn=ReduceLROnPlateau,
        verbose=10,
    )

    x_fold_train, y_fold_train, x_fold_val, y_fold_val, df_test_x_copy, val_idx = \
        preprocess(fold, df_train, df_train_x, df_train_y, df_test_x, no_of_components)
    x_fold_train, x_fold_val, df_test_x_copy = variance_threshold(
        x_fold_train, x_fold_val, df_test_x_copy)

    ### Fit ###
    model = TabNetRegressor(**tabnet_params)
    model.fit(X_train=x_fold_train.values,
              y_train=y_fold_train.values,
              eval_set=[(x_fold_val.values, y_fold_val.values)],
              eval_name=["val"],
              eval_metric=["logits_ll"],
              max_epochs=self.EPOCHS,
              patience=40,
              batch_size=self.BATCH_SIZE,
              virtual_batch_size=32,
              num_workers=1,
              drop_last=False,
              loss_fn=SmoothBCEwLogits(smoothing=1e-4, pos_weight=pos_weight))

    ### Prediction ###
    oof = np.zeros(df_train_y.shape)
    valid_preds = 1 / (1 + np.exp(-model.predict(x_fold_val.values)))  # sigmoid
    oof[val_idx] = valid_preds
    predictions = 1 / (1 + np.exp(-model.predict(df_test_x_copy.values)))
    # save_model appends ".zip", so the file on disk is "<...>.pth.zip";
    # save_model returns the full saved path.
    model_path = model.save_model(model_path)
    return oof, predictions
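# Several snippets in this section pass loss_fn=SmoothBCEwLogits(...) without
# defining it. Below is a minimal sketch of such a label-smoothed
# BCE-with-logits loss; the class name, the `smoothing` argument and the
# `pos_weight` pass-through are assumptions inferred from the call sites, not
# the original authors' exact code.
import torch
from torch import nn

class SmoothBCEwLogits(nn.Module):
    def __init__(self, smoothing=0.0, pos_weight=None):
        super().__init__()
        self.smoothing = smoothing
        self.pos_weight = pos_weight

    def forward(self, logits, targets):
        # Smooth the hard 0/1 targets toward 0.5 by `smoothing`.
        targets = targets.float() * (1.0 - self.smoothing) + 0.5 * self.smoothing
        return nn.functional.binary_cross_entropy_with_logits(
            logits, targets, pos_weight=self.pos_weight)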
def crossval_and_predict(self, n_folds: int, df: pd.DataFrame,
                         df_test: pd.DataFrame, feature_col: list,
                         target_col: str, model_params: dict):
    oof = np.zeros((len(df)))
    cv_preds = np.zeros((len(df_test)))
    kfold = KFold(n_splits=n_folds, random_state=self.random_state, shuffle=True)
    for train_idx, valid_idx in kfold.split(df):
        X_train = df[feature_col].values[train_idx]
        y_train = df[target_col].values[train_idx].reshape(-1, 1)
        X_valid = df[feature_col].values[valid_idx]
        y_valid = df[target_col].values[valid_idx].reshape(-1, 1)
        X_test = df_test[feature_col].values

        params = self.default_params()
        params['seed'] = self.random_state
        params['n_d'] = model_params['n_d']
        params['n_a'] = model_params['n_d']  # n_a deliberately tied to n_d
        params['gamma'] = model_params['gamma']
        params['momentum'] = model_params['momentum']
        params['n_steps'] = model_params['n_steps']
        params['n_shared'] = model_params['n_shared']
        params['n_independent'] = model_params['n_independent']
        logging.info(f'Parameters used for TabNet supervised training: {params}')

        unsupervised_model = TabNetPretrainer(**params)
        unsupervised_model.fit(X_train=X_train, eval_set=[X_valid],
                               pretraining_ratio=0.5, max_epochs=20)

        model = TabNetRegressor(**params)
        model.fit(X_train=X_train, y_train=y_train,
                  eval_set=[(X_valid, y_valid)], eval_name=['valid'],
                  eval_metric=['rmse'], max_epochs=100, patience=10,
                  batch_size=1024, from_unsupervised=unsupervised_model)

        oof[valid_idx] = model.predict(X_valid).squeeze()
        cv_preds += model.predict(X_test).squeeze() / n_folds
        logging.info(f'Finished fold with score {rmse(y_valid, oof[valid_idx])}')

    rmse_score = rmse(df[target_col], oof)
    return rmse_score, cv_preds
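# `rmse` is called above and in later snippets but never defined here; a
# minimal sketch consistent with its usage (an assumption, not the original
# helper):
import numpy as np

def rmse(y_true, y_pred):
    """Root mean squared error between two arrays of matching shape."""
    y_true = np.asarray(y_true).squeeze()
    y_pred = np.asarray(y_pred).squeeze()
    return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))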
class Tabnet:
    def __init__(self):
        # `with` closes the files automatically; the original's extra
        # fp.close() calls were redundant.
        with open("grad_web/function/cat_dims.pkl", "rb") as fp:
            self.cat_dims = pickle.load(fp)
        with open("grad_web/function/cat_idxs.pkl", "rb") as fp:
            self.cat_idxs = pickle.load(fp)
        with open("grad_web/function/features.pkl", "rb") as fp:
            self.features = pickle.load(fp)
        self.model = TabNetRegressor(n_a=64, n_d=64, cat_dims=self.cat_dims,
                                     cat_idxs=self.cat_idxs)
        self.model.load_model("grad_web/function/tabnet_rm_c2.zip")

    def predict(self, data):
        if isinstance(data, np.ndarray):
            # The original called self.predict(data) here, which recurses
            # forever; delegate to the underlying model instead.
            return self.model.predict(data)
        elif isinstance(data, pd.DataFrame):
            # Label encoding of object columns was disabled in the original:
            # concat = list(data.columns[data.dtypes == object]) + ['hasBooking', 'hasNpay'] \
            #     + [c for c in data.columns if 'label_' in c]
            # for col in concat:
            #     l_enc = LabelEncoder()
            #     data[col] = data[col].fillna("Null")
            #     data[col] = l_enc.fit_transform(data[col].values)
            return self.model.predict(data[self.features].values)
def fit_tabnet(x_tr, y_tr, x_va, y_va, cat_feats, args):
    import numpy as np
    import torch
    from sklearn.metrics import mean_squared_error
    from pytorch_tabnet.tab_model import TabNetRegressor

    cat_idxs = [x_tr.columns.get_loc(f) for f in cat_feats]
    cat_dims = x_tr[cat_feats].apply(lambda s: s.nunique()).tolist()
    cat_emb_dim = [i // 2 for i in cat_dims]

    x_tr = x_tr.values
    y_tr = y_tr.values.reshape(-1, 1)
    x_va = x_va.values
    y_va = y_va.values.reshape(-1, 1)

    params = dict(
        n_d=16, n_a=16, n_steps=3, gamma=1.5,
        n_independent=4, n_shared=4,
        cat_idxs=cat_idxs, cat_dims=cat_dims, cat_emb_dim=cat_emb_dim,
        lambda_sparse=0.0001, momentum=0.95, clip_value=2.,
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=0.0005),
        # scheduler_params={"gamma": 0.95, "step_size": 500},
        scheduler_params={"gamma": 0.95},
        scheduler_fn=torch.optim.lr_scheduler.ExponentialLR,
        epsilon=1e-1,
    )
    clf = TabNetRegressor(**params)
    fit_params = {
        'batch_size': 4096,
        'virtual_batch_size': 1024,
        'eval_set': [(x_va, y_va)],
        'max_epochs': 1000,
        'patience': 50,
    }
    clf.fit(x_tr, y_tr, **fit_params)

    tr_pred = np.clip(clf.predict(x_tr), 0, 361)
    va_pred = np.clip(clf.predict(x_va), 0, 361)
    train_score = np.sqrt(mean_squared_error(tr_pred, y_tr))
    val_score = np.sqrt(mean_squared_error(va_pred, y_va))
    return clf, train_score, val_score
def run_tabnet(df, max_epochs=20, device='cpu'):
    X, y = get_X_y_tab(df)
    X_train, X_val, X_test, y_train, y_val, y_test = split_data_tab(X, y)
    reg = TabNetRegressor(device_name=device)
    print('Running the TabNet DNN, this could take a while')
    # TabNetRegressor.fit returns None, so keep the fitted `reg` itself
    # instead of assigning (and returning) fit's None return value.
    reg.fit(X_train, y_train,
            eval_set=[(X_train, y_train), (X_val, y_val)],
            eval_name=['train', 'val'],
            eval_metric=['logloss'],
            max_epochs=max_epochs)
    print('Fitting the test data to the model')
    y_pred = reg.predict(X_test)
    ll = round(log_loss(y_test, y_pred), 5)
    print(f'The Log loss is {ll}')
    return reg
class ModelTabNetRegressor(Model):
    def train(self, tr_x, tr_y, va_x=None, va_y=None, te_x=None):
        categorical_dims = {}
        for col in self.categorical_features:
            tr_x[col] = tr_x[col].fillna("unk")
            va_x[col] = va_x[col].fillna("unk")
            te_x[col] = te_x[col].fillna("unk")
            categorical_dims[col] = len(set(tr_x[col].values)
                                        | set(va_x[col].values)
                                        | set(te_x[col].values))

        cat_idxs = [i for i, f in enumerate(tr_x.columns)
                    if f in self.categorical_features]
        cat_dims = [categorical_dims[f] for i, f in enumerate(tr_x.columns)
                    if f in self.categorical_features]
        cat_emb_dim = [10 for _ in categorical_dims]

        for col in tr_x.columns:
            tr_x[col] = tr_x[col].fillna(tr_x[col].mean())
            va_x[col] = va_x[col].fillna(tr_x[col].mean())
            te_x[col] = te_x[col].fillna(tr_x[col].mean())

        self.model = TabNetRegressor(cat_dims=cat_dims,
                                     cat_emb_dim=cat_emb_dim,
                                     cat_idxs=cat_idxs)
        # NOTE: X_valid/y_valid is the legacy (pre-2.0) pytorch-tabnet fit API;
        # newer releases take eval_set=[(va_x, va_y)] instead.
        self.model.fit(X_train=tr_x.values,
                       y_train=tr_y.values.reshape(-1, 1),
                       X_valid=va_x.values,
                       y_valid=va_y.values.reshape(-1, 1),
                       max_epochs=1000, patience=50,
                       batch_size=1024, virtual_batch_size=128)

    def predict(self, te_x):
        return self.model.predict(te_x.values).reshape(-1, )

    def save_model(self):
        model_path = os.path.join('../output/model', f'{self.run_fold_name}.model')
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        Data.dump(self.model, model_path)

    def load_model(self):
        model_path = os.path.join('../output/model', f'{self.run_fold_name}.model')
        self.model = Data.load(model_path)
class TabNetModel:
    def __init__(self, feat_sel=None):
        if feat_sel is not None:
            self.pipeline = Pipeline([
                ('preprocessing', MinMaxScaler()),
                ('feature_selection', SelectFromModel(feat_sel)),
            ])
        else:
            self.pipeline = Pipeline([('preprocessing', MinMaxScaler())])
        self.feat_sel = feat_sel
        self.params = {'feat_sel': feat_sel}

    def set_params(self, **kwargs):
        tabnet_params, proc_params = {}, {}
        for key in kwargs.keys():
            if key == 'feat_sel':
                # The original assigned the (None) return of an unbound
                # __init__ call to `self`, which cannot work; re-initialise
                # the instance in place instead.
                self.__init__(feat_sel=kwargs[key])
            elif key.startswith(TabNetRegression.prefix):
                tabnet_params[key.replace(TabNetRegression.prefix + "__", "")] = kwargs[key]
            else:
                proc_params[key] = kwargs[key]
        self.tabnet = TabNetRegressor(**tabnet_params)
        self.pipeline.set_params(**proc_params)
        self.params = {**tabnet_params, **proc_params}
        return self

    def get_params(self, deep=False):
        return self.params

    def fit(self, Xtrain, Ytrain, Xvalid, Yvalid):
        if self.feat_sel is not None:
            self.pipeline.fit(Xtrain, Ytrain)
        else:
            self.pipeline.fit(Xtrain)
        Xtrain_scaled = self.pipeline.transform(Xtrain)
        Xvalid_scaled = self.pipeline.transform(Xvalid)
        self.tabnet.fit(Xtrain_scaled, Ytrain.flatten(),
                        Xvalid_scaled, Yvalid.flatten())

    def predict(self, X):
        return self.tabnet.predict(self.pipeline.transform(X))
def __call__(self, trial):
    df_train, df_valid = train_test_split(self.df, test_size=0.1,
                                          random_state=self.random_state)
    X_train = df_train[self.feature_col].values
    y_train = df_train[self.target_col].values.reshape(-1, 1)
    X_valid = df_valid[self.feature_col].values
    y_valid = df_valid[self.target_col].values.reshape(-1, 1)
    logging.info(f'Train/valid split: {X_train.shape[0]} for training, '
                 f'{X_valid.shape[0]} for validation')

    n_d = trial.suggest_int('n_d', 8, 64)
    params = self.default_params
    params['n_d'] = n_d
    params['n_a'] = n_d  # keep decision and attention widths tied
    params['seed'] = self.random_state
    params['n_steps'] = trial.suggest_int('n_steps', 3, 10)
    params['n_shared'] = trial.suggest_int('n_shared', 2, 5)
    params['n_independent'] = trial.suggest_int('n_independent', 2, 5)
    params['momentum'] = trial.suggest_float('momentum', 0.01, 0.4)
    params['gamma'] = trial.suggest_float('gamma', 1.0, 2.0)

    model = TabNetRegressor(**params)
    model.fit(X_train=X_train, y_train=y_train,
              eval_set=[(X_valid, y_valid)],
              eval_metric=['rmse'],
              max_epochs=20, patience=10, batch_size=1024)
    score = rmse(y_valid, model.predict(X_valid).squeeze())
    return score
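# Hedged usage sketch for the Optuna objective above. `HyperparamObjective` is
# a hypothetical name for the class that owns this __call__ method; only the
# optuna calls themselves are standard API.
import optuna

objective = HyperparamObjective(df, feature_col, target_col)  # hypothetical constructor
study = optuna.create_study(direction='minimize')  # the objective returns RMSE
study.optimize(objective, n_trials=50)
print(study.best_params, study.best_value)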
class TabNetBase(AI_Base):
    # file_path = os.getcwd() + "\\src\\AIs\\models\\TabNetv1\\"
    file_path = os.getcwd() + "\\"
    save_name = file_path + "test_model"

    def __init__(self, *args, **kwargs):
        _TPI(self, locals())
        super(TabNetBase, self).__init__(*args, **kwargs)
        ACT = self.env.act
        MATCH = self.env.match_loader
        self.X_train, self.X_valid, self.X_test = None, None, None
        self.y_train, self.y_valid, self.y_test = None, None, None
        self.cat_idxs, self.cat_dims, self.cat_emb_dim = MATCH.get_categorical()
        self.ai = None
        self._scenario_tactics = None
        self._scenario_matches = None
        self._scenario_learn_from_file = list([[
            1,  # [self.epochs,
            [
                1,  # [len(MATCH),
                [
                    1,
                    (self.act_register_data,
                     dict(data=MATCH.act_get(is_flat=True))),
                    self.act_modify_data,
                    self.act_init_ai,
                    # self.act_load_game,
                    self.act_run_ai_with_learn,
                    # self.act_test
                ]
            ],
        ]])
        self.set_mode(self.mode)

    def act_register_data(self, data, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            self.X_train = np.array(self.env.match_loader.train_players)
            self.y_train = np.array(self.env.match_loader.train_plus)
            self.X_valid = np.array(self.env.match_loader.valid_players)
            self.y_valid = np.array(self.env.match_loader.valid_plus)
            self.X_test = np.array(self.env.match_loader.test_players)
            self.y_test = np.array(self.env.match_loader.test_plus)

    def act_init_ai(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            MATCH = self.env.match_loader
            self.ai = TabNetRegressor(n_steps=10,
                                      input_dim=MATCH.count_cols * MATCH.count_players,
                                      cat_dims=self.cat_dims,
                                      cat_emb_dim=self.cat_emb_dim,
                                      cat_idxs=self.cat_idxs)

    def act_modify_data(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            pass

    def act_load_game(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            save = self.save_name + ".zip"
            if os.path.isfile(save):
                print("Load Network")
                self.ai.load_model(save)

    def act_test(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            predictions = self.ai.predict(self.X_test)
            y_true = self.y_test
            test_score = mean_squared_error(y_pred=predictions, y_true=y_true)
            # np.savetxt("predict.txt", predictions, delimiter=',', fmt='%d')
            # np.savetxt("true.txt", y_true, delimiter=',', fmt='%d')
            print(test_score)

    def act_run_ai_with_learn(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            self.ai.fit(X_train=self.X_train, y_train=self.y_train,
                        X_valid=self.X_valid, y_valid=self.y_valid,
                        max_epochs=self.epochs, patience=500,
                        batch_size=512, drop_last=False)
            # self.ai.save_model(self.save_name)

    def act_save_model(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            print(self.save_name)
            self.ai.save_model(self.save_name)
tabnet_params['seed'] = s
for fold_nb, (train_idx, val_idx) in enumerate(mskf.split(train, target)):
    # Re-pack the pretrained fold weights into the zip layout load_model expects.
    with zipfile.ZipFile(f'TabNet_seed_{s}_fold_{fold_nb+1}.zip', 'w') as zf:
        zf.write(f'../input/moatabnetcorrect2/TabNet_seed_{s}_fold_{fold_nb+1}/model_params.json',
                 arcname='model_params.json')
        zf.write(f'../input/moatabnetcorrect2/TabNet_seed_{s}_fold_{fold_nb+1}/network.pt',
                 arcname='network.pt')
    model = TabNetRegressor()

    ### Predict on test ###
    model.load_model(f"TabNet_seed_{s}_fold_{fold_nb+1}.zip")
    preds_test = model.predict(X_test)
    test_cv_preds.append(1 / (1 + np.exp(-preds_test)))  # sigmoid

test_preds_all = np.stack(test_cv_preds)

all_feat = [col for col in df.columns if col not in ["sig_id"]]
# Drop control rows so test_preds_all and the submission have the same length.
test = pd.read_csv("../input/lish-moa/test_features.csv")
sig_id = test[test["cp_type"] != "ctl_vehicle"].sig_id.reset_index(drop=True)
tmp = pd.DataFrame(test_preds_all.mean(axis=0), columns=all_feat)
tmp["sig_id"] = sig_id
submission = pd.merge(test[["sig_id"]], tmp, on="sig_id", how="left")
submission.fillna(0, inplace=True)
def run(try_num, config):
    args = get_args()
    print('config:', config.to_dict(), flush=True)
    print('args:', args, flush=True)

    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    model_dir = f'blending-02-tabnet-{try_num}'
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv('../input/lish-moa/train_features.csv')
    train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
    dae_features = pd.read_csv(config.dae_path)
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')

    if args.debug:
        train_features = train_features[:500]
        train_targets = train_targets[:500]
        dae_features = pd.concat([dae_features.iloc[:500],
                                  dae_features.iloc[-3982:]]).reset_index(drop=True)
        config.update(dict(
            n_folds=3,
            seeds=[222],
            n_epochs=3,
            batch_size=128,
        ))

    target_columns = [col for col in train_targets.columns if col != 'sig_id']
    n_targets = len(target_columns)

    train_features, train_targets, test_features = preprocess(
        config, model_dir, train_features, train_targets, test_features, dae_features)
    features_columns = [col for col in train_features.columns
                        if col not in ['sig_id', 'cp_type', 'cp_time', 'cp_dose',
                                       'cp_type_ctl_vehicle', 'cp_type_trt_cp']]
    train_features = train_features[features_columns]
    test_features = test_features[features_columns]

    smooth_loss_function = SmoothBCEwLogits(smoothing=config.smoothing)
    kfold = MultilabelStratifiedKFold(n_splits=config.n_folds, random_state=42, shuffle=True)
    oof_preds = np.zeros((len(train_features), len(config.seeds), n_targets))
    test_preds = []

    for seed_index, seed in enumerate(config.seeds):
        print(f'Train seed {seed}', flush=True)
        set_seed(seed)
        for fold_index, (train_indices, val_indices) in enumerate(kfold.split(
                train_targets[target_columns].values,
                train_targets[target_columns].values)):
            print(f'Train fold {fold_index + 1}', flush=True)
            x_train = train_features.loc[train_indices, features_columns].values
            y_train = train_targets.loc[train_indices, target_columns].values
            x_val = train_features.loc[val_indices, features_columns].values
            y_val = train_targets.loc[val_indices, target_columns].values

            weights_path = f'{model_dir}/weights-{seed}-{fold_index}.pt'
            tabnet_conf = dict(
                seed=seed,
                optimizer_fn=optim.Adam,
                scheduler_fn=optim.lr_scheduler.ReduceLROnPlateau,
                n_d=32, n_a=32, n_steps=1, gamma=1.3,
                lambda_sparse=0, momentum=0.02,
                optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                scheduler_params=dict(mode="min", patience=5, min_lr=1e-5, factor=0.9),
                mask_type="entmax", verbose=10,
                n_independent=1, n_shared=1,
            )

            if args.only_pred:
                print('Skip training', flush=True)
            else:
                model = TabNetRegressor(**tabnet_conf)
                model.fit(
                    X_train=x_train, y_train=y_train,
                    eval_set=[(x_val, y_val)], eval_name=['val'],
                    eval_metric=['logits_ll'],
                    max_epochs=config.n_epochs, patience=20,
                    batch_size=1024, virtual_batch_size=32,
                    num_workers=1, drop_last=True,
                    loss_fn=smooth_loss_function,
                )
                model.save_model(weights_path)
                print('Save weights to: ', weights_path, flush=True)

            model = TabNetRegressor(**tabnet_conf)
            model.load_model(f'{weights_path}.zip')

            val_preds = sigmoid(model.predict(x_val))
            score = mean_log_loss(y_val, val_preds, n_targets)
            print(f'fold_index {fold_index} - val_loss: {score:5.5f}', flush=True)
            oof_preds[val_indices, seed_index, :] = val_preds

            preds = sigmoid(model.predict(test_features.values))
            test_preds.append(preds)

        score = mean_log_loss(train_targets[target_columns].values,
                              oof_preds[:, seed_index, :], n_targets)
        print(f'Seed {seed} - val_loss: {score:5.5f}', flush=True)

    oof_preds = np.mean(oof_preds, axis=1)
    score = mean_log_loss(train_targets[target_columns].values, oof_preds, n_targets)
    print(f'Overall score is {score:5.5f}', flush=True)

    oof_pred_df = train_targets.copy()
    oof_pred_df.loc[:, target_columns] = oof_preds
    oof_pred_df.to_csv(f'{model_dir}/oof_pred.csv', index=False)

    test_features = pd.read_csv('../input/lish-moa/test_features.csv')
    submission = create_submission(test_features, ['sig_id'] + target_columns)
    submission[target_columns] = np.mean(test_preds, axis=0)
    submission.loc[test_features['cp_type'] == 'ctl_vehicle', target_columns] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
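# `sigmoid` and `mean_log_loss` above are project helpers these snippets never
# define; a minimal sketch of what the call sites imply (names and signatures
# inferred from usage, not the original authors' code):
import numpy as np
from sklearn.metrics import log_loss

def sigmoid(x):
    """Elementwise logistic function, mapping logits to probabilities."""
    return 1 / (1 + np.exp(-x))

def mean_log_loss(y_true, y_pred, n_targets):
    """Binary log loss per target column, averaged over all targets."""
    return np.mean([log_loss(y_true[:, i], y_pred[:, i], labels=[0, 1])
                    for i in range(n_targets)])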
def train_tabnet(x_train, y_train, x_test, submission, feature_cols,
                 target_cols, seeds, nfolds, save_path):
    cfg_fe = Config_FeatureEngineer()
    seed_everything(seed_value=cfg_fe.seed)
    cfg_tabnet = Config_TabNet()

    test_cv_preds = []
    oof_preds = []
    scores = []
    for seed in seeds:
        kfold_col = f'kfold_{seed}'
        print("seed: {}".format(seed))
        print('*' * 60)
        for fold in range(nfolds):
            oof_preds_fold = y_train.copy()
            oof_preds_fold.iloc[:, :] = 0
            print('*' * 60)
            print("FOLD: {}".format(fold + 1))
            print('*' * 60)

            trn_idx = x_train[x_train[kfold_col] != fold].index
            val_idx = x_train[x_train[kfold_col] == fold].index
            train_df = x_train[x_train[kfold_col] != fold].reset_index(drop=True)
            valid_df = x_train[x_train[kfold_col] == fold].reset_index(drop=True)
            x_tr, y_tr = train_df[feature_cols].values, train_df[target_cols].values
            x_val, y_val = valid_df[feature_cols].values, valid_df[target_cols].values

            # TabNet model
            model = TabNetRegressor(
                n_d=cfg_tabnet.n_d,
                n_a=cfg_tabnet.n_a,
                n_steps=cfg_tabnet.n_steps,
                n_independent=cfg_tabnet.n_independent,
                n_shared=cfg_tabnet.n_shared,
                gamma=cfg_tabnet.gamma,
                lambda_sparse=cfg_tabnet.lambda_sparse,
                optimizer_fn=cfg_tabnet.optimizer_fn,
                optimizer_params=cfg_tabnet.optimizer_params,
                mask_type=cfg_tabnet.mask_type,
                scheduler_params=cfg_tabnet.scheduler_params,
                scheduler_fn=cfg_tabnet.scheduler_fn,
                seed=seed,
                verbose=cfg_tabnet.verbose)

            # fit model
            model.fit(
                X_train=x_tr,
                y_train=y_tr,
                eval_set=[(x_val, y_val)],
                eval_name=["val"],
                eval_metric=["logits_ll"],
                max_epochs=cfg_tabnet.max_epochs,
                patience=cfg_tabnet.fit_patience,
                batch_size=cfg_tabnet.batch_size,
                virtual_batch_size=cfg_tabnet.virtual_batch_size,
                num_workers=1,
                drop_last=False,
                # Use binary cross entropy because this is not a regression problem
                loss_fn=BCEwLogitsSmooth(smooth=cfg_tabnet.labelsmooth_rate))
            print('-' * 60)

            # save model
            model.save_model(os.path.join(save_path, f"TabNet_seed{seed}_FOLD{fold}"))
            print('*' * 60)

            # Predict on validation; apply sigmoid to the logits
            preds_val = model.predict(x_val)
            preds = 1 / (1 + np.exp(-preds_val))
            score = np.min(model.history["val_logits_ll"])
            oof_preds.append(preds)
            scores.append(score)

            # Save OOF for CV
            preds_tr = model.predict(x_train[feature_cols].values)
            preds = 1 / (1 + np.exp(-preds_tr))
            oof_preds_fold.loc[:, target_cols] = preds
            oof_preds_fold.to_csv(
                path_or_buf=f"./TabNet_oof_preds_seed{seed}_FOLD{fold}.csv",
                sep=',', index=False)

            # Predict on test
            preds_test = model.predict(x_test[feature_cols].values)
            preds_test = 1 / (1 + np.exp(-preds_test))
            test_cv_preds.append(preds_test)
            test_cv_preds_fold = pd.DataFrame(preds_test, columns=target_cols)
            test_cv_preds_fold["sig_id"] = x_test["sig_id"]
            test_cv_preds_fold.to_csv(
                path_or_buf=f"./TabNet_test_preds_seed{seed}_FOLD{fold}.csv",
                sep=',', index=False)

    oof_preds_all = np.concatenate(oof_preds)
    test_preds_all = np.stack(test_cv_preds)
    print("Averaged Best Score for CVs is: {}".format(np.mean(scores)))
    return test_preds_all
def pred_tabnet(x_train, y_train, x_test, submission, feature_cols,
                target_cols, seeds, nfolds, load_path, stacking=False):
    cfg_tabnet = Config_TabNet()

    test_cv_preds = []
    oof_preds = []
    scores = []
    for seed in seeds:
        print('*' * 60)
        kfold_col = f'kfold_{seed}'
        print("seed: {}".format(seed))
        print('*' * 60)
        for fold in range(nfolds):
            oof_preds_fold = y_train.copy()
            oof_preds_fold.iloc[:, :] = 0
            test_cv_preds_fold = submission.copy()
            test_cv_preds_fold.iloc[:, :] = 0
            print("FOLD: {}".format(fold + 1))
            print('*' * 60)

            train_df = x_train[x_train[kfold_col] != fold].reset_index(drop=True)
            valid_df = x_train[x_train[kfold_col] == fold].reset_index(drop=True)
            x_val, y_val = valid_df[feature_cols].values, valid_df[target_cols].values
            x_tot, y_tot = x_train[feature_cols].values, y_train[target_cols].values

            # TabNet model
            model = TabNetRegressor()

            # load model (re-archiving the saved directory if needed)
            path = os.path.join(load_path, f"TabNet_seed{seed}_FOLD{fold}")
            if os.path.exists(path + ".zip"):
                model.load_model(path + ".zip")
            else:
                tmppath = os.path.join("./", f"TabNet_seed{seed}_FOLD{fold}")
                shutil.make_archive(tmppath, "zip", path)
                model.load_model(tmppath + ".zip")
                os.remove(tmppath + ".zip")

            # Predict on validation; apply sigmoid to the logits
            preds_val = model.predict(x_val)
            preds = 1 / (1 + np.exp(-preds_val))
            score = Logloss(y_val, preds)
            scores.append(score)
            print(f"TabNet, seed{seed}, FOLD{fold}, CV predict loss: {score}")
            print('*' * 60)

            # Predict on the whole train set for stacking
            preds_tot = model.predict(x_tot)
            preds_tot = 1 / (1 + np.exp(-preds_tot))
            oof_preds.append(preds_tot)

            # Predict on test
            preds_test = model.predict(x_test[feature_cols].values)
            preds_test = 1 / (1 + np.exp(-preds_test))
            test_cv_preds.append(preds_test)

    oof_preds_all = np.stack(oof_preds)
    test_preds_all = np.stack(test_cv_preds)
    print("Averaged Best Score for CVs is: {}".format(np.mean(scores)))

    if not stacking:
        test_pred_final = test_preds_all.mean(axis=0)
    else:
        print("stacking...")
        num_models = len(seeds) * nfolds
        test_pred_final = np.zeros(test_preds_all.shape[1:])
        weights = np.zeros(num_models)

        # Stacking: fit a linear blend of the per-model train-set predictions.
        oof_preds_all = np.array(oof_preds_all)
        oof_preds_all = np.reshape(oof_preds_all, (num_models, -1))
        y_target = np.array(y_tot)
        y_target = np.reshape(y_target, (y_target.shape[0] * y_target.shape[1], -1))
        oof_preds_all = oof_preds_all.T
        print(f"oof shape is {oof_preds_all.shape}")
        print(f"targets is {y_target.shape}")

        # calculate blend weights
        reg = LinearRegression().fit(oof_preds_all, y_target)
        weights = reg.coef_[0]
        intercept = reg.intercept_
        test_pred_final[:, :] = intercept
        print(f"intercept is {intercept}")
        print(f"weights are {weights}")
        for idx in range(num_models):
            test_pred_final += test_preds_all[idx] * weights[idx]
        test_pred_final = np.clip(test_pred_final, 0, 1)

    return test_pred_final
# (fragment: the commented lines below are the tail of a disabled model.fit call)
# virtual_batch_size=32,
# num_workers=1,
# drop_last=False,
# # To use binary cross entropy because this is not a regression problem
# loss_fn=SmoothCrossEntropyLoss(smoothing=smoothing)
# )
# !cp -r ../input/tabnet_models/{str(seed)}_{str(fold)}/* .
# !zip {seed}_{fold}.zip model_params.json network.pt
model.load_model(f'./TabNet_FOLD{fold+1}_SEED{seed}.zip')
print('-' * 60)

### Predict on validation ###
preds_val = model.predict(x_valid)
# Apply sigmoid to the predictions
preds = 1 / (1 + np.exp(-preds_val))
preds = np.clip(preds, p_min, p_max)
oof_tmp[val_idx] += preds
# score = np.min(model.history["val_logits_ll"])

### Save OOF for CV ###
oof_preds.append(preds)
oof_targets.append(y_valid)
# scores.append(score)

### Predict on test ###
preds_test = model.predict(x_test)
preds_test = 1 / (1 + np.exp(-preds_test))
preds_test = np.clip(preds_test, p_min, p_max)
X_train, y_train = train.values[train_idx, :], train_targets_scored.values[train_idx, :]
X_val, y_val = train.values[val_idx, :], train_targets_scored.values[val_idx, :]

model = TabNetRegressor(**tabnet_params)
model.fit(X_train=X_train,
          y_train=y_train,
          eval_set=[(X_val, y_val)],
          eval_name=["val"],
          eval_metric=["logits_ll"],
          max_epochs=MAX_EPOCH,
          patience=20,
          batch_size=1024,
          virtual_batch_size=128,
          num_workers=1,
          drop_last=False,
          # use binary cross entropy, as this is not a regression problem
          loss_fn=torch.nn.functional.binary_cross_entropy_with_logits)

preds_val = model.predict(X_val)
# Apply sigmoid to the predictions
preds = 1 / (1 + np.exp(-preds_val))
score = np.min(model.history["val_logits_ll"])

# name = cfg.save_name + f"_fold{fold_nb}"
# model.save_model(name)

## save oof to compute the CV later
oof_preds.append(preds_val)
oof_targets.append(y_val)
scores.append(score)

# preds on test
preds_test = model.predict(X_test)
test_cv_preds.append(1 / (1 + np.exp(-preds_test)))

oof_preds_all = np.concatenate(oof_preds)
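# The eval metric "logits_ll" used throughout these snippets is not built into
# pytorch-tabnet; it has to be defined as a Metric subclass, which registers
# the string name for eval_metric and model.history lookups. A sketch in the
# spirit of the public MoA kernels (the class name here is an assumption):
import numpy as np
from pytorch_tabnet.metrics import Metric

class LogitsLogLoss(Metric):
    """Log loss computed on raw logits; lower is better."""
    def __init__(self):
        self._name = "logits_ll"  # name used in eval_metric and model.history
        self._maximize = False

    def __call__(self, y_true, y_pred):
        probs = 1 / (1 + np.exp(-y_pred))  # sigmoid of the logits
        aux = (1 - y_true) * np.log(1 - probs + 1e-15) \
            + y_true * np.log(probs + 1e-15)
        return np.mean(-aux)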
)  # (fragment: closing parenthesis of a TabNetRegressor(...) call truncated above)
clf.fit(
    X_train=X_trn,
    y_train=y_trn,
    eval_set=[(X_trn, y_trn), (X_vld, y_vld)],
    eval_name=["train", "valid"],
    eval_metric=["rmse"],
    max_epochs=2000,
    patience=50,
    batch_size=128,
    virtual_batch_size=128,
    num_workers=0,
)
fold_preds = clf.predict(X_vld).astype(np.float64)[:, 0]
_test_preds.append(clf.predict(X_tst)[:, 0])
oof[vld_index] = fold_preds
scores.append(np.sqrt(mean_squared_error(y_vld, fold_preds)))
importances = pd.concat(
    [
        importances,
        pd.DataFrame({
            "feature": feat_cols,
            "importance": clf.feature_importances_,
        }),
    ],
    axis=0,
)

oof_preds.append(oof)
test_preds.append(np.mean(_test_preds, axis=0))
def run_training_tabnet(train, test, trn_idx, val_idx, feature_cols,
                        target_cols, fold, seed, filename="tabnet"):
    seed_everything(seed)

    train_ = process_data(train)
    test_ = process_data(test)
    train_df = train_.loc[trn_idx, :].reset_index(drop=True)
    valid_df = train_.loc[val_idx, :].reset_index(drop=True)
    x_train, y_train = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid = valid_df[feature_cols].values, valid_df[target_cols].values

    # The original repeated this parameter list verbatim for training and for
    # prediction; it is factored into one dict here with no behaviour change.
    tabnet_conf = dict(
        n_d=32,
        n_a=32,
        n_steps=1,
        lambda_sparse=0,
        cat_dims=[3, 2],
        cat_emb_dim=[1, 1],
        cat_idxs=[0, 1],
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
        mask_type='entmax',
        # device_name=DEVICE,
        scheduler_params=dict(milestones=[100, 150], gamma=0.9),
        scheduler_fn=torch.optim.lr_scheduler.MultiStepLR,
        verbose=10,
        seed=seed,
    )
    model = TabNetRegressor(**tabnet_conf)

    loss_fn = LabelSmoothing(0.001)
    # eval_metric = SmoothedLogLossMetric(0.001)
    # eval_metric_nosmoothing = SmoothedLogLossMetric(0.)
    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))

    if IS_TRAIN:
        # print("isnan", np.any(np.isnan(x_train)))
        model.fit(X_train=x_train,
                  y_train=y_train,
                  eval_set=[(x_valid, y_valid)],
                  eval_metric=[LogLossMetric, SmoothedLogLossMetric],
                  max_epochs=200,
                  patience=50,
                  batch_size=1024,
                  virtual_batch_size=128,
                  num_workers=0,
                  drop_last=False,
                  loss_fn=loss_fn)
        model.save_model(f"{MODEL_DIR}/{NB}_{filename}_SEED{seed}_FOLD{fold}")

    # --------------------- PREDICTION ---------------------
    x_test = test_[feature_cols].values
    model = TabNetRegressor(**tabnet_conf)
    # NOTE: save_model() above writes "<path>.zip"; loading "<path>.model"
    # only works if the archive was renamed to that extension beforehand.
    model.load_model(f"{MODEL_DIR}/{NB}_{filename}_SEED{seed}_FOLD{fold}.model")

    valid_preds = model.predict(x_valid)
    valid_preds = torch.sigmoid(torch.as_tensor(valid_preds)).detach().cpu().numpy()
    oof[val_idx] = valid_preds

    predictions = model.predict(x_test)
    predictions = torch.sigmoid(torch.as_tensor(predictions)).detach().cpu().numpy()
    return oof, predictions
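# `LabelSmoothing`, `LogLossMetric` and `SmoothedLogLossMetric` are helpers the
# snippet above assumes but never defines; the metric classes would subclass
# pytorch_tabnet.metrics.Metric in the same way as the LogitsLogLoss sketch
# earlier. A plausible minimal sketch of the loss; its name and signature are
# assumptions inferred from the call site:
import torch
from torch import nn

class LabelSmoothing(nn.Module):
    """BCE-with-logits on targets smoothed away from the hard 0/1 labels."""
    def __init__(self, smoothing=0.0):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, logits, targets):
        targets = targets.float() * (1.0 - self.smoothing) + 0.5 * self.smoothing
        return nn.functional.binary_cross_entropy_with_logits(logits, targets)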
def objective(trial):
    SEED = 25  # [20, 21, 22]

    # all hyperparameters here
    # mask_type = trial.suggest_categorical("mask_type", ["entmax", "sparsemax"])
    n_d = trial.suggest_int("n_d", 8, 32, step=8)
    n_a = trial.suggest_int("n_a", 32, 64, step=8)
    # n_steps = trial.suggest_int("n_steps", 1, 3, step=1)
    # n_shared = trial.suggest_int("n_shared", 1, 2)
    # n_independent = trial.suggest_int("n_independent", 1, 2, step=1)
    clip_value = trial.suggest_int("clip_value", 1, 2, step=1)
    gamma = trial.suggest_float("gamma", 1., 1.6, step=0.2)
    # lambda_sparse = trial.suggest_float("lambda_sparse", 1e-6, 1e-3, log=True)
    # batch_size = trial.suggest_int("batch_size", 512, 1024, step=256)
    # momentum = trial.suggest_float("momentum", 0.02, 0.1, step=0.02)
    # factor = trial.suggest_float("factor", 0.5, 0.9, step=0.1)

    # Skip duplicated parameter sets: return the previous value without
    # re-evaluating. (optuna.structs is deprecated in newer Optuna releases;
    # optuna.trial.TrialState is the current location.)
    for t in trial.study.trials:
        if t.state != optuna.structs.TrialState.COMPLETE:
            continue
        if t.params == trial.params:
            return t.value

    tabnet_params = dict(
        n_d=n_d,
        n_a=n_a,
        n_steps=1,
        gamma=gamma,
        n_shared=1,
        n_independent=1,
        lambda_sparse=0,
        # momentum=momentum,
        clip_value=clip_value,
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
        mask_type="entmax",
        scheduler_params=dict(mode="min", patience=5, min_lr=1e-5, factor=0.5),
        scheduler_fn=torch.optim.lr_scheduler.ReduceLROnPlateau,
        verbose=10,
        seed=SEED,
    )
    print(m_, 'params:', tabnet_params)

    scores_auc_all = []
    test_cv_preds = []
    NB_SPLITS = 7
    mskf = MultilabelStratifiedKFold(n_splits=NB_SPLITS, random_state=0, shuffle=True)

    oof_preds = []
    oof_targets = []
    scores = []
    scores_auc = []
    for fold_nb, (train_idx, val_idx) in enumerate(mskf.split(train, target)):
        print(b_, "FOLDS: ", r_, fold_nb + 1, y_, 'seed:', tabnet_params['seed'])
        print(g_, '*' * 60, c_)
        X_train, y_train = train.values[train_idx, :], target.values[train_idx, :]
        X_val, y_val = train.values[val_idx, :], target.values[val_idx, :]

        ### Model ###
        model = TabNetRegressor(**tabnet_params)

        ### Fit ###
        model.fit(
            X_train=X_train,
            y_train=y_train,
            eval_set=[(X_val, y_val)],
            eval_name=["val"],
            eval_metric=["logits_ll"],
            max_epochs=MAX_EPOCH,
            patience=20,
            batch_size=512,         # 1024
            virtual_batch_size=64,  # 32
            num_workers=1,
            drop_last=False,
            # To use binary cross entropy because this is not a regression problem
            loss_fn=SmoothBCEwLogits(smoothing=0.0005),
        )
        print(y_, '-' * 60)

        ### Predict on validation ###
        preds_val = model.predict(X_val)
        # Apply sigmoid to the predictions
        preds = 1 / (1 + np.exp(-preds_val))
        score = np.min(model.history["val_logits_ll"])

        ### Save OOF for CV ###
        oof_preds.append(preds_val)
        oof_targets.append(y_val)
        scores.append(score)

        ### Predict on test ###
        # preds_test = model.predict(X_test)
        # test_cv_preds.append(1 / (1 + np.exp(-preds_test)))

    # oof_preds_all = np.concatenate(oof_preds)
    # oof_targets_all = np.concatenate(oof_targets)
    # test_preds_all = np.stack(test_cv_preds)
    return np.mean(scores)
clf.fit(
    X_train=X_train_1,
    y_train=Y_train_1,
    eval_set=[(X_train_1, Y_train_1), (X_valid_1, Y_valid_1)],
    eval_name=['train', 'valid'],
    eval_metric=['mae', 'mse'],
    max_epochs=max_epochs,
    patience=50,
    # batch_size=1024,
    # virtual_batch_size=128,
    num_workers=0,
    drop_last=False)

X_test = X_test.values
preds = clf.predict(X_test)
preds[:48]  # notebook-style peek at the first 48 predictions

# Derive the submission columns '1'..'9' by scaling the base prediction.
sub = pd.DataFrame(preds)
sub['1'] = sub[0] * 0.6
sub['2'] = sub[0] * 0.7
sub['3'] = sub[0] * 0.8
sub['4'] = sub[0] * 0.9
sub['5'] = sub[0] * 1
sub['6'] = sub[0] * 1.1
sub['7'] = sub[0] * 1.2
sub['8'] = sub[0] * 1.3
sub['9'] = sub[0] * 1.4
sub[:48]
def run_model(self, train_df, targets, X_test):
    """
    Run model.

    Args:
        train_df (dataframe): training inputs with dimensions [n_observations, n_features]
        targets (dataframe): updated input data of known (binary) responses from MoA targets for train data
        X_test (arr): test inputs with dimensions [n_observations, n_features]

    Returns:
        arr: predicted outputs with dimensions [n_splits_kfold, n_observations, n_moa_targets]
    """
    test_cv_preds = []
    oof_preds = []
    oof_targets = []
    scores = []
    mskf = MultilabelStratifiedKFold(n_splits=self.config.NB_SPLITS,
                                     random_state=0, shuffle=True)
    for fold_nb, (train_idx, val_idx) in enumerate(mskf.split(train_df, targets)):
        print("FOLDS: ", fold_nb + 1)
        print('*' * 60)
        X_train, y_train = train_df.values[train_idx, :], targets.values[train_idx, :]
        X_val, y_val = train_df.values[val_idx, :], targets.values[val_idx, :]

        model = TabNetRegressor(**self.tabnet_params)
        model.fit(X_train=X_train,
                  y_train=y_train,
                  eval_set=[(X_val, y_val)],
                  eval_name=["val"],
                  eval_metric=["logits_ll"],
                  max_epochs=self.config.MAX_EPOCH,
                  patience=20,
                  batch_size=1024,
                  virtual_batch_size=32,
                  num_workers=1,
                  drop_last=False,
                  loss_fn=F.binary_cross_entropy_with_logits)
        print('-' * 60)

        preds_val = model.predict(X_val)
        preds = 1 / (1 + np.exp(-preds_val))  # sigmoid
        score = np.min(model.history["val_logits_ll"])
        oof_preds.append(preds_val)
        oof_targets.append(y_val)
        scores.append(score)

        preds_test = model.predict(X_test)
        test_cv_preds.append(1 / (1 + np.exp(-preds_test)))

    oof_preds_all = np.concatenate(oof_preds)
    oof_targets_all = np.concatenate(oof_targets)
    test_preds_all = np.stack(test_cv_preds)

    aucs = []
    for task_id in range(oof_preds_all.shape[1]):
        aucs.append(roc_auc_score(y_true=oof_targets_all[:, task_id],
                                  y_score=oof_preds_all[:, task_id]))
    print(f"Overall AUC: {np.mean(aucs)}")
    print(f"Average CV: {np.mean(scores)}")
    return test_preds_all