def model_train_pred(fold):
    # NOTE: relies on names from the enclosing scope (model_dir, model_file_name,
    # df_train, df_train_x, df_train_y, df_test_x, no_of_components, pos_weight)
    # and on `self` for hyperparameters, so it is assumed to live inside a class.
    model_path = os.path.join(model_dir, model_file_name + f"_FOLD{fold}.pth")
    tabnet_params = dict(
        n_d=64,
        n_a=128,
        n_steps=1,
        gamma=1.3,
        lambda_sparse=0,
        n_independent=2,
        n_shared=1,
        optimizer_fn=optim.Adam,
        optimizer_params=dict(lr=self.LEARNING_RATE, weight_decay=1e-5),
        mask_type="entmax",
        scheduler_params=dict(mode="min", patience=10, min_lr=1e-5, factor=0.9),
        scheduler_fn=ReduceLROnPlateau,
        verbose=10)

    x_fold_train, y_fold_train, x_fold_val, y_fold_val, df_test_x_copy, val_idx = \
        preprocess(fold, df_train, df_train_x, df_train_y, df_test_x, no_of_components)
    x_fold_train, x_fold_val, df_test_x_copy = variance_threshold(
        x_fold_train, x_fold_val, df_test_x_copy)

    ### Fit ###
    model = TabNetRegressor(**tabnet_params)
    model.fit(
        X_train=x_fold_train.values,
        y_train=y_fold_train.values,
        eval_set=[(x_fold_val.values, y_fold_val.values)],
        eval_name=["val"],
        eval_metric=["logits_ll"],
        max_epochs=self.EPOCHS,
        patience=40,
        batch_size=self.BATCH_SIZE,
        virtual_batch_size=32,
        num_workers=1,
        drop_last=False,
        loss_fn=SmoothBCEwLogits(smoothing=1e-4, pos_weight=pos_weight))

    ### Prediction ###
    oof = np.zeros(df_train_y.shape)
    # Apply sigmoid to the raw logits to get probabilities.
    valid_preds = 1 / (1 + np.exp(-model.predict(x_fold_val.values)))
    oof[val_idx] = valid_preds
    predictions = 1 / (1 + np.exp(-model.predict(df_test_x_copy.values)))

    model_path = model.save_model(model_path)  # save_model returns the saved path
    return oof, predictions
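
# SmoothBCEwLogits is referenced above but defined elsewhere. A minimal
# sketch, assuming the label-smoothing recipe widely used in MoA notebooks
# (the exact implementation in this codebase may differ):
import torch
import torch.nn.functional as F
from torch import nn

class SmoothBCEwLogits(nn.modules.loss._WeightedLoss):
    """BCE-with-logits whose hard 0/1 targets are smoothed toward 0.5."""

    def __init__(self, weight=None, reduction="mean", smoothing=0.0,
                 pos_weight=None):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.pos_weight = pos_weight

    def forward(self, inputs, targets):
        # Smooth the targets without tracking gradients through them.
        with torch.no_grad():
            targets = targets * (1.0 - self.smoothing) + 0.5 * self.smoothing
        return F.binary_cross_entropy_with_logits(
            inputs, targets, weight=self.weight,
            pos_weight=self.pos_weight, reduction=self.reduction)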
class TabNetBase(AI_Base):
    # file_path = os.getcwd() + "\\src\\AIs\\models\\TabNetv1\\"
    file_path = os.getcwd() + "\\"
    save_name = file_path + "test_model"

    def __init__(self, *args, **kwargs):
        _TPI(self, locals())
        super(TabNetBase, self).__init__(*args, **kwargs)
        ACT = self.env.act
        MATCH = self.env.match_loader
        self.X_train, self.X_valid, self.X_test = None, None, None
        self.y_train, self.y_valid, self.y_test = None, None, None
        self.cat_idxs, self.cat_dims, self.cat_emb_dim = MATCH.get_categorical()
        self.ai = None
        self._scenario_tactics = None
        self._scenario_matches = None
        self._scenario_learn_from_file = list([[
            1,  # [self.epochs,
            [
                1,  # [len(MATCH),
                [
                    1,
                    (self.act_register_data, dict(data=MATCH.act_get(is_flat=True))),
                    self.act_modify_data,
                    self.act_init_ai,
                    # self.act_load_game,
                    self.act_run_ai_with_learn,
                    # self.act_test
                ]
            ],
        ]])
        self.set_mode(self.mode)

    def act_register_data(self, data, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            self.X_train = np.array(self.env.match_loader.train_players)
            self.y_train = np.array(self.env.match_loader.train_plus)
            self.X_valid = np.array(self.env.match_loader.valid_players)
            self.y_valid = np.array(self.env.match_loader.valid_plus)
            self.X_test = np.array(self.env.match_loader.test_players)
            self.y_test = np.array(self.env.match_loader.test_plus)

    def act_init_ai(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            MATCH = self.env.match_loader
            self.ai = TabNetRegressor(
                n_steps=10,
                input_dim=MATCH.count_cols * MATCH.count_players,
                cat_dims=self.cat_dims,
                cat_emb_dim=self.cat_emb_dim,
                cat_idxs=self.cat_idxs)

    def act_modify_data(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            pass

    def act_load_game(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            save = self.save_name + ".zip"
            if os.path.isfile(save):
                print("Load Network")
                self.ai.load_model(save)

    def act_test(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            predictions = self.ai.predict(self.X_test)
            y_true = self.y_test
            test_score = mean_squared_error(y_pred=predictions, y_true=y_true)
            # np.savetxt("predict.txt", predictions, delimiter=',', fmt='%d')
            # np.savetxt("true.txt", y_true, delimiter=',', fmt='%d')
            print(test_score)

    def act_run_ai_with_learn(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            self.ai.fit(
                X_train=self.X_train, y_train=self.y_train,
                X_valid=self.X_valid, y_valid=self.y_valid,
                max_epochs=self.epochs, patience=500,
                batch_size=512, drop_last=False)
            # self.ai.save_model(self.save_name)

    def act_save_model(self, is_test=False):
        if is_test is True:
            _TPI(self, locals())
        else:
            print(self.save_name)
            self.ai.save_model(self.save_name)
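
# The three lists returned by get_categorical() above feed TabNet's
# categorical-embedding parameters. A minimal illustrative sketch of the
# expected format (names and values below are hypothetical, not from this repo):
from pytorch_tabnet.tab_model import TabNetRegressor

cat_idxs = [0, 3]      # positions of the categorical columns in X
cat_dims = [4, 12]     # cardinality (number of distinct values) per column
cat_emb_dim = [2, 4]   # learned embedding size per categorical column

example_model = TabNetRegressor(cat_idxs=cat_idxs, cat_dims=cat_dims,
                                cat_emb_dim=cat_emb_dim)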
def run(try_num, config):
    args = get_args()
    print('config:', config.to_dict(), flush=True)
    print('args:', args, flush=True)

    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

    model_dir = f'blending-02-tabnet-{try_num}'
    if not os.path.exists(model_dir):
        os.mkdir(model_dir)

    train_features = pd.read_csv('../input/lish-moa/train_features.csv')
    train_targets = pd.read_csv('../input/lish-moa/train_targets_scored.csv')
    dae_features = pd.read_csv(config.dae_path)
    test_features = pd.read_csv('../input/lish-moa/test_features.csv')

    if args.debug:
        train_features = train_features[:500]
        train_targets = train_targets[:500]
        dae_features = pd.concat([
            dae_features.iloc[:500],
            dae_features.iloc[-3982:]
        ]).reset_index(drop=True)
        config.update(dict(
            n_folds=3,
            seeds=[222],
            n_epochs=3,
            batch_size=128,
        ))

    target_columns = [col for col in train_targets.columns if col != 'sig_id']
    n_targets = len(target_columns)

    train_features, train_targets, test_features = preprocess(
        config, model_dir, train_features, train_targets, test_features, dae_features)
    features_columns = [col for col in train_features.columns
                        if col not in ['sig_id', 'cp_type', 'cp_time', 'cp_dose',
                                       'cp_type_ctl_vehicle', 'cp_type_trt_cp']]
    train_features = train_features[features_columns]
    test_features = test_features[features_columns]

    smooth_loss_function = SmoothBCEwLogits(smoothing=config.smoothing)
    kfold = MultilabelStratifiedKFold(n_splits=config.n_folds, random_state=42,
                                      shuffle=True)
    oof_preds = np.zeros((len(train_features), len(config.seeds), n_targets))
    test_preds = []

    for seed_index, seed in enumerate(config.seeds):
        print(f'Train seed {seed}', flush=True)
        set_seed(seed)

        for fold_index, (train_indices, val_indices) in enumerate(kfold.split(
            train_targets[target_columns].values,
            train_targets[target_columns].values
        )):
            print(f'Train fold {fold_index + 1}', flush=True)
            x_train = train_features.loc[train_indices, features_columns].values
            y_train = train_targets.loc[train_indices, target_columns].values
            x_val = train_features.loc[val_indices, features_columns].values
            y_val = train_targets.loc[val_indices, target_columns].values

            weights_path = f'{model_dir}/weights-{seed}-{fold_index}.pt'
            tabnet_conf = dict(
                seed=seed,
                optimizer_fn=optim.Adam,
                scheduler_fn=optim.lr_scheduler.ReduceLROnPlateau,
                n_d=32,
                n_a=32,
                n_steps=1,
                gamma=1.3,
                lambda_sparse=0,
                momentum=0.02,
                optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
                scheduler_params=dict(mode="min", patience=5, min_lr=1e-5, factor=0.9),
                mask_type="entmax",
                verbose=10,
                n_independent=1,
                n_shared=1,
            )

            if args.only_pred:
                print('Skip training', flush=True)
            else:
                model = TabNetRegressor(**tabnet_conf)
                model.fit(
                    X_train=x_train,
                    y_train=y_train,
                    eval_set=[(x_val, y_val)],
                    eval_name=['val'],
                    eval_metric=['logits_ll'],
                    max_epochs=config.n_epochs,
                    patience=20,
                    batch_size=1024,
                    virtual_batch_size=32,
                    num_workers=1,
                    drop_last=True,
                    loss_fn=smooth_loss_function)
                model.save_model(weights_path)
                print('Save weights to: ', weights_path, flush=True)

            model = TabNetRegressor(**tabnet_conf)
            model.load_model(f'{weights_path}.zip')

            val_preds = sigmoid(model.predict(x_val))
            score = mean_log_loss(y_val, val_preds, n_targets)
            print(f'fold_index {fold_index} - val_loss: {score:5.5f}', flush=True)
            oof_preds[val_indices, seed_index, :] = val_preds

            preds = sigmoid(model.predict(test_features.values))
            test_preds.append(preds)

        score = mean_log_loss(train_targets[target_columns].values,
                              oof_preds[:, seed_index, :], n_targets)
        print(f'Seed {seed} - val_loss: {score:5.5f}', flush=True)

    oof_preds = np.mean(oof_preds, axis=1)
    score = mean_log_loss(train_targets[target_columns].values, oof_preds, n_targets)
    print(f'Overall score is {score:5.5f}', flush=True)

    oof_pred_df = train_targets.copy()
    oof_pred_df.loc[:, target_columns] = oof_preds
    oof_pred_df.to_csv(f'{model_dir}/oof_pred.csv', index=False)

    test_features = pd.read_csv('../input/lish-moa/test_features.csv')
    submission = create_submission(test_features, ['sig_id'] + target_columns)
    submission[target_columns] = np.mean(test_preds, axis=0)
    submission.loc[test_features['cp_type'] == 'ctl_vehicle', target_columns] = 0
    submission.to_csv(f'{model_dir}/submission.csv', index=False)
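
# The runner above relies on two small helpers that are not shown in this
# excerpt. A hedged sketch, assuming the definitions commonly used in MoA
# notebooks (names match the calls above; bodies are assumptions):
import numpy as np
from sklearn.metrics import log_loss

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def mean_log_loss(y_true, y_pred, n_targets):
    # Column-wise binary log loss, averaged over all targets; clip so
    # log_loss never sees exact 0 or 1 probabilities.
    y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
    losses = [log_loss(y_true[:, i], y_pred[:, i], labels=[0, 1])
              for i in range(n_targets)]
    return float(np.mean(losses))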
def train_tabnet(x_train, y_train, x_test, submission, feature_cols,
                 target_cols, seeds, nfolds, save_path):
    cfg_fe = Config_FeatureEngineer()
    seed_everything(seed_value=cfg_fe.seed)
    cfg_tabnet = Config_TabNet()

    test_cv_preds = []
    oof_preds = []
    scores = []

    for seed in seeds:
        kfold_col = f'kfold_{seed}'
        print("seed: {}".format(seed))
        print('*' * 60)

        for fold in range(nfolds):
            oof_preds_fold = y_train.copy()
            oof_preds_fold.iloc[:, :] = 0
            print('*' * 60)
            print("FOLD: {}".format(fold + 1))
            print('*' * 60)

            trn_idx = x_train[x_train[kfold_col] != fold].index
            val_idx = x_train[x_train[kfold_col] == fold].index
            train_df = x_train[x_train[kfold_col] != fold].reset_index(drop=True)
            valid_df = x_train[x_train[kfold_col] == fold].reset_index(drop=True)
            x_tr, y_tr = train_df[feature_cols].values, train_df[target_cols].values
            x_val, y_val = valid_df[feature_cols].values, valid_df[target_cols].values

            # TabNet model
            model = TabNetRegressor(
                n_d=cfg_tabnet.n_d,
                n_a=cfg_tabnet.n_a,
                n_steps=cfg_tabnet.n_steps,
                n_independent=cfg_tabnet.n_independent,
                n_shared=cfg_tabnet.n_shared,
                gamma=cfg_tabnet.gamma,
                lambda_sparse=cfg_tabnet.lambda_sparse,
                optimizer_fn=cfg_tabnet.optimizer_fn,
                optimizer_params=cfg_tabnet.optimizer_params,
                mask_type=cfg_tabnet.mask_type,
                scheduler_params=cfg_tabnet.scheduler_params,
                scheduler_fn=cfg_tabnet.scheduler_fn,
                seed=seed,
                verbose=cfg_tabnet.verbose)

            # Fit model. A smoothed binary cross-entropy loss is used because,
            # despite TabNetRegressor, this is a multilabel classification task.
            model.fit(
                X_train=x_tr,
                y_train=y_tr,
                eval_set=[(x_val, y_val)],
                eval_name=["val"],
                eval_metric=["logits_ll"],
                max_epochs=cfg_tabnet.max_epochs,
                patience=cfg_tabnet.fit_patience,
                batch_size=cfg_tabnet.batch_size,
                virtual_batch_size=cfg_tabnet.virtual_batch_size,
                num_workers=1,
                drop_last=False,
                loss_fn=BCEwLogitsSmooth(smooth=cfg_tabnet.labelsmooth_rate))
            print('-' * 60)

            # Save model
            model.save_model(os.path.join(save_path, f"TabNet_seed{seed}_FOLD{fold}"))
            print('*' * 60)

            # Predict on validation and apply sigmoid to the logits
            preds_val = model.predict(x_val)
            preds = 1 / (1 + np.exp(-preds_val))
            score = np.min(model.history["val_logits_ll"])
            oof_preds.append(preds)
            scores.append(score)

            # Save OOF for CV
            preds_tr = model.predict(x_train[feature_cols].values)
            preds = 1 / (1 + np.exp(-preds_tr))
            oof_preds_fold.loc[:, target_cols] = preds
            oof_preds_fold.to_csv(
                path_or_buf=f"./TabNet_oof_preds_seed{seed}_FOLD{fold}.csv",
                sep=',', index=False)

            # Predict on test
            preds_test = model.predict(x_test[feature_cols].values)
            preds_test = 1 / (1 + np.exp(-preds_test))
            test_cv_preds.append(preds_test)
            test_cv_preds_fold = pd.DataFrame(preds_test, columns=target_cols)
            test_cv_preds_fold["sig_id"] = x_test["sig_id"]
            test_cv_preds_fold.to_csv(
                path_or_buf=f"./TabNet_test_preds_seed{seed}_FOLD{fold}.csv",
                sep=',', index=False)

    oof_preds_all = np.concatenate(oof_preds)
    test_preds_all = np.stack(test_cv_preds)
    print("Averaged Best Score for CVs is: {}".format(np.mean(scores)))
    return test_preds_all
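
# Several fit() calls in this file pass eval_metric=["logits_ll"], a custom
# metric registered with pytorch-tabnet. Its definition is not part of this
# excerpt; the sketch below assumes the standard MoA-notebook implementation
# of a log loss computed on raw logits.
import numpy as np
from pytorch_tabnet.metrics import Metric

class LogitsLogLoss(Metric):
    """Log loss on logits: lower is better, so _maximize is False."""

    def __init__(self):
        self._name = "logits_ll"  # the string used in eval_metric=["logits_ll"]
        self._maximize = False

    def __call__(self, y_true, y_pred):
        probs = 1 / (1 + np.exp(-y_pred))  # sigmoid: logits -> probabilities
        aux = (1 - y_true) * np.log(1 - probs + 1e-15) \
            + y_true * np.log(probs + 1e-15)
        return np.mean(-aux)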
def run_training_tabnet(train, test, trn_idx, val_idx, feature_cols,
                        target_cols, fold, seed, filename="tabnet"):
    seed_everything(seed)
    train_ = process_data(train)
    test_ = process_data(test)
    train_df = train_.loc[trn_idx, :].reset_index(drop=True)
    valid_df = train_.loc[val_idx, :].reset_index(drop=True)

    x_train, y_train = train_df[feature_cols].values, train_df[target_cols].values
    x_valid, y_valid = valid_df[feature_cols].values, valid_df[target_cols].values

    model = TabNetRegressor(
        n_d=32,
        n_a=32,
        n_steps=1,
        lambda_sparse=0,
        cat_dims=[3, 2],
        cat_emb_dim=[1, 1],
        cat_idxs=[0, 1],
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
        mask_type='entmax',
        # device_name=DEVICE,
        scheduler_params=dict(milestones=[100, 150], gamma=0.9),
        scheduler_fn=torch.optim.lr_scheduler.MultiStepLR,
        verbose=10,
        seed=seed)

    loss_fn = LabelSmoothing(0.001)
    # eval_metric = SmoothedLogLossMetric(0.001)
    # eval_metric_nosmoothing = SmoothedLogLossMetric(0.)

    oof = np.zeros((len(train), target.iloc[:, 1:].shape[1]))

    if IS_TRAIN:
        # print("isnan", np.any(np.isnan(x_train)))
        model.fit(
            X_train=x_train,
            y_train=y_train,
            eval_set=[(x_valid, y_valid)],
            eval_metric=[LogLossMetric, SmoothedLogLossMetric],
            max_epochs=200,
            patience=50,
            batch_size=1024,
            virtual_batch_size=128,
            num_workers=0,
            drop_last=False,
            loss_fn=loss_fn)
        model.save_model(f"{MODEL_DIR}/{NB}_{filename}_SEED{seed}_FOLD{fold}")

    # --------------------- PREDICTION ---------------------
    x_test = test_[feature_cols].values

    # Re-instantiate with the same configuration before loading trained weights.
    model = TabNetRegressor(
        n_d=32,
        n_a=32,
        n_steps=1,
        lambda_sparse=0,
        cat_dims=[3, 2],
        cat_emb_dim=[1, 1],
        cat_idxs=[0, 1],
        optimizer_fn=torch.optim.Adam,
        optimizer_params=dict(lr=2e-2, weight_decay=1e-5),
        mask_type='entmax',
        # device_name=DEVICE,
        scheduler_params=dict(milestones=[100, 150], gamma=0.9),
        scheduler_fn=torch.optim.lr_scheduler.MultiStepLR,
        verbose=10,
        seed=seed)
    # NOTE: recent pytorch-tabnet releases append ".zip" to the path passed to
    # save_model; the ".model" suffix here assumes an older release and may
    # need adjusting to match the file actually written above.
    model.load_model(f"{MODEL_DIR}/{NB}_{filename}_SEED{seed}_FOLD{fold}.model")

    valid_preds = model.predict(x_valid)
    valid_preds = torch.sigmoid(torch.as_tensor(valid_preds)).detach().cpu().numpy()
    oof[val_idx] = valid_preds

    predictions = model.predict(x_test)
    predictions = torch.sigmoid(torch.as_tensor(predictions)).detach().cpu().numpy()

    return oof, predictions
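
# loss_fn = LabelSmoothing(0.001) above refers to a class defined elsewhere.
# A minimal sketch, assuming it is the usual smoothed BCE-with-logits from
# these notebooks (the body is an assumption, not the original code):
import torch.nn.functional as F
from torch import nn

class LabelSmoothing(nn.Module):
    def __init__(self, smoothing=0.0):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, logits, targets):
        # Pull hard 0/1 targets toward 0.5 by the smoothing factor, then
        # apply ordinary BCE-with-logits.
        targets = targets * (1.0 - self.smoothing) + 0.5 * self.smoothing
        return F.binary_cross_entropy_with_logits(logits, targets)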
        # (continuation of a model.fit(...) call from the preceding fold loop)
        patience=20,
        batch_size=1024,
        virtual_batch_size=32,
        num_workers=1,
        drop_last=False,
        loss_fn=SmoothBCEwLogits(smoothing=5e-5))
    print('-' * 60)

    ### Predict on validation ###
    preds_val = model.predict(X_val)
    # Apply sigmoid to the logits
    preds = 1 / (1 + np.exp(-preds_val))
    score = np.min(model.history["val_logits_ll"])

    saving_path_name = ('TabNet_seed_' + str(tabnet_params['seed'])
                        + '_fold_' + str(fold_nb + 1))
    saved_filepath = model.save_model(saving_path_name)
    loaded_model = TabNetRegressor()
    loaded_model.load_model(saved_filepath)

    ### Save OOF for CV ###
    # Accumulate the sigmoid probabilities (not the raw logits in preds_val),
    # averaged across seeds, consistent with the test predictions below.
    oof_preds[val_idx] += preds / len(SEED)
    scores.append(score)

    ### Predict on test ###
    model.load_model(saved_filepath)
    preds_test = model.predict(X_test)
    test_cv_preds.append(1 / (1 + np.exp(-preds_test)))

test_preds_all = np.stack(test_cv_preds)
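
# Illustrative follow-up (not part of the excerpt): the stacked per-seed,
# per-fold test predictions are typically averaged over axis 0 to obtain the
# final submission probabilities. `submission` and `target_cols` here are
# assumed to match the objects used in train_tabnet above.
test_preds_mean = test_preds_all.mean(axis=0)  # (n_test_rows, n_targets)
submission[target_cols] = test_preds_mean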