def main():
    api_token = 'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiYWQxMjg3OGEtMGI1NC00NzFmLTg0YmMtZmIxZjcxZDM2NTAxIn0='
    neptune.init(api_token=api_token,
                 project_qualified_name='jamesmccarthy65/Numerai')
    data = utils.load_data('data/', mode='train')
    data, target, features, era = utils.preprocess_data(data, nn=True)
    datasets = {'data': data, 'target': target, 'features': features, 'era': era}

    print('creating XGBoost Trials')
    xgb_exp = neptune.create_experiment('XGBoost_HPO')
    xgb_neptune_callback = opt_utils.NeptuneCallback(experiment=xgb_exp)
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: optimize(trial, datasets), n_trials=10,
                   callbacks=[xgb_neptune_callback])
    joblib.dump(study, f'HPO/xgb_hpo_{datetime.datetime.now().date()}.pkl')

    print('Creating LightGBM Trials')
    lgb_exp = neptune.create_experiment('LGBM_HPO')
    lgbm_neptune_callback = opt_utils.NeptuneCallback(experiment=lgb_exp)
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: loptimize(trial, datasets), n_trials=10,
                   callbacks=[lgbm_neptune_callback])
    joblib.dump(study, f'HPO/lgb_hpo_{datetime.datetime.now().date()}.pkl')
def main():
    api_token = read_api_token()
    neptune.init(api_token=api_token,
                 project_qualified_name='jamesmccarthy65/JSMP')
    data = load_data('data/', mode='train', overide='filtered_train.csv')
    data, target, features, date = preprocess_data(data)
    data_dict = {'data': data, 'target': target, 'features': features, 'date': date}

    print('creating XGBoost Trials')
    xgb_exp = neptune.create_experiment('XGBoost_HPO')
    xgb_neptune_callback = opt_utils.NeptuneCallback(experiment=xgb_exp)
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: optimize(trial, data_dict), n_trials=100,
                   callbacks=[xgb_neptune_callback])
    joblib.dump(study, f'HPO/xgb_hpo_{datetime.datetime.now().date()}.pkl')

    print('Creating LightGBM Trials')
    lgb_exp = neptune.create_experiment('LGBM_HPO')
    lgbm_neptune_callback = opt_utils.NeptuneCallback(experiment=lgb_exp)
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: loptimize(trial, data_dict), n_trials=100,
                   callbacks=[lgbm_neptune_callback])
    joblib.dump(study, f'HPO/lgb_hpo_{datetime.datetime.now().date()}.pkl')
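# The two mains above assume objectives `optimize` / `loptimize` with the
# signature (trial, data_dict). A minimal sketch of the XGBoost one follows;
# the search space, hold-out split, and AUC metric are assumptions for
# illustration, not the author's actual code.
import xgboost as xgb
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

def optimize(trial, data_dict):
    # Suggest a small hyperparameter space and score on a held-out split.
    params = {
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
    }
    x_tr, x_val, y_tr, y_val = train_test_split(
        data_dict['data'], data_dict['target'], test_size=0.2, random_state=42)
    clf = xgb.XGBClassifier(**params)
    clf.fit(x_tr, y_tr)
    preds = clf.predict_proba(x_val)[:, 1]
    # The study is created with direction='maximize', so return the score.
    return roc_auc_score(y_val, preds)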
def optimize(self, optimizer: TModelOptimizer):
    optuna.logging.enable_default_handler()
    self.optuna_study.optimize(
        optimizer.evaluate_trial,
        n_trials=self.train_params['trials'],
        callbacks=[opt_utils.NeptuneCallback(log_study=True, log_charts=True)])
    opt_utils.log_study_info(self.optuna_study)
    study_importance = optuna.importance.get_param_importances(self.optuna_study)
    study_importance = dt.Frame(variable=list(study_importance.keys()),
                                valor=list(study_importance.values()))
    return study_importance
def find_minimizing_params(function, arguments, first_step_args=()):
    def objective_function(trial):
        mapped_arguments = [
            trial.suggest_int(argument.name, argument.min, argument.max)
            if argument.is_int
            else trial.suggest_uniform(argument.name, argument.min, argument.max)
            for argument in arguments
        ]
        cost = 0
        for data in training_data:
            emg_single_data = data[0]
            try:
                result = data[1]
                if function == onset_sign_changes or function == "onset_two_step_first_step":
                    value, right_side = onset_sign_changes(
                        emg_single_data, *mapped_arguments)
                elif function == onset_two_step_alg:
                    value = function(emg_single_data, *first_step_args,
                                     *mapped_arguments)
                else:
                    value = function(emg_single_data, *mapped_arguments)
                cost += abs(value - result)
                if function == "onset_two_step_first_step" and (
                        value is None or value > result or right_side < result):
                    cost += 5000
                if value == -1:
                    cost += 5000
            except Exception:
                # Penalise parameter sets for which onset detection fails.
                cost += 5000
        return cost

    if function == "onset_two_step_second_step":
        function = onset_two_step_alg

    neptune.init(project_qualified_name=project_name, api_token=personal_token)
    neptune.create_experiment(
        name=function if isinstance(function, str) else function.__name__)
    neptune_callback = opt_utils.NeptuneCallback()
    study = optuna.create_study(direction='minimize')
    study.optimize(objective_function, n_trials=OPTIMIZATION_TRIALS,
                   callbacks=[neptune_callback],
                   n_jobs=OPTIMIZATION_CONCURRENT_JOBS)
    print(study.best_params)
    print(study.best_value)
    print(study.best_trial)
    return study.best_params
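# Hypothetical usage of find_minimizing_params(). The Argument container is
# an assumption inferred from the .name/.min/.max/.is_int attributes used
# above, and the parameter names and ranges are illustrative only.
from collections import namedtuple

Argument = namedtuple('Argument', ['name', 'min', 'max', 'is_int'])

best_params = find_minimizing_params(
    onset_sign_changes,
    arguments=[Argument('window_size', 5, 50, True),
               Argument('threshold', 0.1, 2.0, False)])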
def tune(classifier, params: dict, alg: str, tags=None, preprocessors=None,
         test_size=0.2, random_state=42):
    """
    :param classifier: sklearn estimator class to tune
    :param params: dict of hyperparameter search spaces for the estimator
    :param alg: algorithm name, forwarded to Objective
    :param tags: optional tags for the Neptune experiment; the model name is appended by default
    :param preprocessors: optional preprocessors
    :param test_size: fraction of the data held out for testing
    :param random_state: random seed for the split
    """
    model_name = classifier.__name__
    tags = tags if tags is not None else []
    tags.append(model_name)
    neptune.init(project_qualified_name='jiashuxu/folklore', api_token=NEPTUNE_API)
    neptune.create_experiment(name=model_name, tags=tags)
    neptune_callback = opt_utils.NeptuneCallback(log_study=True, log_charts=True)
    study = optuna.create_study(direction="minimize")
    objective = Objective(
        classifier, params, alg,
        *get_data(filter_no=10, preprocess=["standard_scaler", "pca"]))
    study.optimize(objective, n_trials=50, callbacks=[neptune_callback])
    opt_utils.log_study_info(study)
    print(f"best merror score: {study.best_value} with {study.best_params}")
    neptune.stop()
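# Hypothetical call to tune(); the estimator, search-space dict layout, and
# alg name are illustrative assumptions -- the real format is whatever the
# project's Objective class expects.
from sklearn.ensemble import RandomForestClassifier

tune(RandomForestClassifier,
     params={'n_estimators': (50, 500), 'max_depth': (2, 12)},
     alg='rf',
     tags=['hpo'])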
def main():
    seed_everything(0)
    data = load_data(root_dir='./data/', mode='train')
    data, target, features, era = preprocess_data(data, ordinal=True)
    api_token = 'eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vdWkubmVwdHVuZS5haSIsImFwaV91cmwiOiJodHRwczovL3VpLm5lcHR1bmUuYWkiLCJhcGlfa2V5IjoiYWQxMjg3OGEtMGI1NC00NzFmLTg0YmMtZmIxZjcxZDM2NTAxIn0='
    neptune.init(api_token=api_token,
                 project_qualified_name='jamesmccarthy65/Numerai')
    nn_exp = neptune.create_experiment('SupAE_HPO')
    nn_neptune_callback = opt_utils.NeptuneCallback(experiment=nn_exp)
    study = optuna.create_study(direction='minimize')
    data_dict = {'data': data, 'target': target, 'features': features, 'era': era}
    study.optimize(lambda trial: optimize(trial, data_dict=data_dict),
                   n_trials=100, callbacks=[nn_neptune_callback])
    joblib.dump(study,
                f'hpo/params/SupAEnn_hpo_{datetime.datetime.now().date()}.pkl')
def main():
    seed_everything(0)
    data = load_data(root_dir='./data/', mode='train')
    data, target, features, date = preprocess_data(data, nn=True, action='multi')
    api_token = read_api_token()
    neptune.init(api_token=api_token,
                 project_qualified_name='jamesmccarthy65/JSMP')
    nn_exp = neptune.create_experiment('Resnet_HPO_Multiclass')
    nn_neptune_callback = opt_utils.NeptuneCallback(experiment=nn_exp)
    study = optuna.create_study(direction='minimize')
    data_dict = {'data': data, 'target': target, 'features': features, 'date': date}
    study.optimize(lambda trial: optimize(trial, data_dict=data_dict),
                   n_trials=100, callbacks=[nn_neptune_callback])
    joblib.dump(study, f'HPO/nn_hpo_{datetime.datetime.now().date()}.pkl')
def optuna_callback(self):
    # Lazily create a single NeptuneCallback bound to this experiment and
    # reuse it on subsequent calls; returns None when logging is disabled.
    if self.log and self._optuna_callback is None:
        self._optuna_callback = nmo.NeptuneCallback(experiment=self.experiment)
    return self._optuna_callback
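# Illustrative use of the cached callback above; `runner` and `objective`
# are assumed stand-ins for the owning object and its objective function.
study = optuna.create_study(direction='maximize')
cb = runner.optuna_callback()
study.optimize(objective, n_trials=20,
               callbacks=[cb] if cb is not None else [])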
def find_best_params(self):
    def objective(trial, params):
        # Suggest values of the hyperparameters using a trial object.
        n_bot_layers = trial.suggest_int('n_bot_layers', 2, 5)
        n_top_layers = trial.suggest_int('n_top_layers', 2, 4)
        bot_layers = []
        top_layers = []
        arch_sparse_feature_size = trial.suggest_int(
            'arch_sparse_feature_size', 16, 32)
        for i in range(n_bot_layers):
            if i == 0:
                # Fixed by the number of numerical columns in the input data.
                bot_layers.append(params["den_fea"])
            elif i == (n_bot_layers - 1):
                # Must match arch_sparse_feature_size.
                bot_layers.append(arch_sparse_feature_size)
            else:
                bot_features = trial.suggest_int(
                    'n_bot_units_l{}'.format(i), 32, 512)
                bot_layers.append(bot_features)
        for i in range(n_top_layers):
            if i == (n_top_layers - 1):
                # The output layer is always 1 unit (binary classification).
                top_layers.append(1)
            else:
                top_features = trial.suggest_int(
                    'n_top_units_l{}'.format(i), 32, 512)
                top_layers.append(top_features)
        arch_mlp_bot = '-'.join(str(x) for x in bot_layers)
        arch_mlp_top = '-'.join(str(x) for x in top_layers)
        learning_rate = trial.suggest_float('learning_rate', 0.001, 0.1)
        # loss_function = trial.suggest_categorical('loss_function', ['mse', 'bce'])

        # Assign the trial hyperparameters to params.
        params["arch_sparse_feature_size"] = arch_sparse_feature_size
        params["arch_mlp_bot"] = arch_mlp_bot
        params["arch_mlp_top"] = arch_mlp_top
        params["learning_rate"] = learning_rate

        # Run DLRM and log the validation results.
        dlrm_model = DLRM_Model(**params)
        validation_results = dlrm_model.run()
        for key in validation_results:
            if key not in ['classification_report', 'confusion_matrix']:
                neptune.log_metric(key, validation_results[key])
        if self.verbose:
            print('Parameters: ', params, '\nResults: ', validation_results)
        # ['best_auc_test'] -- need to decide which metric is best.
        return validation_results['best_pre_auc_test']

    # Fixed parameters.
    params = {
        "data_generation": 'dataset',
        "data_set": 'normal',  # 'kaggle'
        "raw_data_file": './input/trainday0day0day0day0.txt',
        # "processed_data_file": './input/kaggleAdDisplayChallenge_processed.npz',
        "loss_function": 'bce',
        # "round_targets": True,  # we want a ranked list instead of yes/no
        "mini_batch_size": 32,  # 128
        "print_freq": 32,  # 256
        "test_freq": 32,  # 128
        "mlperf_logging": True,
        "print_time": True,
        "test_mini_batch_size": 32,  # 256
        "den_fea": 13,
        "spa_fea": 26,
        # "test_num_workers": 16,
        # "save_model": 'dlrm_criteo_kaggle_.pytorch',
        # "use_gpu": True,
        # "enable_profiling": True,
        # "plot_compute_graph": True,
    }
    # Alternative configuration for the recsys dataset:
    # params = {
    #     "data_generation": 'dataset',
    #     "data_set": 'normal',
    #     "raw_data_file": './input/recsys_users.txt',
    #     # "processed_data_file": './input/kaggleAdDisplayChallenge_processed.npz',
    #     "loss_function": 'wbce',
    #     # "round_targets": True,  # we want a ranked list instead of yes/no
    #     "mini_batch_size": 128,
    #     "print_freq": 256,
    #     "test_freq": 128,
    #     "mlperf_logging": True,
    #     "print_time": True,
    #     "test_mini_batch_size": 256,
    #     "loss_weights": '0.0348-0.9652',
    #     "den_fea": 240,  # 90 / 13 dense (numerical) features -- main change between datasets
    #     "spa_fea": 35,   # 51 / 26 sparse (categorical) features -- main change between datasets
    #     # "test_num_workers": 16,
    #     # "save_model": 'dlrm_criteo_kaggle_.pytorch',
    #     # "use_gpu": True,
    #     # "enable_profiling": True,
    #     # "plot_compute_graph": True,
    # }

    neptune.init('pedrobaiz/dlrm', api_token=self.API_KEY)
    neptune.create_experiment('recsys-' + self.model_name,
                              tags=[str(self.neptune_tags)])
    neptune_callback = optuna_utils.NeptuneCallback()
    study = optuna.create_study(direction='maximize')
    study.optimize(lambda trial: objective(trial, params),
                   n_trials=self.max_evals, callbacks=[neptune_callback])
    optuna_utils.log_study(study)
    best_params = self.rebuild_mlps(params, study.best_params)
    print('finished find_best_params... ', best_params)
    return best_params
import neptune
import optuna

neptune.init(api_token='ANONYMOUS',
             project_qualified_name='shared/optuna-integration')

# Quickstart

## Step 1: Create an Experiment
neptune.create_experiment('optuna-sweep')

## Step 2: Create the Neptune Callback
import neptunecontrib.monitoring.optuna as opt_utils

neptune_callback = opt_utils.NeptuneCallback()

## Step 3: Run Optuna with the Neptune Callback
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100, callbacks=[neptune_callback])

## Step 4: Stop logging
# tests
exp = neptune.get_experiment()
neptune.stop()
# tests
all_logs = exp.get_logs()
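# The quickstart above calls an `objective` that is never defined. A minimal
# stand-in (the toy quadratic used in many Optuna examples -- an assumption,
# not part of the original snippet):
def objective(trial):
    # Maximise a simple function of two suggested parameters; replace with
    # real training and evaluation code.
    x = trial.suggest_uniform('x', -10, 10)
    y = trial.suggest_uniform('y', -10, 10)
    return -(x - 2) ** 2 - (y + 1) ** 2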
        'boosting_type': 'gbdt',
        'verbose': 1,
        'metric': 'auc'}
    x_tr, x_val = data[tr_idx], data[val_idx]
    y_tr, y_val = target[tr_idx], target[val_idx]
    train = lgb.Dataset(x_tr, label=y_tr)
    val = lgb.Dataset(x_val, label=y_val)
    clf = lgb.LGBMClassifier(n_estimators=1000, verbose_eval=True, **p)
    clf.fit(x_tr, y_tr, early_stopping_rounds=50)
    preds = clf.predict(x_val)
    score = roc_auc_score(y_val, preds)
    print(f'Fold {i} ROC AUC:\t', score)
    """
    api_token = read_api_token()
    neptune.init(api_token=api_token,
                 project_qualified_name='jamesmccarthy65/JSMP')
    data = load_data('data/', mode='train', overide='filtered_train.csv')
    data, target, features, date = preprocess_data(data)

    print('creating XGBoost Trials')
    xgb_exp = neptune.create_experiment('XGBoost_HPO')
    xgb_neptune_callback = opt_utils.NeptuneCallback(experiment=xgb_exp)
    study = optuna.create_study(direction='maximize')
    study.optimize(optimize, n_trials=500, callbacks=[xgb_neptune_callback])
    joblib.dump(study, f'HPO/xgb_hpo_{datetime.datetime.now().date()}.pkl')

    print('Creating LightGBM Trials')
    lgb_exp = neptune.create_experiment('LGBM_HPO')
    lgbm_neptune_callback = opt_utils.NeptuneCallback(experiment=lgb_exp)
    study = optuna.create_study(direction='maximize')
    study.optimize(loptimize, n_trials=500, callbacks=[lgbm_neptune_callback])
    joblib.dump(study, f'HPO/lgb_hpo_{datetime.datetime.now().date()}.pkl')