def test_cls():
    """Smoke-test ``Classifier`` with a bagging ensemble on the iris dataset.

    Splits iris into a stratified train/test partition (test_size=0.33,
    random_state=1), fits a ``Classifier`` with ``time_limit=60`` and holdout
    evaluation, then prints the fit summary and the accuracy on the test
    partition.

    The output directory is removed when the function finishes, including
    when fitting or prediction raises, so a failed run leaves nothing behind.
    """
    save_dir = './data/eval_exps/soln-ml'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    try:
        time_limit = 60
        print('==> Start to evaluate with Budget %d' % time_limit)
        ensemble_method = 'bagging'
        eval_type = 'holdout'

        iris = load_iris()
        X, y = iris.data, iris.target
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.33, random_state=1, stratify=y)
        dm = DataManager(X_train, y_train)
        train_data = dm.get_data_node(X_train, y_train)
        test_data = dm.get_data_node(X_test, y_test)

        clf = Classifier(time_limit=time_limit,
                         output_dir=save_dir,
                         enable_meta_algorithm_selection=False,
                         ensemble_method=ensemble_method,
                         ensemble_size=10,
                         evaluation=eval_type,
                         metric='acc')
        clf.fit(train_data)
        print(clf.summary())

        pred = clf.predict(test_data)
        # test_data.data[1] holds the labels passed to get_data_node above.
        print(accuracy_score(test_data.data[1], pred))
    finally:
        # Always clean up, even if fit/predict failed part-way through.
        shutil.rmtree(save_dir)
def main():
    """Fit a ``Classifier`` restricted to ``UserDefinedDecisionTree`` on iris.

    Splits iris into a stratified train/test partition (test_size=0.33,
    random_state=1), passes ``UserDefinedDecisionTree`` to ``add_classifier``,
    fits a ``Classifier`` that includes only that algorithm (no ensemble,
    ``time_limit=60``), and prints the fit summary and the test accuracy.

    The output directory is removed when the function finishes, including
    when fitting or prediction raises.
    """
    time_limit = 60
    print('==> Start to evaluate with Budget %d' % time_limit)

    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=1, stratify=y)
    dm = DataManager(X_train, y_train)
    train_data = dm.get_data_node(X_train, y_train)
    test_data = dm.get_data_node(X_test, y_test)

    save_dir = './data/eval_exps/soln-ml'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(save_dir, exist_ok=True)

    try:
        # Presumably makes the custom estimator selectable by name below --
        # TODO confirm against add_classifier.
        add_classifier(UserDefinedDecisionTree)
        clf = Classifier(time_limit=time_limit,
                         output_dir=save_dir,
                         enable_meta_algorithm_selection=False,
                         include_algorithms=['UserDefinedDecisionTree'],
                         ensemble_method=None,
                         metric='acc')
        clf.fit(train_data)
        print(clf.summary())

        pred = clf.predict(test_data)
        # test_data.data[1] holds the labels passed to get_data_node above.
        print(accuracy_score(test_data.data[1], pred))
    finally:
        # Always clean up, even if fit/predict failed part-way through.
        shutil.rmtree(save_dir)
def evaluate():
    """Run a fit/refit/predict/score cycle of ``Classifier`` on iris.

    Reads ``save_dir``, ``ensemble_method`` and ``eval_type`` from the
    enclosing (module) scope; they are not defined inside this function.

    Returns:
        bool: True when the whole cycle completed without raising, False
        when any step inside it raised an ``Exception``. On failure the
        traceback is printed so the cause is not lost.
    """
    import traceback

    iris = load_iris()
    X, y = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=1)

    try:
        dm = DataManager(X_train, y_train)
        train_data = dm.get_data_node(X_train, y_train)
        test_data = dm.get_data_node(X_test, y_test)

        clf = Classifier(dataset_name='iris',
                         time_limit=150,
                         output_dir=save_dir,
                         ensemble_method=ensemble_method,
                         evaluation=eval_type,
                         metric='acc')
        clf.fit(train_data)
        clf.refit()
        # The predictions themselves are not used; the call is kept so a
        # failure in predict() still makes this function report False.
        clf.predict(test_data)
        print('final score', clf.score(test_data))
    except Exception:
        # Report the failure instead of silently swallowing it; the boolean
        # return contract is unchanged.
        traceback.print_exc()
        return False
    return True
def evaluate_2rd_hmab(run_id, mth, dataset, algo, eval_type='holdout', time_limit=1200, seed=1):
    """Fit a single-algorithm ``Classifier`` on ``dataset`` and pickle its scores.

    Args:
        run_id: Integer run identifier; printed and embedded in the file name.
        mth: Value forwarded to ``fit`` as ``opt_strategy``; also part of the
            file name.
        dataset: Dataset name handed to ``load_train_test_data``.
        algo: The only entry of ``include_algorithms``.
        eval_type: Forwarded to ``Classifier`` as ``evaluation``.
        time_limit: Forwarded to ``Classifier`` as ``time_limit``.
        seed: Not used by this function.

    Side effects:
        Prints a short report and writes
        ``[dataset, validation_score, test_score]`` as a pickle. The file
        name is appended directly to the module-level ``save_folder`` (no
        path separator is inserted).
    """
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)

    from solnml.estimators import Classifier
    classifier = Classifier(
        time_limit=time_limit,
        per_run_time_limit=300,
        output_dir=save_folder,
        ensemble_method=None,
        evaluation=eval_type,
        enable_meta_algorithm_selection=False,
        metric='bal_acc',
        include_algorithms=[algo],
        n_jobs=1,
    )
    classifier.fit(train_data, opt_strategy=mth)

    predictions = classifier.predict(test_data)
    test_score = balanced_accuracy_score(test_data.data[1], predictions)

    # Only the performance series is needed here; the timestamps are ignored.
    _timestamps, perfs = classifier.get_val_stats()
    validation_score = np.max(perfs)

    print('Evaluation Num : %d' % len(perfs))
    print('Run ID         : %d' % run_id)
    print('Dataset        : %s' % dataset)
    print('Val/Test score : %f - %f' % (validation_score, test_score))

    file_name = '%s_%s_%d_%d_%s.pkl' % (mth, dataset, time_limit, run_id, algo)
    save_path = save_folder + file_name
    with open(save_path, 'wb') as out:
        pickle.dump([dataset, validation_score, test_score], out)
def evaluate_hmab(algorithms, run_id, time_limit=600, dataset='credit', eval_type='holdout', enable_ens=True, seed=1):
    """Fit, refit and score a ``Classifier`` on ``dataset``, then pickle the stats.

    Args:
        algorithms: Only its length is used, as part of the task id.
        run_id: Integer run identifier embedded in the output file name.
        time_limit: Forwarded to ``Classifier`` and embedded in the task id.
        dataset: Dataset name handed to ``load_train_test_data``.
        eval_type: Forwarded to ``Classifier`` as ``evaluation``.
        enable_ens: When exactly ``True``, ``'ensemble_selection'`` is used as
            the ensemble method; any other value disables ensembling.
        seed: Not used by this function.

    Side effects:
        Prints the scores and writes
        ``[validation_score, test_score, [timestamps, perfs]]`` as a pickle.
        The file name is appended directly to the module-level ``save_dir``
        (no path separator is inserted).
    """
    task_id = '[hmab][%s-%d-%d]' % (dataset, len(algorithms), time_limit)
    _start_time = time.time()
    train_data, test_data = load_train_test_data(dataset, task_type=MULTICLASS_CLS)

    # Identity check on purpose: only the literal True enables the ensemble.
    ensemble_method = 'ensemble_selection' if enable_ens is True else None

    classifier = Classifier(
        time_limit=time_limit,
        amount_of_resource=None,
        output_dir=save_dir,
        ensemble_method=ensemble_method,
        evaluation=eval_type,
        metric='bal_acc',
        n_jobs=1,
    )
    classifier.fit(train_data)
    classifier.refit()

    predictions = classifier.predict(test_data)
    test_score = balanced_accuracy_score(test_data.data[1], predictions)

    timestamps, perfs = classifier.get_val_stats()
    validation_score = np.max(perfs)

    print('Dataset                     : %s' % dataset)
    print('Validation/Test score       : %f - %f' % (validation_score, test_score))

    save_path = save_dir + '%s-%d.pkl' % (task_id, run_id)
    payload = [validation_score, test_score, [timestamps, perfs]]
    with open(save_path, 'wb') as out:
        pickle.dump(payload, out)
def evaluate_sys(run_id, task_type, mth, dataset, ens_method, enable_meta, eval_type='holdout', time_limit=1200, seed=1):
    """Fit a random-forest-only estimator on ``dataset`` and pickle the results.

    Args:
        run_id: Integer run identifier; printed and embedded in the file name.
        task_type: ``'cls'`` selects a ``Classifier`` scored with balanced
            accuracy; any other value selects a ``Regressor`` scored with
            mean squared error.
        mth: Forwarded to ``fit`` as ``opt_strategy``.
        dataset: Dataset name; also forwarded to ``fit`` as ``dataset_id``.
        ens_method: Forwarded to the estimator as ``ensemble_method``.
        enable_meta: The string ``'true'`` enables meta algorithm selection;
            anything else disables it.
        eval_type: Forwarded to the estimator as ``evaluation``.
        time_limit: Forwarded to the estimator as ``time_limit``.
        seed: Not used by this function.

    Side effects:
        Prints a short report, writes
        ``[dataset, validation_score, test_score, start_time, eval_dict]`` as
        a pickle whose name is appended directly to the module-level
        ``save_folder``, and finally deletes the estimator's output directory.
    """
    is_cls = task_type == 'cls'
    train_data, test_data = load_train_test_data(
        dataset, task_type=MULTICLASS_CLS if is_cls else REGRESSION)
    use_meta = enable_meta == 'true'

    # Constructor arguments shared by the classifier and the regressor.
    shared_kwargs = dict(
        time_limit=time_limit,
        per_run_time_limit=300,
        output_dir=save_folder,
        ensemble_method=ens_method,
        enable_meta_algorithm_selection=use_meta,
        evaluation=eval_type,
        include_algorithms=['random_forest'],
        n_jobs=1,
    )
    if is_cls:
        from solnml.estimators import Classifier
        estimator = Classifier(
            metric='bal_acc',
            include_preprocessors=['extra_trees_based_selector',
                                   'generic_univariate_selector',
                                   'liblinear_based_selector',
                                   'percentile_selector'],
            **shared_kwargs)
    else:
        from solnml.estimators import Regressor
        estimator = Regressor(
            metric='mse',
            include_preprocessors=['extra_trees_based_selector_regression',
                                   'generic_univariate_selector',
                                   'liblinear_based_selector',
                                   'percentile_selector_regression'],
            **shared_kwargs)

    # Taken right before fitting; stored in the pickle alongside the scores.
    start_time = time.time()
    estimator.fit(train_data, opt_strategy=mth, dataset_id=dataset)
    predictions = estimator.predict(test_data)

    score_fn = balanced_accuracy_score if is_cls else mean_squared_error
    test_score = score_fn(test_data.data[1], predictions)

    solver = estimator._ml_engine.solver
    validation_score = solver.incumbent_perf
    eval_dict = solver.get_eval_dict()

    print('Run ID         : %d' % run_id)
    print('Dataset        : %s' % dataset)
    print('Val/Test score : %f - %f' % (validation_score, test_score))

    # `ens_method is None` is formatted with %d, i.e. written as 0 or 1.
    file_name = 'extremely_small_%s_%s_%s_%s_%d_%d_%d.pkl' % (
        task_type, mth, dataset, enable_meta, time_limit, (ens_method is None), run_id)
    save_path = save_folder + file_name
    with open(save_path, 'wb') as out:
        pickle.dump([dataset, validation_score, test_score, start_time, eval_dict], out)

    # Delete output dir
    shutil.rmtree(os.path.join(estimator.get_output_dir()))
def model_fit(_id, obj, paramsj, X_trainj, y_trainj):
    """Train a classifier or regressor from JSON inputs and record the outcome.

    Args:
        _id: String identifier; the info file is written to
            ``./models_information/<_id>_information``.
        obj: ``'clf'`` to train a ``Classifier``, ``'reg'`` to train a
            ``Regressor``. Any other value is treated as a training failure.
        paramsj: JSON object string providing ``time_limit``,
            ``ensemble_method``, ``evaluation`` and ``metric``.
        X_trainj: JSON string loaded into a ``pandas.DataFrame`` and converted
            to an array of training features.
        y_trainj: JSON string loaded the same way; the first column is used
            as the training targets.

    Returns:
        int: 0 when training succeeded and the model information was written,
        -1 when reading the inputs or training raised an ``Exception`` (the
        info file then contains ``'Model training failed!'``).

    Raises:
        OSError: If the info file cannot be opened.
    """
    import traceback

    info_path = './models_information/' + _id + '_information'
    # The context manager guarantees the info file is closed on every path,
    # including an exception while collecting the result attributes below.
    with open(info_path, 'w') as info_file:
        print('Model training begins!')
        try:
            # read data
            X_train = np.array(pd.DataFrame(json.loads(X_trainj)))
            y_train = np.array(pd.DataFrame(json.loads(y_trainj)))[:, 0]
            params = json.loads(paramsj)

            dm = DataManager(X_train, y_train)
            train_data = dm.get_data_node(X_train, y_train)

            save_dir = '../data/eval_exps/soln-ml'
            os.makedirs(save_dir, exist_ok=True)

            # train model
            if obj == 'clf':
                mdl = Classifier(time_limit=params['time_limit'],
                                 output_dir=save_dir,
                                 ensemble_method=params['ensemble_method'],
                                 evaluation=params['evaluation'],
                                 metric=params['metric'],
                                 n_jobs=4)
            elif obj == 'reg':
                # NOTE(review): `n_jobs` is not defined in this function; it
                # must come from module scope, otherwise this branch raises
                # NameError and is reported as a training failure. The
                # classifier branch hard-codes 4 -- confirm which is intended.
                mdl = Regressor(metric=params['metric'],
                                ensemble_method=params['ensemble_method'],
                                evaluation=params['evaluation'],
                                time_limit=params['time_limit'],
                                output_dir=save_dir,
                                random_state=1,
                                n_jobs=n_jobs)
            else:
                # Previously this surfaced as an unbound-local error on `mdl`;
                # fail with a clear message (still reported as -1 below).
                raise ValueError("unsupported obj %r; expected 'clf' or 'reg'" % (obj,))
            mdl.fit(train_data)
        except Exception:
            # Narrower than a bare except (KeyboardInterrupt/SystemExit now
            # propagate) and the cause is printed instead of being dropped.
            traceback.print_exc()
            print('Model training failed!')
            info_file.write('Model training failed!')
            return -1

        result = {
            'best_algo_id': str(mdl.best_algo_id),
            'best_hpo_config': str(mdl.best_hpo_config),
            'nbest_algo_id': str(mdl.nbest_algo_id),
            'best_perf': str(mdl.best_perf),
            'best_fe_config': str(mdl.best_fe_config),
            # get_ens_model_info is not realized in this version yet
            'get_ens_model_info': str(mdl.get_ens_model_info),
        }
        info_file.write(json.dumps(result))

    print('Model training finished!')
    return 0
# Evaluate a Classifier on iris. `args`, `time_limit`, `eval_type` and
# `n_jobs` are expected to be defined earlier in the script.

# The literal string 'none' means "no ensemble".
ensemble_method = None if args.ens_method == 'none' else args.ens_method

save_dir = './data/eval_exps/soln-ml'
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

print('==> Start to evaluate with Budget %d' % time_limit)

iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=1)

dm = DataManager(X_train, y_train)
train_data = dm.get_data_node(X_train, y_train)
test_data = dm.get_data_node(X_test, y_test)

clf = Classifier(
    time_limit=time_limit,
    output_dir=save_dir,
    ensemble_method=ensemble_method,
    evaluation=eval_type,
    metric='acc',
    n_jobs=n_jobs,
)
clf.fit(train_data)

pred = clf.predict(test_data)
print(pred)
# test_data.data[1] holds the labels passed to get_data_node above.
print(balanced_accuracy_score(test_data.data[1], pred))

# Save and load example (disabled):
# saveloadmodel.save_model(clf, './data/model_clf9')
# ens = saveloadmodel.load_model('./data/model_clf9')
# print(ens.predict_proba(X_test))
import numpy as np
import os
import sys

# Make the current working directory importable before loading solnml.
sys.path.append(os.getcwd())

from solnml.components.feature_engineering.transformations.preprocessor.text2vector import (
    Text2VectorTransformation,
)
from solnml.components.feature_engineering.transformation_graph import DataNode
from solnml.components.utils.constants import *
from solnml.estimators import Classifier

# Toy dataset with four rows: one numeric column, two free-text columns and
# one discrete column, matching `feature_type` below.
x = np.array([
    [1, 'I am good', 'I am right', 3],
    [2, 'He is good', 'He is ok', 4],
    [2.5, 'Everyone is good', 'Everyone is ok', 7],
    [1.3333, 'well', 'what', 5],
])
y = np.array([0, 1, 0, 1])

# Instantiated but not referenced again in this script.
t2v = Text2VectorTransformation()

data = (x, y)
feature_type = [NUMERICAL, TEXT, TEXT, DISCRETE]
datanode = DataNode(data, feature_type)

clf = Classifier(
    time_limit=20,
    enable_meta_algorithm_selection=False,
    include_algorithms=['random_forest'],
)
clf.fit(datanode, opt_strategy='combined')

# Predictions are made on the same data node the classifier was fitted on.
print(clf.predict(datanode))