import time

from hyperopt import fmin, tpe, Trials
from sklearn import cross_validation

# parameter_dictionary, optimize_model_function_split and
# dictionary_in_list_convert_to_csv are project-local helpers assumed to be
# defined elsewhere in this module.


def optimize_model_parameter_split(x, y, model_name=None,
                                   loss_function="accuracy", parameter=None,
                                   max_evals=100, n_folds=5, isWrite=True,
                                   times=1, problem_pattern="classification"):
    """hyperopt model tuning"""
    if model_name is None and parameter is None:
        print "you must set parameter or model_name"
        return None
    elif parameter is not None:
        param = parameter
    elif model_name is not None:
        param = parameter_dictionary[model_name]
    else:
        return None

    x_trains = []
    x_tests = []
    y_trains = []
    y_tests = []
    # loop variable renamed from `time`, which shadowed the time module and
    # broke str(time.time()) below
    for split_nr in xrange(times):
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            x, y, test_size=0.0125)
        x_trains.append(x_train)
        x_tests.append(x_test)
        y_trains.append(y_train)
        y_tests.append(y_test)

    trials = Trials()
    function = lambda param: optimize_model_function_split(
        param, x_trains, x_tests, y_trains, y_tests, loss_function)

    print param
    print "========================================================================"
    best_param = fmin(function, param, algo=tpe.suggest,
                      max_evals=max_evals, trials=trials)
    print "========================================================================"
    print "write result to csv files"

    # write one row per trial to a csv file
    if isWrite:
        datas = []
        for trial_data in trials.trials:
            print trial_data
            trial_parameter_dictionary = {}
            trial_parameter_dictionary['model'] = model_name
            trial_parameter_dictionary['tid'] = trial_data['misc']['tid']
            for key, value in trial_data['misc']['vals'].items():
                print key, value[0]
                trial_parameter_dictionary[key] = value[0]
            trial_parameter_dictionary['loss'] = trial_data['result']['loss']
            trial_parameter_dictionary['status'] = trial_data['result']['status']
            datas.append(trial_parameter_dictionary)
        filename = str(time.time()) + ".csv"
        dictionary_in_list_convert_to_csv(datas, filename)

    print trials.statuses()
    return best_param
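# A minimal usage sketch of the function above; "xgboost" is assumed to be a
# key in parameter_dictionary, and the iris data merely stands in for real
# x, y arrays:
from sklearn.datasets import load_iris

iris = load_iris()
best = optimize_model_parameter_split(iris.data, iris.target,
                                      model_name="xgboost",
                                      loss_function="accuracy",
                                      max_evals=10, times=3)
print best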
def main():
    #with open(RESULTS_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    pprint.pprint(cache_data)
    #return

    #with open(RESULTS_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    for alg in ALGORITHMS:
    #        if ALGORITHMS[alg]["do_hyper"]:
    #            ALGORITHMS[alg]["opt_param"] = cache_data[alg]["opt_param"]

    #### STEP 1 - Get classified pointcloud ####
    environment = PointCloudEnvironment(my_print, TERRAIN_ASSESSMENT_FILE,
                                        POINTCLOUD_FILE)
    coverable_points = environment.coverable_pcd.points
    traversable_points = environment.traversable_pcd.points
    motion_planner = MotionPlanner(my_print, environment.traversable_pcd)

    #If from terrain assessment file:
    #with open(TERRAIN_ASSESSMENT_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    coverable_points = cache_data["coverable_points"]
    #    traversable_points = cache_data["traversable_points"]
    #traversable_pcd = PointCloud(my_print, points=traversable_points)
    #motion_planner = MotionPlanner(my_print, traversable_pcd)

    #### STEP 2 - Hyper parameters ####
    for algorithm_key, algorithm in ALGORITHMS.items():
        if algorithm["do_hyper"]:
            trials = Trials()
            hyper_optimizer = HyptoOptimizer(save_data, algorithm, my_print,
                                             HYPER_START_POS, motion_planner,
                                             coverable_points)
            if algorithm_key == "BA*":
                opt_param = fmin(hyper_optimizer.hyper_test_bastar,
                                 space=(hp.uniform('angle_offset', 0, np.pi * 2),
                                        hp.uniform('step_size', 0.5, 1),
                                        hp.uniform('visited_threshold', 0.25, 0.5)),
                                 algo=tpe.suggest,
                                 max_evals=HYPER_MAX_EVAL,
                                 trials=trials)
            elif algorithm_key == "Inward Spiral":
                opt_param = fmin(hyper_optimizer.hyper_test_inward_spiral,
                                 space=(hp.uniform('step_size', 0.5, 1),
                                        hp.uniform('visited_threshold', 0.25, 0.5)),
                                 algo=tpe.suggest,
                                 max_evals=HYPER_MAX_EVAL,
                                 trials=trials)
            elif algorithm_key == "Sampled BA*":
                coverage_2 = algorithm["hyper_min_coverage"] / 100
                opt_param = fmin(
                    hyper_optimizer.hyper_test_sampled_bastar_param,
                    space=(hp.uniform('coverage_1', 0.25, coverage_2),
                           hp.uniform('coverage_2', coverage_2 - 0.025, coverage_2),
                           hp.uniform('max_distance', 1, 10),
                           hp.uniform('max_distance_part_II', 1, 20),
                           hp.uniform('max_iterations', 30, 150),
                           hp.uniform('min_bastar_coverage', 0.005, 0.05),
                           hp.uniform('min_spiral_length', 2, 100),
                           hp.uniform('nbr_of_angles', 0.6, 8.4),
                           hp.uniform('step_size', 0.66, 1.33),
                           hp.uniform('visited_threshold', 0.25, 0.5)),
                    algo=tpe.suggest,
                    max_evals=HYPER_MAX_EVAL,
                    trials=trials)
            print(trials.statuses())
            algorithm["opt_param"] = opt_param
            algorithm["hyper_data"] = trials.trials
            ALGORITHMS[algorithm_key] = algorithm
            save_data(ALGORITHMS)

    #### STEP 3 - Full tests ####
    for start_point_nr in range(NUMBER_OF_START_POINTS):
        #start_point = get_random_point(traversable_points)
        start_point = start_points[start_point_nr]
        print("Start point " + str(start_point_nr) + ": " + str(start_point))
        for algorithm_key, algorithm in ALGORITHMS.items():
            if algorithm["do_experiment"]:
                experimenter = Experimenter(algorithm, print)
                parameters = None
                if "opt_param" in algorithm:
                    parameters = algorithm["opt_param"]
                cpp = algorithm["cpp"](my_print, motion_planner,
                                       coverable_points,
                                       algorithm["experiment_time_limit"],
                                       parameters)
                if "sample_specific_stats" in algorithm:
                    experimenter.perform_sample_cpp(cpp, start_point,
                                                    start_point_nr)
                    algorithm["sample_specific_stats"].append(
                        experimenter.sample_specific_stats)
                else:
                    experimenter.perform_cpp(cpp, start_point, start_point_nr)
                algorithm["experiment_results"].append(experimenter.results)
                ALGORITHMS[algorithm_key] = algorithm
                save_data(ALGORITHMS)
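# The save_data helper is not shown in this file. Judging by the commented-out
# pickle.load calls at the top of main(), a minimal stand-in could pickle the
# ALGORITHMS dict to RESULTS_FILE; this is a sketch under that assumption,
# not the project's actual implementation:
import pickle

def save_data(algorithms):
    # persist intermediate results so a crashed run can be resumed
    with open(RESULTS_FILE, 'wb') as results_file:
        pickle.dump(algorithms, results_file)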
# pickle and time were used below without being imported
import pickle
import time

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


def objective(x):
    return {
        'loss': x ** 2,
        'status': STATUS_OK,
        'eval_time': time.time(),
        'other_stuff': {'type': None, 'value': [0, 1, 2]},
        'attachments': {'time_module': pickle.dumps(time.time)}
    }

trials = Trials()
best = fmin(objective,
            space=hp.uniform('x', -10, 10),
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

print best
print trials.trials[0]
print trials.results[0]
print trials.losses()[0]
print trials.statuses()[0]
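# The attachments stored by the objective above are kept separately from the
# trial results; they can be read back per trial. This mirrors the retrieval
# pattern used further down in this file and assumes at least one trial ran:
msg = trials.trial_attachments(trials.trials[0])['time_module']
time_module = pickle.loads(msg)
print time_module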
def main():
    #with open(RESULTS_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    pprint.pprint(cache_data)
    #return

    with open(RESULTS_FILE, 'rb') as cached_pcd_file:
        cache_data = pickle.load(cached_pcd_file)

    ALGORITHMS = deepcopy(cache_data)
    for alg in ALGORITHMS:
        ALGORITHMS[alg]["do_hyper"] = False
        # first lambda parameter renamed from `print` to print_fn; it shadowed
        # the builtin (behaviour unchanged)
        ALGORITHMS[alg]["cpp"] = (
            lambda print_fn, motion_planner, cov_points, time_limit, parameters:
                RandomBAstar3(print_fn, motion_planner,
                              PointCloud(print_fn, points=cov_points),
                              time_limit, parameters))

    #### STEP 1 - Get classified pointcloud ####
    environment = PointCloudEnvironment(my_print, TERRAIN_ASSESSMENT_FILE,
                                        POINTCLOUD_FILE)
    coverable_points = environment.coverable_pcd.points
    traversable_points = environment.traversable_pcd.points
    motion_planner = MotionPlanner(my_print, environment.traversable_pcd)

    #If from terrain assessment file:
    #with open(TERRAIN_ASSESSMENT_FILE, 'rb') as cached_pcd_file:
    #    cache_data = pickle.load(cached_pcd_file)
    #    coverable_points = cache_data["coverable_points"]
    #    traversable_points = cache_data["traversable_points"]
    #traversable_pcd = PointCloud(my_print, points=traversable_points)
    #motion_planner = MotionPlanner(my_print, traversable_pcd)

    #### STEP 2 - Hyper parameters ####
    for algorithm_key, algorithm in ALGORITHMS.items():
        if algorithm["do_hyper"]:
            trials = Trials()
            hyper_optimizer = HyptoOptimizer(save_data, algorithm, my_print,
                                             HYPER_START_POS, motion_planner,
                                             coverable_points)
            opt_param = fmin(
                hyper_optimizer.hyper_test_newest_sampled_bastar_param,
                space=(hp.uniform('ba_exploration', 0.75, 0.95),
                       hp.uniform('max_distance', 1, 5),
                       hp.uniform('max_distance_part_II', 4, 10),
                       hp.uniform('min_bastar_cost_per_coverage', 5000, 10000),
                       hp.uniform('min_spiral_cost_per_coverage', 10000, 20000),
                       hp.uniform('step_size', 0.5, 1.0),
                       hp.uniform('visited_threshold', 0.25, 0.5)),
                algo=tpe.suggest,
                max_evals=HYPER_MAX_EVAL,
                trials=trials)
            print(trials.statuses())
            algorithm["opt_param"] = opt_param
            algorithm["hyper_data"] = trials.trials
            ALGORITHMS[algorithm_key] = algorithm
            save_data(ALGORITHMS)

    #### STEP 3 - Full tests ####
    for start_point_nr in range(NUMBER_OF_START_POINTS):
        #start_point = get_random_point(traversable_points)
        start_point = start_points[start_point_nr]
        print("Start point " + str(start_point_nr) + ": " + str(start_point))
        for algorithm_key, algorithm in ALGORITHMS.items():
            if algorithm["do_experiment"]:
                experimenter = Experimenter(algorithm, print)
                parameters = None
                if "opt_param" in algorithm:
                    parameters = algorithm["opt_param"]
                cpp = algorithm["cpp"](my_print, motion_planner,
                                       coverable_points,
                                       algorithm["experiment_time_limit"],
                                       parameters)
                if "sample_specific_stats" in algorithm:
                    experimenter.perform_sample_cpp(cpp, start_point,
                                                    start_point_nr)
                    algorithm["sample_specific_stats"].append(
                        experimenter.sample_specific_stats)
                else:
                    experimenter.perform_cpp(cpp, start_point, start_point_nr)
                algorithm["experiment_results"].append(experimenter.results)
                ALGORITHMS[algorithm_key] = algorithm
                save_data(ALGORITHMS)
def optimize_model_parameter_validation(x, y, model_name=None,
                                        loss_function="accuracy",
                                        parameter=None, max_evals=100,
                                        n_folds=5, isWrite=True,
                                        problem_pattern="classification"):
    """hyperopt model tuning"""
    if model_name is None and parameter is None:
        print "you must set parameter or model_name"
        return None
    elif parameter is not None:
        param = parameter
    elif model_name is not None:
        param = parameter_dictionary[model_name]
    else:
        return None

    validation_indexs = []
    if problem_pattern == "classification":
        for train_index, test_index in cross_validation.StratifiedKFold(
                y, n_folds=n_folds):
            validation_indexs.append((train_index, test_index))
    else:
        for train_index, test_index in cross_validation.KFold(
                len(y), n_folds=n_folds):
            validation_indexs.append((train_index, test_index))

    trials = Trials()
    function = lambda param: optimize_model_function(
        param, x, y, validation_indexs, loss_function)

    print param
    print "========================================================================"
    best_param = fmin(function, param, algo=tpe.suggest,
                      max_evals=max_evals, trials=trials)
    print "========================================================================"
    print "write result to csv files"

    # write one row per trial to a csv file
    if isWrite:
        datas = []
        for trial_data in trials.trials:
            print trial_data
            trial_parameter_dictionary = {}
            trial_parameter_dictionary['model'] = model_name
            trial_parameter_dictionary['tid'] = trial_data['misc']['tid']
            for key, value in trial_data['misc']['vals'].items():
                print key, value[0]
                trial_parameter_dictionary[key] = value[0]
            trial_parameter_dictionary['loss'] = trial_data['result']['loss']
            trial_parameter_dictionary['status'] = trial_data['result']['status']
            datas.append(trial_parameter_dictionary)
        filename = str(time.time()) + ".csv"
        dictionary_in_list_convert_to_csv(datas, filename)

    print trials.statuses()
    return best_param


def model_evaluation(clf, x, y, evaluate_function_name, labeled_type,
                     label_convert_type="normal"):
    # metrics where higher is better are negated so hyperopt can minimize them;
    # the original also assigned unused/undefined train_score variables in the
    # logloss and gini branches, which have been removed
    if evaluate_function_name == "accuracy":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
        score = -score
    elif evaluate_function_name == "logloss":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
    elif evaluate_function_name == "mean_squared_error":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
    elif evaluate_function_name == "gini":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
        score = -score
    elif evaluate_function_name == "rmsle":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
    elif evaluate_function_name == "auc":
        # the original referenced undefined x_test/y_test/params variables
        # here; evaluate on the given x, y like the other branches (models
        # such as XGBREGLOGISTIC, whose predict_proba returns a 1-D array,
        # would need the un-sliced output)
        y_pred = clf.predict_proba(x)[:, 1]
        score = evaluate_function(y, y_pred, evaluate_function_name)
        score = -score
    elif evaluate_function_name == "rmspe":
        y_pred = clf.predict(x)
        score = evaluate_function(y, y_pred, evaluate_function_name)
    return score
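# evaluate_function is supplied elsewhere in the project. A hedged sketch of
# the dispatcher the branches above rely on; the sklearn mapping is an
# assumption, and "gini", "rmsle" and "rmspe" would need custom
# implementations:
from sklearn.metrics import (accuracy_score, log_loss, mean_squared_error,
                             roc_auc_score)

def evaluate_function(y_true, y_pred, name):
    if name == "accuracy":
        return accuracy_score(y_true, y_pred)
    if name == "logloss":
        return log_loss(y_true, y_pred)
    if name == "mean_squared_error":
        return mean_squared_error(y_true, y_pred)
    if name == "auc":
        return roc_auc_score(y_true, y_pred)
    raise ValueError("unknown evaluate_function_name: %s" % name)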
class HyperoptImpl:

    def __init__(self, estimator=None, max_evals=50, cv=5,
                 handle_cv_failure=False, scoring='accuracy', best_score=0.0,
                 max_opt_time=None, max_eval_time=None,
                 pgo: Optional[PGO] = None, show_progressbar=True,
                 args_to_scorer=None, verbose=False):
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = LogisticRegression()
        else:
            self.estimator = estimator
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self._trials = Trials()
        self.max_opt_time = max_opt_time
        self.max_eval_time = max_eval_time
        self.show_progressbar = show_progressbar
        if args_to_scorer is not None:
            self.args_to_scorer = args_to_scorer
        else:
            self.args_to_scorer = {}
        self.verbose = verbose

    def fit(self, X_train, y_train):
        opt_start_time = time.time()
        self.cv = check_cv(self.cv, y=y_train, classifier=True)
        #TODO: Replace the classifier flag value by using tags?

        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            trainable = create_instance_from_hyperopt_search_space(
                self.estimator, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    trainable, X_train, y_train, cv=self.cv,
                    scoring=self.scoring, args_to_scorer=self.args_to_scorer)
                logger.debug(
                    "Successful trial of hyperopt with hyperparameters:{}".format(params))
            except BaseException as e:
                # if there is any error in cross validation, use the score
                # based on a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = \
                        train_test_split(X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score = scorer(trained, X_validation, y_validation,
                                      **self.args_to_scorer)
                    execution_time = time.time() - start
                    y_pred_proba = trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation,
                                           y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(
                        e, trainable.to_json()))
                    raise e
            return cv_score, logloss, execution_time

        def proc_train_test(params, X_train, y_train, return_dict):
            return_dict['params'] = copy.deepcopy(params)
            try:
                score, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train)
                return_dict['loss'] = self.best_score - score
                return_dict['time'] = execution_time
                return_dict['log_loss'] = logloss
                return_dict['status'] = STATUS_OK
            except BaseException as e:
                logger.warning(
                    f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} with hyperparams: {params}, setting status to FAIL")
                return_dict['status'] = STATUS_FAIL
                return_dict['error_msg'] = (
                    f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} with hyperparams: {params}")
                if self.verbose:
                    print(return_dict['error_msg'])

        def get_final_trained_estimator(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            trainable = create_instance_from_hyperopt_search_space(
                self.estimator, params)
            trained = trainable.fit(X_train, y_train)
            return trained

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and \
               ((current_time - opt_start_time) > self.max_opt_time):
                # if max optimization time is set and we have crossed it,
                # exit optimization completely
                sys.exit(0)
            if self.max_eval_time:
                # run the evaluation in a subprocess that can be interrupted
                manager = multiprocessing.Manager()
                proc_dict = manager.dict()
                p = multiprocessing.Process(
                    target=proc_train_test,
                    args=(params, X_train, y_train, proc_dict))
                p.start()
                p.join(self.max_eval_time)
                if p.is_alive():
                    p.terminate()
                    p.join()
                    logger.warning(
                        f"Maximum allotted evaluation time exceeded with hyperparams: {params}, setting status to FAIL")
                    proc_dict['status'] = STATUS_FAIL
                if 'status' not in proc_dict:
                    logger.warning("Corrupted results, setting status to FAIL")
                    proc_dict['status'] = STATUS_FAIL
            else:
                proc_dict = {}
                proc_train_test(params, X_train, y_train, proc_dict)
            return proc_dict

        try:
            fmin(f, self.search_space, algo=tpe.suggest,
                 max_evals=self.max_evals, trials=self._trials,
                 rstate=np.random.RandomState(SEED),
                 show_progressbar=self.show_progressbar)
        except SystemExit:
            logger.warning(
                'Maximum allotted optimization time exceeded. Optimization exited prematurely')
        except AllTrialsFailed:
            self._best_estimator = None
            if STATUS_OK not in self._trials.statuses():
                raise ValueError(
                    'Error from hyperopt, none of the trials succeeded.')

        try:
            best_params = space_eval(self.search_space, self._trials.argmin)
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'.format(
                    self.best_score - self._trials.average_best_error(),
                    self.max_evals, best_params))
            trained = get_final_trained_estimator(best_params, X_train, y_train)
            self._best_estimator = trained
        except BaseException as e:
            logger.warning(
                'Unable to extract the best parameters from optimization, the error: {}'.format(e))
            self._best_estimator = None
        return self

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        if self._best_estimator is None:
            raise ValueError(
                "Can not predict as the best estimator is None. Either an "
                "attempt to call `predict` before calling `fit` or all the "
                "trials during `fit` failed.")
        trained = self._best_estimator
        try:
            predictions = trained.predict(X_eval)
        except ValueError as e:
            logger.warning(
                "ValueError in predicting using Hyperopt:{}, the error is:{}".format(trained, e))
            predictions = None
        return predictions

    def summary(self):
        """Table summarizing the trial results (ID, loss, time, log_loss, status).

        Returns
        -------
        result : DataFrame
        """
        def make_record(trial_dict):
            # the .get defaults cover failed trials that never recorded a
            # loss, time or log_loss (the original computed these values in
            # dead try/except blocks and discarded them)
            return {
                'name': f'p{trial_dict["tid"]}',
                'tid': trial_dict['tid'],
                'loss': trial_dict['result'].get('loss', float('nan')),
                'time': trial_dict['result'].get('time', float('nan')),
                'log_loss': trial_dict['result'].get('log_loss', float('nan')),
                'status': trial_dict['result']['status']}
        records = [make_record(td) for td in self._trials.trials]
        result = pd.DataFrame.from_records(records, index='name')
        return result

    def get_pipeline(self, pipeline_name=None, astype='lale'):
        """Retrieve one of the trials.

        Parameters
        ----------
        pipeline_name : union type, default None
            - string
                Key for table returned by summary(), return a trainable pipeline.
            - None
                When not specified, return the best trained pipeline found.
        astype : 'lale' or 'sklearn', default 'lale'
            Type of resulting pipeline.

        Returns
        -------
        result : Trained operator if best, trainable operator otherwise.
        """
        if pipeline_name is None:
            result = getattr(self, '_best_estimator', None)
        else:
            tid = int(pipeline_name[1:])
            params = self._trials.trials[tid]['result']['params']
            result = create_instance_from_hyperopt_search_space(
                self.estimator, params)
        if result is None or astype == 'lale':
            return result
        assert astype == 'sklearn', astype
        return result.export_to_sklearn_pipeline()
def wikiLearn():
    """Working through the hyperopt wiki; not entirely clear on all of it."""
    # 1. A simple function
    from hyperopt import fmin, tpe, hp
    best = fmin(fn=lambda x: x ** 2,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print best

    # 2. An objective function returning an OK status
    from hyperopt import fmin, tpe, hp, STATUS_OK

    def objective(x):
        return {'loss': x ** 2, 'status': STATUS_OK}

    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100)
    print best

    # 3. Returning a dict
    import pickle
    import time
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

    def objective(x):
        return {
            'loss': x ** 2,
            'status': STATUS_OK,
            # -- store other results like this
            'eval_time': time.time(),
            'other_stuff': {'type': None, 'value': [0, 1, 2]},
            # -- attachments are handled differently
            'attachments': {'time_module': pickle.dumps(time.time)}
        }

    trials = Trials()
    best = fmin(objective,
                space=hp.uniform('x', -10, 10),
                algo=tpe.suggest,
                max_evals=100,
                trials=trials)
    print best
    print trials.trials
    print trials.results
    print trials.losses()
    print trials.statuses()

    # still not clear what attachments are for
    msg = trials.trial_attachments(trials.trials[5])['time_module']
    time_module = pickle.loads(msg)

    from hyperopt import hp
    space = hp.choice('a', [
        ('case 1', 1 + hp.lognormal('c1', 0, 1)),
        ('case 2', hp.uniform('c2', -10, 10))
    ])
    import hyperopt.pyll.stochastic
    print hyperopt.pyll.stochastic.sample(space)

    # hp.choice(label, options)
    # hp.randint(label, upper)              # [0, upper]
    # hp.uniform(label, low, high)
    # hp.quniform(label, low, high, q)      # round(uniform(low, high) / q) * q
    # hp.loguniform(label, low, high)
    # hp.qloguniform(label, low, high, q)   # round(exp(uniform(low, high)) / q) * q
    # hp.normal(label, mu, sigma)
    # hp.qnormal(label, mu, sigma, q)       # round(normal(mu, sigma) / q) * q
    # hp.lognormal(label, mu, sigma)
    # hp.qlognormal(label, mu, sigma, q)    # round(exp(normal(mu, sigma)) / q) * q

    # 4. Usage with sklearn
    from hyperopt import hp
    space = hp.choice('classifier_type', [
        {
            'type': 'naive_bayes',
        },
        {
            'type': 'svm',
            'C': hp.lognormal('svm_C', 0, 1),
            'kernel': hp.choice('svm_kernel', [
                {'ktype': 'linear'},
                {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
            ]),
        },
        {
            'type': 'dtree',
            'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
            'max_depth': hp.choice('dtree_max_depth',
                                   [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
            'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
        },
    ])

    # 5. scope.define (still haven't fully figured this out)
    import hyperopt.pyll
    from hyperopt.pyll import scope

    @scope.define
    def foo(a, b=0):
        print 'running foo', a, b
        return a + b / 2

    # -- this will print 0; foo is called as usual
    print foo(0)

    # In describing search spaces you can use `foo` as you would in normal
    # Python. These two calls will not actually call foo; they just record
    # that foo should be called to evaluate the graph.
    space1 = scope.foo(hp.uniform('a', 0, 10))
    space2 = scope.foo(hp.uniform('a', 0, 10), hp.normal('b', 0, 1))

    # -- this will print a pyll.Apply node
    print space1

    # -- this will draw a sample by running foo()
    print hyperopt.pyll.stochastic.sample(space1)
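# A sketch of consuming the sklearn-style space from section 4 above: sample a
# configuration and dispatch on its 'type' key to build a concrete classifier.
# The space is assumed to be in scope; the gamma/width mapping and the int
# casts are assumptions, not from the wiki:
import hyperopt.pyll.stochastic
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

def build_classifier(cfg):
    if cfg['type'] == 'naive_bayes':
        return GaussianNB()
    if cfg['type'] == 'svm':
        kernel = cfg['kernel']
        if kernel['ktype'] == 'linear':
            return SVC(C=cfg['C'], kernel='linear')
        return SVC(C=cfg['C'], kernel='rbf', gamma=1.0 / kernel['width'])
    max_depth = cfg['max_depth']
    return DecisionTreeClassifier(
        criterion=cfg['criterion'],
        max_depth=None if max_depth is None else int(max_depth),
        min_samples_split=max(2, int(cfg['min_samples_split'])))

clf = build_classifier(hyperopt.pyll.stochastic.sample(space))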
def best_net(
        # output
        best_net_filename, best_preproc_filename, best_params_filename,
        best_stage_results_filename,
        # search process parameters:
        overfit, max_evals, trials_filename, overwrite_trials,
        temp_net_filename, temp_preproc_filename, temp_stage_results_filename,
        dataset_dir, dataset_name=None, dataset_size=None,
        dataset_static=False, dataset_inmem=False, shuffle_train=True,
        seed=42, image_data_format=K.image_data_format(),
        cache_datasets=False, preproc='default', min_epochs=3, max_epochs=50,
        retrain=False):
    # from the search space we get:
    # no_xval, no_test,
    # features, img_side, resolution_degrees, grayscale, angle_encoding,
    # force_xy, bounding, dropout,
    # batch_size, optimizer, lr, optimizer_kwargs

    # define the space unconditionally; the original only defined it in the
    # else-branch below, leaving `space` unbound when resuming from a cached
    # trials file
    space = get_space(overfit)
    if os.path.isfile(trials_filename) and not overwrite_trials:
        with open(trials_filename, 'rb') as trials_file:
            trials = pickle.load(trials_file)
    else:
        trials = Trials()

    def objective(args):
        args = fix_args(**args)
        safe_save(trials, trials_filename)
        print('experiment', objective.iter, '/', max_evals,
              '; best err:', objective.min)
        print('training args:', args)
        try:
            resolution_degrees = None
            err = train(
                temp_net_filename, temp_preproc_filename,
                temp_stage_results_filename, dataset_dir, dataset_name,
                dataset_size, dataset_static, dataset_inmem, shuffle_train,
                seed, image_data_format, args['no_xval'], args['no_test'],
                cache_datasets, args['features'], args['img_side'],
                resolution_degrees, args['grayscale'], preproc,
                args['angle_encoding'], args['force_xy'], args['bounding'],
                args['n_classes'], args['convs_per_block'],
                args['skip_layer_connections'], args['dropout'],
                args['l2_penalty'], args['batch_size'], args['optimizer'],
                args['lr'], args['optimizer_kwargs'], min_epochs, max_epochs,
                args['stages'], retrain)
        except Exception:  # a bare except would also swallow KeyboardInterrupt
            print('model training failed!')
            traceback.print_exc()
            return {'status': STATUS_FAIL}
        else:
            if err < objective.min:
                shutil.copyfile(temp_net_filename, best_net_filename)
                shutil.copyfile(temp_preproc_filename, best_preproc_filename)
                shutil.copyfile(temp_stage_results_filename,
                                best_stage_results_filename)
                all_args = args.copy()
                all_args.update({
                    'dataset_dir': dataset_dir,
                    'dataset_name': dataset_name,
                    'dataset_size': dataset_size,
                    'dataset_static': dataset_static,
                    'dataset_inmem': dataset_inmem,
                    'shuffle_train': shuffle_train,
                    'image_data_format': image_data_format,
                    'cache_datasets': cache_datasets,
                    'resolution_degrees': resolution_degrees,
                    'preproc': preproc,
                    'min_epochs': min_epochs,
                    'max_epochs': max_epochs,
                    'retrain': retrain
                })
                with open(best_params_filename, 'wb') as best_params_file:
                    pickle.dump(all_args, best_params_file)
                print('NEW BEST FOUND')
            objective.min = min(err, objective.min)
            # return {'loss': train_loss_at_best_xval, 'true_loss': -best_corr_xval,
            #         'status': STATUS_OK, 'model': model}
            return {'loss': err, 'status': STATUS_OK, 'args': args}
        finally:
            objective.iter += 1
            gc.collect()  # try to help free some memory...

    objective.min = numpy.inf
    objective.iter = 1

    # by default, tpe.suggest runs 20 random configurations in the beginning
    # to get a rough map of the space; to override this behaviour, use:
    # algo = lambda *args, **kwargs: tpe.suggest(*args, n_startup_jobs=5, **kwargs)
    algo = tpe.suggest

    fmin(objective, space, algo, max_evals, trials,
         rstate=numpy.random.RandomState(42))
    safe_save(trials, trials_filename)

    # trials.losses() is aligned with trials.results, so the index of the
    # minimum is already the trial id; the original indexed a list of OK trial
    # ids with this global index, which misaddressed the best trial whenever
    # any trial had failed
    losses = trials.losses()
    losses = [numpy.inf if loss is None else loss for loss in losses]
    min_loss = min(losses)
    min_loss_id = losses.index(min_loss)
    best_args = trials.results[min_loss_id]
    return best_args
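# The n_startup_jobs override mentioned in the comment above can also be
# written with functools.partial; same effect as the lambda, just tidier:
from functools import partial

algo = partial(tpe.suggest, n_startup_jobs=5)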
# why does hp.randint('x', 10) always return same number?
# works without space=____

# the space must be defined before it is passed to fmin; the original defined
# it at the bottom of the file with a stray trailing comma after the closing
# bracket of hp.choice(...), which silently turned `space` into a one-element
# tuple and is a likely source of the mysterious editor errors asked about
# below
space = hp.choice('a', [
    ('case 1', 1 + hp.randint('c1', 10)),
    ('case 2', hp.uniform('c2', -10, 10))
])

trials = Trials()
best = fmin(objective,
            space=space,  # space=hp.quniform('x', -10, 10, .00001),
            algo=tpe.suggest,
            max_evals=10,
            trials=trials)

# why is this red? where is the syntax error?
print best
print hyperopt.space_eval(space, best)

# always prints all the floats, regardless of status
print trials.losses()
print trials.statuses()
print trials.results

# here it's not red..
print best

# msg = trials.trial_attachments(trials.trials[5])['time_module']
# time_module = pickle.loads(msg)
# print time_module
# print msg
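# For completeness, a hedged, self-contained version of the snippet above.
# The stand-in objective and the fixed rstate are assumptions, not from the
# original; the seeded rstate makes the sampled values reproducible across
# runs (older hyperopt versions accept a numpy RandomState, as used earlier
# in this file):
import numpy
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials

def objective(args):
    case, val = args  # args is the sampled ('case ...', value) tuple
    return {'loss': float(val) ** 2, 'status': STATUS_OK}

space = hp.choice('a', [
    ('case 1', 1 + hp.randint('c1', 10)),
    ('case 2', hp.uniform('c2', -10, 10))
])

trials = Trials()
best = fmin(objective, space=space, algo=tpe.suggest, max_evals=10,
            trials=trials, rstate=numpy.random.RandomState(42))
print(best)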