def GetSuggestions(self, request, context):
    """
    Main function to provide suggestion.

    Validates that the requested algorithm is "bayesianoptimization",
    builds the skopt-backed service on the first call, then asks it for
    ``request.request_number`` new assignments based on the trials carried
    by the request.
    """
    algorithm_spec = request.experiment.spec.algorithm
    algorithm_name, config = OptimizerConfiguration.convertAlgorithmSpec(
        algorithm_spec)

    # Only Bayesian optimization is served by this handler.
    if algorithm_name != "bayesianoptimization":
        raise Exception(
            "Failed to create the algorithm: {}".format(algorithm_name))

    if self.is_first_run:
        # The optimizer is created once and reused by later requests.
        space = HyperParameterSearchSpace.convert(request.experiment)
        service_kwargs = {
            "base_estimator": config.base_estimator,
            "n_initial_points": config.n_initial_points,
            "acq_func": config.acq_func,
            "acq_optimizer": config.acq_optimizer,
            "random_state": config.random_state,
            "search_space": space,
        }
        self.base_service = BaseSkoptService(**service_kwargs)
        self.is_first_run = False

    observed_trials = Trial.convert(request.trials)
    suggested_trials = self.base_service.getSuggestions(
        observed_trials, request.request_number)
    assignments = Assignment.generate(suggested_trials)
    return api_pb2.GetSuggestionsReply(parameter_assignments=assignments)
def GetSuggestions(self, request, context):
    """
    Main function to provide suggestion.

    Converts the algorithm settings of the request into a HyperBandParam,
    builds the next bracket of trials and returns their parameter
    assignments together with the updated algorithm settings. When
    ``param.current_s`` is negative an empty reply is returned.

    Any exception is logged with its traceback and re-raised unchanged.
    """
    # Bound before the ``try`` so the error handler can always reference it,
    # even when the failure happens before the experiment has been read.
    experiment_name = ""
    try:
        reply = api_pb2.GetSuggestionsReply()
        experiment = request.experiment
        experiment_name = experiment.name
        self.all_trials = request.trials
        alg_settings = experiment.spec.algorithm.algorithm_settings
        param = HyperBandParam.convert(alg_settings)
        if param.current_s < 0:
            # Hyperband outer loop has finished
            return reply

        # This is a hack to get request number.
        param.n = request.request_number
        trials = self._make_bracket(experiment, param)
        for trial in trials:
            reply.parameter_assignments.add(
                assignments=trial.parameter_assignments.assignments)
        reply.algorithm.CopyFrom(HyperBandParam.generate(param))
        return reply
    except Exception:
        logger.error("Fail to generate trials: \n%s",
                     traceback.format_exc(),
                     extra={"experiment_name": experiment_name})
        # Bare ``raise`` re-raises the active exception as-is.
        raise
def GetSuggestions(self, request, context):
    """
    Main function to provide suggestion.

    While holding ``self.lock``: creates the search space and the study on
    the first call, reports the trials carried by the request (if any) via
    ``self._tell`` and asks for ``request.request_number`` new assignments
    via ``self._ask``.
    """
    with self.lock:
        study_missing = self.study is None
        if study_missing:
            experiment = request.experiment
            self.search_space = HyperParameterSearchSpace.convert(experiment)
            self.study = self._create_study(
                request.experiment.spec.algorithm, self.search_space)

        observed_trials = Trial.convert(request.trials)
        # Nothing to report when the request carries no trials.
        if len(observed_trials) > 0:
            self._tell(observed_trials)

        suggestions = self._ask(request.request_number)
        assignments = Assignment.generate(suggestions)
        return api_pb2.GetSuggestionsReply(parameter_assignments=assignments)
def GetSuggestions(self, request, context):
    """
    Build the parameter assignments for new DARTS trials.

    Every requested trial receives the same three assignments, all derived
    from the experiment in the request: "algorithm-settings",
    "search-space" and "num-layers".

    Returns an ``api_pb2.GetSuggestionsReply`` holding
    ``request.current_request_number`` identical assignment sets.
    """
    # These values are derived from the request on every call. Previously
    # they were only bound on the first run, so any later call that
    # requested at least one trial failed with a NameError.
    nas_config = request.experiment.spec.nas_config
    num_layers = str(nas_config.graph_config.num_layers)
    search_space = get_search_space(nas_config.operations)

    settings_raw = request.experiment.spec.algorithm.algorithm_settings
    algorithm_settings = get_algorithm_settings(settings_raw)

    # JSON-encode, then swap double quotes for single quotes.
    search_space_str = json.dumps(search_space).replace('"', "'")
    algorithm_settings_str = json.dumps(algorithm_settings).replace('"', "'")

    # Kept for backward compatibility with code that inspects the flag.
    self.is_first_run = False

    parameter_assignments = []
    for _ in range(request.current_request_number):
        self.logger.info(">>> Generate new Darts Trial Job")
        self.logger.info(">>> Number of layers {}\n".format(num_layers))

        self.logger.info(">>> Search Space")
        self.logger.info("{}\n".format(search_space_str))

        self.logger.info(">>> Algorithm Settings")
        self.logger.info("{}\n\n".format(algorithm_settings_str))

        parameter_assignments.append(
            api_pb2.GetSuggestionsReply.ParameterAssignments(assignments=[
                api_pb2.ParameterAssignment(name="algorithm-settings",
                                            value=algorithm_settings_str),
                api_pb2.ParameterAssignment(name="search-space",
                                            value=search_space_str),
                api_pb2.ParameterAssignment(name="num-layers",
                                            value=num_layers)
            ]))

    return api_pb2.GetSuggestionsReply(
        parameter_assignments=parameter_assignments)
def GetSuggestions(self, request, context):
    """
    Main function to provide suggestion.

    Creates the chocolate-backed service on the first call (using the
    algorithm name and search space of the experiment), then asks it for
    ``request.request_number`` new assignments based on the trials carried
    by the request.
    """
    if self.is_first_run:
        # The service is created once and reused by later requests.
        space = HyperParameterSearchSpace.convert(request.experiment)
        algorithm_name = request.experiment.spec.algorithm.algorithm_name
        self.base_service = BaseChocolateService(
            algorithm_name=algorithm_name, search_space=space)
        self.is_first_run = False

    observed_trials = Trial.convert(request.trials)
    suggestions = self.base_service.getSuggestions(observed_trials,
                                                   request.request_number)
    assignments = Assignment.generate(suggestions)
    return api_pb2.GetSuggestionsReply(parameter_assignments=assignments)
def GetSuggestions(self, request, context):
    """
    Main function to provide suggestion.

    Converts the algorithm spec on every call, creates the hyperopt-backed
    service on the first call, then asks it for
    ``request.current_request_number`` new assignments based on the trials
    carried by the request.
    """
    algorithm_spec = request.experiment.spec.algorithm
    name, config = OptimizerConfiguration.convert_algorithm_spec(
        algorithm_spec)

    if self.is_first_run:
        # The service is created once and reused by later requests.
        space = HyperParameterSearchSpace.convert(request.experiment)
        self.base_service = BaseHyperoptService(
            algorithm_name=name,
            algorithm_conf=config,
            search_space=space,
        )
        self.is_first_run = False

    observed_trials = Trial.convert(request.trials)
    suggestions = self.base_service.getSuggestions(
        observed_trials, request.current_request_number)
    assignments = Assignment.generate(suggestions)
    return api_pb2.GetSuggestionsReply(parameter_assignments=assignments)
def GetSuggestions(self, request, context):
    """
    Sample new architecture candidates from the ENAS controller.

    First call: creates the EnasExperiment, initializes the controller
    variables, samples ``experiment.num_trials`` architectures and saves a
    checkpoint to ``experiment.ctrl_cache_file``.

    Later calls: restore the checkpoint, obtain the evaluation result of
    ``request.trials``, train the controller for
    ``algorithm_settings["controller_train_steps"]`` steps, sample new
    architectures and save the checkpoint again.

    Returns an ``api_pb2.GetSuggestionsReply`` with one "architecture" and
    one "nn_config" assignment per trial. When no evaluation result is
    available an empty list is returned instead.
    """
    # The experiment state is built once and kept on the servicer.
    if self.is_first_run:
        self.experiment = EnasExperiment(request, self.logger)
    experiment = self.experiment
    # A non-positive request number leaves the previous num_trials untouched.
    if request.current_request_number > 0:
        experiment.num_trials = request.current_request_number
    self.logger.info(
        "-" * 100 + "\nSuggestion Step {} for Experiment {}\n".format(
            experiment.suggestion_step, experiment.experiment_name) +
        "-" * 100)

    self.logger.info("")
    self.logger.info(">>> Current Request Number:\t\t{}".format(
        experiment.num_trials))
    self.logger.info("")

    with experiment.tf_graph.as_default():
        saver = tf.compat.v1.train.Saver()
        ctrl = experiment.controller

        # Controller attributes used below, addressed by name.
        controller_ops = {
            "loss": ctrl.loss,
            "entropy": ctrl.sample_entropy,
            "grad_norm": ctrl.grad_norm,
            "baseline": ctrl.baseline,
            "skip_rate": ctrl.skip_rate,
            "train_op": ctrl.train_op,
            "train_step": ctrl.train_step,
            "sample_arc": ctrl.sample_arc,
            "child_val_accuracy": ctrl.child_val_accuracy,
        }

        if self.is_first_run:
            self.logger.info(
                ">>> First time running suggestion for {}. Random architecture will be given."
                .format(experiment.experiment_name))
            with tf.compat.v1.Session() as sess:
                # No checkpoint exists yet: start from initialized variables.
                sess.run(tf.compat.v1.global_variables_initializer())
                candidates = list()
                for _ in range(experiment.num_trials):
                    candidates.append(
                        sess.run(controller_ops["sample_arc"]))

                # TODO: will use PVC to store the checkpoint to protect against unexpected suggestion pod restart
                saver.save(sess, experiment.ctrl_cache_file)

            self.is_first_run = False

        else:
            with tf.compat.v1.Session() as sess:
                # Continue from the state saved by the previous call.
                saver.restore(sess, experiment.ctrl_cache_file)

                result = self.GetEvaluationResult(request.trials)

                # TODO: (andreyvelich) I deleted this part, should it be handle by controller?
                # Sometimes training container may fail and GetEvaluationResult() will return None
                # In this case, the Suggestion will:
                # 1. Firstly try to respawn the previous trials after waiting for RESPAWN_SLEEP seconds
                # 2. If respawning the trials for RESPAWN_LIMIT times still cannot collect valid results,
                # then fail the task because it may indicate that the training container has errors.
                if result is None:
                    self.logger.warning(
                        ">>> Suggestion has spawned trials, but they all failed."
                    )
                    self.logger.warning(
                        ">>> Please check whether the training container is correctly implemented"
                    )
                    self.logger.info(">>> Experiment {} failed".format(
                        experiment.experiment_name))
                    # NOTE(review): this returns a bare list, whereas the
                    # normal path returns api_pb2.GetSuggestionsReply —
                    # confirm the caller / gRPC layer handles this as intended.
                    return []

                # This LSTM network is designed to maximize the metrics
                # However, if the user wants to minimize the metrics, we can take the negative of the result

                if experiment.opt_direction == api_pb2.MINIMIZE:
                    result = -result

                self.logger.info(
                    ">>> Suggestion updated. LSTM Controller Training\n")
                log_every = experiment.algorithm_settings[
                    "controller_log_every_steps"]
                # Steps are counted from 1 so the modulo check below logs on
                # every log_every-th step.
                for ctrl_step in range(
                        1, experiment.
                        algorithm_settings["controller_train_steps"] + 1):
                    run_ops = [
                        controller_ops["loss"], controller_ops["entropy"],
                        controller_ops["grad_norm"],
                        controller_ops["baseline"],
                        controller_ops["skip_rate"],
                        controller_ops["train_op"]
                    ]

                    # The evaluation result is fed in as child_val_accuracy.
                    loss, entropy, grad_norm, baseline, skip_rate, _ = sess.run(
                        fetches=run_ops,
                        feed_dict={
                            controller_ops["child_val_accuracy"]: result
                        })

                    controller_step = sess.run(
                        controller_ops["train_step"])
                    if ctrl_step % log_every == 0:
                        log_string = ""
                        log_string += "Controller Step: {} - ".format(
                            controller_step)
                        log_string += "Loss: {:.4f} - ".format(loss)
                        log_string += "Entropy: {:.9} - ".format(entropy)
                        log_string += "Gradient Norm: {:.7f} - ".format(
                            grad_norm)
                        log_string += "Baseline={:.4f} - ".format(baseline)
                        log_string += "Skip Rate={:.4f}".format(skip_rate)
                        self.logger.info(log_string)

                # Sample the next batch of architectures after training.
                candidates = list()
                for _ in range(experiment.num_trials):
                    candidates.append(
                        sess.run(controller_ops["sample_arc"]))

                saver.save(sess, experiment.ctrl_cache_file)

    # organized_candidates is filled below but not read again in this function.
    organized_candidates = list()
    parameter_assignments = list()

    for i in range(experiment.num_trials):
        arc = candidates[i].tolist()
        organized_arc = [0 for _ in range(experiment.num_layers)]
        record = 0
        # Split the flat sample into per-layer slices: layer k receives the
        # next k + 1 entries.
        for layer in range(experiment.num_layers):
            organized_arc[layer] = arc[record:record + layer + 1]
            record += layer + 1
        organized_candidates.append(organized_arc)

        nn_config = dict()
        nn_config['num_layers'] = experiment.num_layers
        nn_config['input_sizes'] = experiment.input_sizes
        nn_config['output_sizes'] = experiment.output_sizes
        nn_config['embedding'] = dict()
        # The first entry of each layer slice is used as the key into the
        # search space (presumably the selected operation — confirm).
        for layer in range(experiment.num_layers):
            opt = organized_arc[layer][0]
            nn_config['embedding'][opt] = experiment.search_space[
                opt].get_dict()

        organized_arc_json = json.dumps(organized_arc)
        nn_config_json = json.dumps(nn_config)

        # Double quotes are swapped for single quotes in the values sent out.
        organized_arc_str = str(organized_arc_json).replace('\"', '\'')
        nn_config_str = str(nn_config_json).replace('\"', '\'')

        self.logger.info(
            "\n>>> New Neural Network Architecture Candidate #{} (internal representation):"
            .format(i))
        self.logger.info(organized_arc_json)
        self.logger.info("\n>>> Corresponding Seach Space Description:")
        self.logger.info(nn_config_str)

        parameter_assignments.append(
            api_pb2.GetSuggestionsReply.ParameterAssignments(assignments=[
                api_pb2.ParameterAssignment(name="architecture",
                                            value=organized_arc_str),
                api_pb2.ParameterAssignment(name="nn_config",
                                            value=nn_config_str)
            ]))

    self.logger.info("")
    self.logger.info(">>> {} Trials were created for Experiment {}".format(
        experiment.num_trials, experiment.experiment_name))
    self.logger.info("")

    experiment.suggestion_step += 1

    return api_pb2.GetSuggestionsReply(
        parameter_assignments=parameter_assignments)