def getEvalHistory(self, studyID, obj_name, burn_in):
    """Collect (parameter set, objective value) pairs from completed trials.

    Requests the latest metrics log of every worker of ``studyID`` from the
    manager and keeps, for each worker whose status is COMPLETED, its
    parameter set together with the last logged value of ``obj_name``.

    Args:
        studyID: identifier of the study to query.
        obj_name: name of the metric to read from each worker's metrics logs.
        burn_in: the collected history is discarded unless strictly more than
            this many completed trials were found.

    Returns:
        A tuple ``(x_train, y_train)`` of parameter sets and the matching
        objective values as floats; both are empty lists while the number of
        completed trials is not above ``burn_in``.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        info_request = api_pb2.GetWorkerFullInfoRequest(
            study_id=studyID, only_latest_log=True)
        # Second positional argument of the stub call is presumably the
        # timeout in seconds -- confirm against the gRPC beta stub API.
        worker_hist = client.GetWorkerFullInfo(info_request,
                                               10).worker_full_infos

    x_train, y_train = [], []
    for worker in worker_hist:
        if worker.Worker.status != api_pb2.COMPLETED:
            continue
        for metrics in worker.metrics_logs:
            if metrics.name != obj_name:
                continue
            # Only the first metrics log carrying the objective is used.
            y_train.append(float(metrics.values[-1].value))
            x_train.append(worker.parameter_set)
            break

    log_extra = {"StudyID": studyID}
    self.logger.info("%d completed trials are found.",
                     len(x_train),
                     extra=log_extra)

    if len(x_train) > burn_in:
        self.logger.debug("Completed trials: %r", x_train, extra=log_extra)
        return x_train, y_train

    self.logger.info(
        "Trials will be sampled until %d trials for burn-in are completed.",
        burn_in,
        extra=log_extra)
    return [], []
def _get_study_param(self):
    """Load the study configuration from the manager into instance attributes.

    Fetches the study identified by ``self.study_id`` and sets:

    * ``study_name``, ``opt_direction`` (the optimization type, i.e. minimize
      or maximize) and ``objective_name`` from the study config;
    * ``num_layers``, ``input_size`` and ``output_size`` from the NAS graph
      config, converted to ``int`` / lists of ``int``;
    * ``search_space`` and ``num_operations`` from a ``SearchSpace`` built
      out of the NAS operations.

    Finally calls ``self.print_search_space()``.
    """
    channel = grpc.beta.implementations.insecure_channel(
        MANAGER_ADDRESS, MANAGER_PORT)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        study_request = api_pb2.GetStudyRequest(study_id=self.study_id)
        study_reply = client.GetStudy(study_request, 10)

    config = study_reply.study_config
    self.study_name = config.name
    self.opt_direction = config.optimization_type
    self.objective_name = config.objective_value_name

    nas_config = config.nas_config
    graph = nas_config.graph_config
    self.num_layers = int(graph.num_layers)
    self.input_size = [int(dim) for dim in graph.input_size]
    self.output_size = [int(dim) for dim in graph.output_size]

    space = SearchSpace(nas_config.operations)
    self.search_space = space.search_space
    self.num_operations = space.num_operations

    self.print_search_space()
def GetEvaluationResult(self, study):
    """Average the objective metric over the previously created trials.

    Queries the manager for the latest metrics logs of ``study`` and records,
    for every COMPLETED worker whose trial id is in ``self.prev_trial_ids``,
    the last logged value of ``study.objective_name``.

    Args:
        study: object providing ``study_id``, ``objective_name`` and
            ``num_trials``.

    Returns:
        The sum of the recorded values divided by ``study.num_trials`` when
        exactly ``study.num_trials`` trials have a recorded value; ``None``
        otherwise.
    """
    channel = grpc.beta.implementations.insecure_channel(
        MANAGER_ADDRESS, MANAGER_PORT)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        info_request = api_pb2.GetWorkerFullInfoRequest(
            study_id=study.study_id, only_latest_log=True)
        trials_list = client.GetWorkerFullInfo(info_request,
                                               10).worker_full_infos

    completed_trials = {}
    for trial in trials_list:
        if (trial.Worker.trial_id not in self.prev_trial_ids
                or trial.Worker.status != api_pb2.COMPLETED):
            continue
        for metrics in trial.metrics_logs:
            if metrics.name == study.objective_name:
                completed_trials[trial.Worker.trial_id] = float(
                    metrics.values[-1].value)

    # Not every previous trial has reported yet: the caller gets None.
    if len(completed_trials) != study.num_trials:
        return None

    self.logger.info(">>> Evaluation results of previous trials:")
    for trial_id, value in completed_trials.items():
        self.logger.info("{}: {}".format(trial_id, value))
    avg_metrics = sum(completed_trials.values()) / study.num_trials
    self.logger.info("The average is {}\n".format(avg_metrics))
    return avg_metrics
def getStudyConfig(self, studyID):
    """Return the ``study_config`` of the study identified by ``studyID``.

    The study is fetched from the manager at
    ``self.manager_addr:self.manager_port`` through a ``GetStudy`` call.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        study_request = api_pb2.GetStudyRequest(study_id=studyID)
        study_reply = client.GetStudy(study_request, 10)
    return study_reply.study_config
def registerTrials(self, trials):
    """Register each trial with the manager and record the ids it assigns.

    Args:
        trials: indexable collection of trial messages; each element gets
            its ``trial_id`` overwritten with the id from the manager's
            ``CreateTrial`` reply.

    Returns:
        The same ``trials`` object, updated in place.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        for position, trial in enumerate(trials):
            create_request = api_pb2.CreateTrialRequest(trial=trial)
            reply = client.CreateTrial(create_request, 10)
            trials[position].trial_id = reply.trial_id
    return trials
def _get_suggestion_param(self):
    """Fetch and parse the suggestion parameters stored under ``self.param_id``.

    The raw parameters returned by the manager are passed through
    ``parseSuggestionParam`` and the result is stored in
    ``self.suggestion_config``; ``self.print_suggestion_params()`` is called
    afterwards.
    """
    channel = grpc.beta.implementations.insecure_channel(
        MANAGER_ADDRESS, MANAGER_PORT)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        param_request = api_pb2.GetSuggestionParametersRequest(
            param_id=self.param_id)
        param_reply = client.GetSuggestionParameters(param_request, 10)

    self.suggestion_config = parseSuggestionParam(
        param_reply.suggestion_parameters)
    self.print_suggestion_params()
def GetEvaluationResult(self, studyID, trialID):
    """Return feature-map statistics for a completed trial.

    Requests the full (not only latest) metrics logs of the workers of
    ``trialID`` in ``studyID`` and hands the first metrics log named
    ``self.objective_name`` that belongs to a COMPLETED worker to
    ``self.get_featuremap_statistics``.

    Args:
        studyID: identifier of the study.
        trialID: identifier of the trial whose workers are inspected.

    Returns:
        Whatever ``self.get_featuremap_statistics`` returns for the matching
        metrics log, or ``None`` when no completed worker carries the
        objective metric.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        info_request = api_pb2.GetWorkerFullInfoRequest(
            study_id=studyID, trial_id=trialID, only_latest_log=False)
        worker_list = client.GetWorkerFullInfo(info_request,
                                               10).worker_full_infos

    for worker in worker_list:
        if worker.Worker.status != api_pb2.COMPLETED:
            continue
        for metrics in worker.metrics_logs:
            if metrics.name == self.objective_name:
                return self.get_featuremap_statistics(metrics)
    return None
def GetSuggestions(self, request, context):
    """Produce the next architecture trial for ``request.study_id``.

    Steps:
      1. If the request targets a study other than ``self.current_study_id``,
         call ``self.generate_arch(request)``.
      2. At iteration 0 take the generator's initial architecture; for later
         iterations up to ``self.restruct_itr`` derive a new architecture from
         the current one and the evaluation result of the previous trial.
         Beyond ``self.restruct_itr`` the architecture is left unchanged.
      3. Serialize the architecture and the suggestion config as JSON with
         double quotes replaced by single quotes, wrap them (plus the current
         iteration) in a single trial and register it with the manager.

    Args:
        request: suggestion request; only ``study_id`` is read here.
        context: gRPC context (unused).

    Returns:
        ``api_pb2.GetSuggestionsReply`` holding the one created trial.
    """
    if request.study_id != self.current_study_id:
        self.generate_arch(request)

    if self.current_itr == 0:
        self.arch = self.generator.get_init_arch()
    elif self.current_itr <= self.restruct_itr:
        previous_result = self.GetEvaluationResult(request.study_id,
                                                   self.prev_trial_id)
        self.arch = self.generator.get_arch(self.arch, previous_result)

    self.logger.info("Architecture at itr={}".format(self.current_itr))
    self.logger.info(self.arch)

    serialized_arch = json.dumps(self.arch)
    serialized_config = json.dumps(self.suggestion_config)
    arch_value = str(serialized_arch).replace('\"', '\'')
    config_value = str(serialized_config).replace('\"', '\'')

    # Every value is shipped as a CATEGORICAL string parameter.
    named_values = [
        ("architecture", arch_value),
        ("parameters", config_value),
        ("current_itr", str(self.current_itr)),
    ]
    trial = api_pb2.Trial(
        study_id=request.study_id,
        parameter_set=[
            api_pb2.Parameter(name=param_name,
                              value=param_value,
                              parameter_type=api_pb2.CATEGORICAL)
            for param_name, param_value in named_values
        ],
    )

    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        reply = client.CreateTrial(api_pb2.CreateTrialRequest(trial=trial),
                                   10)
        trial.trial_id = reply.trial_id

    self.prev_trial_id = reply.trial_id
    self.current_itr += 1
    return api_pb2.GetSuggestionsReply(trials=[trial])
def _get_suggestion_param(self, paramID):
    """Load the suggestion parameters for ``paramID`` and merge in I/O sizes.

    The parameters fetched from the manager are parsed with
    ``parseSuggestionParam`` and stored as ``self.suggestion_config``, which
    is then extended with the first element of ``self.input_size`` and of
    ``self.output_size``. The config's ``max_layers_per_stage`` entry is
    copied into ``self.search_space`` and the final config is logged.

    Args:
        paramID: identifier of the suggestion parameter set to fetch.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        param_request = api_pb2.GetSuggestionParametersRequest(
            param_id=paramID)
        param_reply = client.GetSuggestionParameters(param_request, 10)

    config = parseSuggestionParam(param_reply.suggestion_parameters)
    self.suggestion_config = config
    config.update({"input_size": self.input_size[0]})
    config.update({"output_size": self.output_size[0]})

    layers_per_stage = config["max_layers_per_stage"]
    self.search_space.update({"max_layers_per_stage": layers_per_stage})

    self.logger.info("Suggestion Config: {}".format(self.suggestion_config))
def GetEvaluationResult(self, studyID):
    """Return the objective value reported by the previous trial.

    Requests the latest metrics logs of the workers of
    ``self.prev_trial_id`` in ``studyID`` and looks for the metric named
    ``self.objective_name`` on a COMPLETED worker.

    Args:
        studyID: identifier of the study the previous trial belongs to.

    Returns:
        The last logged objective value as a float, or ``None`` when no
        completed worker carries that metric.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        info_request = api_pb2.GetWorkerFullInfoRequest(
            study_id=studyID,
            trial_id=self.prev_trial_id,
            only_latest_log=True)
        worker_list = client.GetWorkerFullInfo(info_request,
                                               10).worker_full_infos

    for worker in worker_list:
        if worker.Worker.status != api_pb2.COMPLETED:
            continue
        for metrics in worker.metrics_logs:
            if metrics.name != self.objective_name:
                continue
            latest_value = metrics.values[-1].value
            self.logger.info(
                "Evaluation result of previous candidate: {}".format(
                    latest_value))
            return float(latest_value)
    return None
def _get_search_space(self, studyID):
    """Read the NAS search space of ``studyID`` into instance attributes.

    Sets ``objective_name``, ``stages`` (the graph config's ``num_layers``
    as ``int``), ``input_size`` / ``output_size`` (lists of ``int``) and
    ``search_space`` (built by ``SearchSpace`` from the NAS operations and
    extended with a ``"stages"`` entry), then logs the search space.

    Args:
        studyID: identifier of the study to fetch from the manager.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        study_reply = client.GetStudy(
            api_pb2.GetStudyRequest(study_id=studyID), 10)

    config = study_reply.study_config
    self.objective_name = config.objective_value_name

    nas_config = config.nas_config
    graph = nas_config.graph_config
    self.stages = int(graph.num_layers)
    self.input_size = [int(dim) for dim in graph.input_size]
    self.output_size = [int(dim) for dim in graph.output_size]

    self.search_space = SearchSpace(nas_config.operations).search_space
    self.search_space.update({"stages": self.stages})

    self.logger.info("Search Space: {}".format(self.search_space))
def _get_suggestion_param(self, paramID, studyID):
    """Fetch, log and store the controller parameters for a study.

    The suggestion parameters registered under ``paramID`` are parsed with
    ``parseSuggestionParam``, logged one per line and stored in
    ``self.suggestion_config``.

    Args:
        paramID: identifier of the suggestion parameter set to fetch.
        studyID: study identifier, used only in the log header.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        param_request = api_pb2.GetSuggestionParametersRequest(
            param_id=paramID)
        param_reply = client.GetSuggestionParameters(param_request, 10)

    suggestion_params = parseSuggestionParam(
        param_reply.suggestion_parameters)

    self.logger.info(
        "Parameters of LSTM Controller for Study {}:".format(studyID))
    for spec in suggestion_params:
        # Names longer than 13 characters get one tab instead of two.
        line_format = "{}: \t{}" if len(spec) > 13 else "{}: \t\t{}"
        self.logger.info(line_format.format(spec, suggestion_params[spec]))

    self.suggestion_config = suggestion_params
def _get_search_space(self, studyID):
    """Read the study definition of ``studyID`` into instance attributes.

    After fetching the study from the manager this sets:

    * ``opt_direction``: the optimization type (minimize or maximize);
    * ``objective_name``: name of the objective metric;
    * ``num_layers``, ``input_size``, ``output_size``: graph dimensions,
      converted to ``int`` / lists of ``int``;
    * ``search_space`` and ``num_operations``: the available operations, as
      built by ``SearchSpace``.

    Every operation of the search space is printed through its ``print_op``
    method and the operation count is logged.

    Args:
        studyID: identifier of the study to fetch.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        study_reply = client.GetStudy(
            api_pb2.GetStudyRequest(study_id=studyID), 10)

    config = study_reply.study_config
    self.opt_direction = config.optimization_type
    self.objective_name = config.objective_value_name

    nas_config = config.nas_config
    graph = nas_config.graph_config
    self.num_layers = int(graph.num_layers)
    self.input_size = [int(dim) for dim in graph.input_size]
    self.output_size = [int(dim) for dim in graph.output_size]

    space = SearchSpace(nas_config.operations)

    self.logger.info("Search Space for Study {}:".format(studyID))
    self.search_space = space.search_space
    for operation in self.search_space:
        operation.print_op(self.logger)

    self.num_operations = space.num_operations
    self.logger.info("There are {} operations in total.\n".format(
        self.num_operations))
def GetSuggestions(self, request, context):
    """Sample the next architecture from the controller and create a trial.

    On the first request for a study the controller is set up, its variables
    are initialized and an architecture is sampled. On later requests the
    controller is restored from ``self.ctrl_cache_file``, trained for one
    step with the previous trial's evaluation result fed as reward, and a new
    architecture is sampled. In both cases the controller is saved back to
    ``self.ctrl_cache_file``.

    The sampled architecture is regrouped per layer, serialized together
    with the matching operation descriptions and registered with the manager
    as a single trial.

    Args:
        request: suggestion request; only ``study_id`` is read here.
        context: gRPC context (unused).

    Returns:
        ``api_pb2.GetSuggestionsReply`` holding the created trial.
    """
    if request.study_id not in self.registered_studies:
        self.setup_controller(request)
        self.is_first_run = True
        self.registered_studies.append(request.study_id)

    self.logger.info("-" * 80 + "\nSuggestion Step {} for Study {}\n".format(
        self.ctrl_step, request.study_id) + "-" * 80)

    with self.tf_graph.as_default():
        saver = tf.train.Saver()
        ctrl = self.controllers

        controller_ops = {
            "train_step": ctrl.train_step,
            "loss": ctrl.loss,
            "train_op": ctrl.train_op,
            "lr": ctrl.lr,
            "grad_norm": ctrl.grad_norm,
            "optimizer": ctrl.optimizer,
            "baseline": ctrl.baseline,
            "entropy": ctrl.sample_entropy,
            "sample_arc": ctrl.sample_arc,
            "skip_rate": ctrl.skip_rate
        }

        # Ops evaluated in one training step; the last fetch (train_op) is
        # discarded when the results are unpacked below.
        run_ops = [
            controller_ops["loss"], controller_ops["entropy"],
            controller_ops["lr"], controller_ops["grad_norm"],
            controller_ops["baseline"], controller_ops["skip_rate"],
            controller_ops["train_op"]
        ]

        if self.is_first_run:
            self.logger.info(
                "First time running suggestion for {}. Random architecture will be given."
                .format(request.study_id))
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                arc = sess.run(controller_ops["sample_arc"])
                # TODO: use a PVC to store the checkpoint so that it survives
                # an unexpected restart of the suggestion pod.
                saver.save(sess, self.ctrl_cache_file)

            self.is_first_run = False

        else:
            with tf.Session() as sess:
                saver.restore(sess, self.ctrl_cache_file)

                valid_acc = ctrl.reward
                # NOTE(review): GetEvaluationResult is defined outside this
                # block; if it can return None (e.g. previous trial not yet
                # completed), the negation / feed below would fail -- confirm.
                result = self.GetEvaluationResult(request.study_id)

                # The controller is designed to maximize the metric. When the
                # user wants to minimize it, the negated result is fed instead.
                if self.opt_direction == api_pb2.MINIMIZE:
                    result = -result

                loss, entropy, lr, gn, bl, skip, _ = sess.run(
                    fetches=run_ops, feed_dict={valid_acc: result})
                self.logger.info(
                    "Suggetion updated. LSTM Controller Loss: {}".format(
                        loss))
                # Sample only after the training step so the new architecture
                # comes from the updated controller.
                arc = sess.run(controller_ops["sample_arc"])

                saver.save(sess, self.ctrl_cache_file)

    arc = arc.tolist()
    # Regroup the flat sample: layer l (0-based) owns the next l + 1 entries.
    organized_arc = [0 for _ in range(self.num_layers)]
    record = 0
    for l in range(self.num_layers):
        organized_arc[l] = arc[record:record + l + 1]
        record += l + 1

    nn_config = dict()
    nn_config['num_layers'] = self.num_layers
    nn_config['input_size'] = self.input_size
    nn_config['output_size'] = self.output_size
    nn_config['embedding'] = dict()
    for l in range(self.num_layers):
        # The first entry of each layer's group indexes into the search
        # space; the remaining entries are presumably skip-connection
        # flags -- confirm against the controller.
        opt = organized_arc[l][0]
        nn_config['embedding'][opt] = self.search_space[opt].get_dict()

    organized_arc_json = json.dumps(organized_arc)
    nn_config_json = json.dumps(nn_config)

    # Double quotes are swapped for single quotes in the values sent with the
    # trial.
    organized_arc_str = str(organized_arc_json).replace('\"', '\'')
    nn_config_str = str(nn_config_json).replace('\"', '\'')

    self.logger.info(
        "\nNew Neural Network Architecture (internal representation):")
    self.logger.info(organized_arc_json)
    self.logger.info("\nCorresponding Seach Space Description:")
    self.logger.info(nn_config_str)
    self.logger.info("")

    trials = []
    trials.append(
        api_pb2.Trial(
            study_id=request.study_id,
            parameter_set=[
                api_pb2.Parameter(name="architecture",
                                  value=organized_arc_str,
                                  parameter_type=api_pb2.CATEGORICAL),
                api_pb2.Parameter(name="nn_config",
                                  value=nn_config_str,
                                  parameter_type=api_pb2.CATEGORICAL)
            ],
        ))

    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        for i, t in enumerate(trials):
            ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t), 10)
            trials[i].trial_id = ctrep.trial_id
        self.logger.info("Trial {} Created\n".format(ctrep.trial_id))
        # Stored on the instance for use by later calls.
        self.prev_trial_id = ctrep.trial_id

    self.ctrl_step += 1
    return api_pb2.GetSuggestionsReply(trials=trials)
def parseParameters(self, paramID):
    """Fetch the suggestion parameters for ``paramID`` and validate them.

    Every parameter starts from a built-in default. A value supplied by the
    manager replaces the default only when it passes validation:

    * ``length_scale``, ``noise``, ``nu``, ``trade_off`` must parse as float;
    * ``N``, ``n_estimators``, ``burn_in`` must parse as int;
    * ``kernel_type``, ``mode``, ``model_type`` must be one of their allowed
      values.

    Invalid values and unknown parameter names are reported with a warning
    and the default is kept. ``trial_hist`` has a default but no validation
    rule, so a supplied value is ignored (unchanged from the previous
    behaviour).

    Fixes over the previous implementation: the categorical checks were
    inverted, so valid ``kernel_type`` values were rejected while unknown
    ones were accepted, ``mode`` / ``model_type`` could never be overridden,
    and invalid ``mode`` / ``model_type`` values were dropped silently.

    Args:
        paramID: identifier of the suggestion parameter set to fetch.

    Returns:
        dict mapping each parameter name to its validated value or default.
    """
    channel = grpc.beta.implementations.insecure_channel(
        self.manager_addr, self.manager_port)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        gsprep = client.GetSuggestionParameters(
            api_pb2.GetSuggestionParametersRequest(param_id=paramID), 10)
        params = gsprep.suggestion_parameters

    parsed_service_params = {
        "N": 100,
        "model_type": "gp",
        "max_features": "auto",
        "length_scale": 0.5,
        "noise": 0.0005,
        "nu": 1.5,
        "kernel_type": "matern",
        "n_estimators": 50,
        "mode": "pi",
        "trade_off": 0.01,
        "trial_hist": "",
        "burn_in": 10,
    }
    float_params = ("length_scale", "noise", "nu", "trade_off")
    int_params = ("N", "n_estimators", "burn_in")
    # NOTE(review): the previous code also mentioned "lcb" for ``mode`` but
    # never listed it as allowed; add it here if the backend supports it.
    allowed_values = {
        "kernel_type": ("matern", "rbf"),
        "mode": ("pi", "ei"),
        "model_type": ("gp", "rf"),
    }

    for param in params:
        name, value = param.name, param.value

        if name not in parsed_service_params:
            self.logger.warning("Unknown Parameter name: %s ", name)
        elif name in float_params:
            try:
                parsed_value = float(value)
            except ValueError:
                self.logger.warning(
                    "Parameter must be float for %s: %s back to default value",
                    name, value)
            else:
                parsed_service_params[name] = parsed_value
        elif name in int_params:
            try:
                parsed_value = int(value)
            except ValueError:
                self.logger.warning(
                    "Parameter must be int for %s: %s back to default value",
                    name, value)
            else:
                parsed_service_params[name] = parsed_value
        elif name in allowed_values:
            if value in allowed_values[name]:
                parsed_service_params[name] = value
            else:
                self.logger.warning(
                    "Unknown Parameter for %s: %s back to default value",
                    name, value)

    return parsed_service_params
def GetSuggestions(self, request, context):
    """Sample ``study.num_trials`` architectures and create one trial each.

    A ``NAS_RL_StudyJob`` is created and cached the first time a study id is
    seen. On the study's first run the controller variables are initialized
    and candidates are sampled directly. On later runs the controller is
    restored from ``study.ctrl_cache_file``, trained for one step with the
    result of ``GetEvaluationResult`` fed as reward, and new candidates are
    sampled. The controller is saved back to the cache file in both cases.

    Each candidate is regrouped per layer, serialized together with the
    matching operation descriptions and registered with the manager as a
    trial; the created trial ids are kept in ``self.prev_trial_ids``.

    Args:
        request: suggestion request; ``study_id`` is read here and the whole
            request is passed to ``NAS_RL_StudyJob``.
        context: gRPC context (unused).

    Returns:
        ``api_pb2.GetSuggestionsReply`` holding the created trials.
    """
    if request.study_id not in self.registered_studies:
        self.registered_studies[request.study_id] = NAS_RL_StudyJob(
            request, self.logger)

    study = self.registered_studies[request.study_id]

    self.logger.info(
        "-" * 100 + "\nSuggestion Step {} for StudyJob {} (ID: {})\n".format(
            study.ctrl_step, study.study_name, study.study_id) + "-" * 100)

    with study.tf_graph.as_default():
        saver = tf.train.Saver()
        ctrl = study.controller

        controller_ops = {
            "train_step": ctrl.train_step,
            "loss": ctrl.loss,
            "train_op": ctrl.train_op,
            "lr": ctrl.lr,
            "grad_norm": ctrl.grad_norm,
            "optimizer": ctrl.optimizer,
            "baseline": ctrl.baseline,
            "entropy": ctrl.sample_entropy,
            "sample_arc": ctrl.sample_arc,
            "skip_rate": ctrl.skip_rate
        }

        # Ops evaluated in one training step; the last fetch (train_op) is
        # discarded when the results are unpacked below.
        run_ops = [
            controller_ops["loss"], controller_ops["entropy"],
            controller_ops["lr"], controller_ops["grad_norm"],
            controller_ops["baseline"], controller_ops["skip_rate"],
            controller_ops["train_op"]
        ]

        if study.is_first_run:
            self.logger.info(
                ">>> First time running suggestion for {}. Random architecture will be given."
                .format(study.study_name))
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                candidates = list()
                for _ in range(study.num_trials):
                    candidates.append(
                        sess.run(controller_ops["sample_arc"]))

                # TODO: use a PVC to store the checkpoint so that it survives
                # an unexpected restart of the suggestion pod.
                saver.save(sess, study.ctrl_cache_file)

            study.is_first_run = False

        else:
            with tf.Session() as sess:
                saver.restore(sess, study.ctrl_cache_file)

                valid_acc = ctrl.reward
                result = self.GetEvaluationResult(study)

                # In some rare cases GetEvaluationResult() may return None,
                # namely when GetSuggestions() is called before all trials
                # are completed.
                # NOTE(review): this polls every 20 seconds with no upper
                # bound, so the call blocks for as long as None is returned.
                while result is None:
                    self.logger.warning(
                        ">>> GetEvaluationResult() returns None")
                    time.sleep(20)
                    result = self.GetEvaluationResult(study)

                # The controller is designed to maximize the metric. When the
                # user wants to minimize it, the negated result is fed instead.
                if study.opt_direction == api_pb2.MINIMIZE:
                    result = -result

                loss, entropy, lr, gn, bl, skip, _ = sess.run(
                    fetches=run_ops, feed_dict={valid_acc: result})
                # NOTE(review): the message says "Reward" but the value
                # logged is the fetched ``loss`` -- confirm which is intended.
                self.logger.info(
                    ">>> Suggetion updated. LSTM Controller Reward: {}".
                    format(loss))

                # Sample only after the training step so the candidates come
                # from the updated controller.
                candidates = list()
                for _ in range(study.num_trials):
                    candidates.append(
                        sess.run(controller_ops["sample_arc"]))

                saver.save(sess, study.ctrl_cache_file)

    # NOTE(review): organized_candidates is filled below but not read again
    # within this method.
    organized_candidates = list()
    trials = list()

    for i in range(study.num_trials):
        arc = candidates[i].tolist()
        # Regroup the flat sample: layer l (0-based) owns the next l + 1
        # entries.
        organized_arc = [0 for _ in range(study.num_layers)]
        record = 0
        for l in range(study.num_layers):
            organized_arc[l] = arc[record:record + l + 1]
            record += l + 1
        organized_candidates.append(organized_arc)

        nn_config = dict()
        nn_config['num_layers'] = study.num_layers
        nn_config['input_size'] = study.input_size
        nn_config['output_size'] = study.output_size
        nn_config['embedding'] = dict()
        for l in range(study.num_layers):
            # The first entry of each layer's group indexes into the search
            # space; the remaining entries are presumably skip-connection
            # flags -- confirm against the controller.
            opt = organized_arc[l][0]
            nn_config['embedding'][opt] = study.search_space[opt].get_dict(
            )

        organized_arc_json = json.dumps(organized_arc)
        nn_config_json = json.dumps(nn_config)

        # Double quotes are swapped for single quotes in the values sent
        # with the trial.
        organized_arc_str = str(organized_arc_json).replace('\"', '\'')
        nn_config_str = str(nn_config_json).replace('\"', '\'')

        self.logger.info(
            "\n>>> New Neural Network Architecture Candidate #{} (internal representation):"
            .format(i))
        self.logger.info(organized_arc_json)
        self.logger.info("\n>>> Corresponding Seach Space Description:")
        self.logger.info(nn_config_str)

        trials.append(
            api_pb2.Trial(
                study_id=request.study_id,
                parameter_set=[
                    api_pb2.Parameter(name="architecture",
                                      value=organized_arc_str,
                                      parameter_type=api_pb2.CATEGORICAL),
                    api_pb2.Parameter(name="nn_config",
                                      value=nn_config_str,
                                      parameter_type=api_pb2.CATEGORICAL)
                ],
            ))

    # NOTE(review): the trial ids are stored on the service instance, not on
    # ``study``; with several registered studies each call overwrites the
    # ids of the previous one -- confirm this is intended.
    self.prev_trial_ids = list()
    self.logger.info("")
    channel = grpc.beta.implementations.insecure_channel(
        MANAGER_ADDRESS, MANAGER_PORT)
    with api_pb2.beta_create_Manager_stub(channel) as client:
        for i, t in enumerate(trials):
            ctrep = client.CreateTrial(api_pb2.CreateTrialRequest(trial=t), 10)
            trials[i].trial_id = ctrep.trial_id
            self.prev_trial_ids.append(ctrep.trial_id)

    self.logger.info(">>> {} Trials were created:".format(
        study.num_trials))
    for t in self.prev_trial_ids:
        self.logger.info(t)
    self.logger.info("")

    study.ctrl_step += 1

    return api_pb2.GetSuggestionsReply(trials=trials)