def __init__(self, model_fn, objective, name, distributions, **kwargs):
    """Tuner abstract class.

    Args:
        model_fn (function): Function that returns a Keras model.
        objective (str): Which objective the tuner optimizes for.
        name (str): Name of the tuner.
        distributions (Distributions): Distributions object.

    Notes:
        All meta data and variables are stored into self.state,
        defined in ../states/tunerstate.py
    """
    # hypertuner state init
    self.state = TunerState(name, objective, **kwargs)
    self.stats = self.state.stats  # shorthand access
    self.cloudservice = CloudService()

    # check model function
    if not model_fn:
        fatal("Model function can't be empty")
    try:
        mdl = model_fn()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate instead of being reported as a bad model_fn.
        traceback.print_exc()
        fatal("Invalid model function")

    if not isinstance(mdl, Model):
        t = "tensorflow.keras.models.Model"
        fatal("Invalid model function: Doesn't return a %s object" % t)

    # function is valid - recording it
    self.model_fn = model_fn

    # Initializing distributions
    hparams = config._DISTRIBUTIONS.get_hyperparameters_config()
    if len(hparams) == 0:
        warning("No hyperparameters used in model function. Are you sure?")

    # set global distribution object to the one requested by tuner
    # !MUST be after _eval_model_fn()
    config._DISTRIBUTIONS = distributions(hparams)

    # instances management
    self.max_fail_streak = 5  # how many failures before giving up
    self.instance_states = InstanceStatesCollection()

    # previous models
    print("Loading from %s" % self.state.host.results_dir)
    count = self.instance_states.load_from_dir(self.state.host.results_dir,
                                               self.state.project,
                                               self.state.architecture)
    self.stats.instance_states_previously_trained = count
    info("Tuner initialized")
def __train_bracket(self, instance_collection, num_epochs, x, y,
                    **fit_kwargs):
    """Train all the models that are in a given bracket.

    Args:
        instance_collection (InstanceStatesCollection): models to train.
        num_epochs (int): number of epochs to train each model for.
        x: training data.
        y: training labels.
        **fit_kwargs: additional arguments forwarded to the fit call.
    """
    num_instances = len(instance_collection)
    info('Training %d models for %d epochs.' % (num_instances, num_epochs))
    for idx, instance in enumerate(instance_collection.to_list()):
        # idx + 1 so progress reads "1/N".."N/N" instead of "0/N".."N-1/N"
        info(' Training: %d/%d' % (idx + 1, num_instances))
        self.__train_instance(instance, x, y, epochs=num_epochs,
                              **fit_kwargs)
def enable(self, api_key, url=None):
    """Enable the cloud service by setting the API key.

    Args:
        api_key (str): API key used to authenticate with the service.
        url (str, optional): Override the default service base URL.
    """
    self.api_key = api_key
    if url:
        self.base_url = url

    # Validate the credentials before flipping the enabled flag.
    if not self._check_access():
        warning("Invalid cloud API key")
        self.status = AUTH_ERROR
        self.is_enable = False
        return

    info("Cloud service enabled - Go to https://.. to track your "
         "tuning results in realtime.")
    self.status = OK
    self.is_enable = True
def save_best_models(self, export_type="keras", num_models=1):
    """Exports the best model(s), based on the specified metric, to the
    results directory.

    Args:
        export_type (str, optional): Defaults to "keras". What format
            of model to export:

            # Tensorflow 1.x/2.x
            "keras" - Save as separate config (JSON) and weights (HDF5)
                files.
            "keras_bundle" - Saved in Keras's native format (HDF5), via
                save_model()

            # Currently only supported in Tensorflow 1.x
            "tf" - Saved in tensorflow's SavedModel format. See:
                https://www.tensorflow.org/alpha/guide/saved_model
            "tf_frozen" - A SavedModel, where the weights are stored in
                the model file itself, rather than a variables
                directory. See:
                https://www.tensorflow.org/guide/extend/model_files
            "tf_optimized" - A frozen SavedModel, which has additionally
                been transformed via tensorflow's graph transform library
                to remove training-specific nodes and operations. See:
                https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms
            "tf_lite" - A TF Lite model.
        num_models (int, optional): How many of the top models to
            export. Defaults to 1.
    """
    instance_states, execution_states, models = self.get_best_models(
        num_models=num_models, compile=False)

    zipped = zip(models, instance_states, execution_states)
    for idx, (model, instance_state, execution_state) in enumerate(zipped):
        # Export name encodes project/architecture plus both state ids so
        # repeated exports from different runs don't collide.
        export_prefix = "%s-%s-%s-%s" % (
            self.state.project, self.state.architecture,
            instance_state.idx, execution_state.idx)

        export_path = os.path.join(self.state.host.export_dir,
                                   export_prefix)
        tmp_path = os.path.join(self.state.host.tmp_dir, export_prefix)
        info("Exporting top model (%d/%d) - %s" % (idx + 1, len(models),
                                                   export_path))
        tf_utils.save_model(model, export_path, tmp_path=tmp_path,
                            export_type=export_type)
def __filter_early_stops(self, instance_collection, epoch_target):
    """Drop instances whose last execution recorded fewer than
    `epoch_target` epochs of loss history (assumed to have early-stopped)
    or that have no metrics at all.

    Args:
        instance_collection: iterable of instance states to filter.
        epoch_target (int): expected length of the loss history.

    Returns:
        list: instances that trained for the full epoch_target epochs.
    """
    kept = []
    for inst in instance_collection:
        last_exec = inst.execution_states_collection.get_last()
        metrics = last_exec.metrics

        if not metrics or not metrics.exist("loss"):
            info("Skipping instance %s - no metrics." % inst.idx)
            continue

        history_len = len(metrics.get("loss").history)
        if history_len < epoch_target:
            info("Skipping instance %s - history is only %d epochs long - "
                 "expected %d - assuming early stop." % (
                     inst.idx, history_len, epoch_target))
            continue

        kept.append(inst)
    return kept
def results_summary(self, num_models=10, sort_metric=None):
    """Display tuning results summary.

    Args:
        num_models (int, optional): Number of models to display.
            Defaults to 10.
        sort_metric (str, optional): Sorting metric; when not specified
            models are sorted by objective value. Defaults to None.
    """
    if self.state.dry_run:
        info("Dry-Run - no results to report.")
        return

    # FIXME API documentation
    summary_args = dict(
        input_dir=self.state.host.results_dir,
        project=self.state.project,
        architecture=self.state.architecture,
        num_models=num_models,
        sort_metric=sort_metric,
    )
    _results_summary(**summary_args)
def bracket(self, instance_collection, num_to_keep, num_epochs,
            total_num_epochs, x, y, **fit_kwargs):
    """Run a single bracket: train the given instances, rank them by
    objective, drop early-stopped ones, and keep the top performers.

    Args:
        instance_collection (InstanceStatesCollection): models to train.
        num_to_keep (int): how many instances survive the bracket.
        num_epochs (int): epochs to train for during this bracket.
        total_num_epochs (int): cumulative epochs expected in the
            loss history after this bracket.
        x, y: training data and labels.
        **fit_kwargs: extra arguments forwarded to the fit call.

    Returns:
        InstanceStatesCollection: the surviving instances.
    """
    # Dry-run: emit placeholder entries without training anything.
    if self.state.dry_run:
        placeholders = InstanceStatesCollection()
        for slot in range(num_to_keep):
            placeholders.add(slot, None)
        return placeholders

    self.__train_bracket(instance_collection, num_epochs, x, y,
                         **fit_kwargs)

    survivors = self.__filter_early_stops(
        instance_collection.sort_by_objective(), total_num_epochs)

    if len(survivors) > num_to_keep:
        survivors = survivors[:num_to_keep]
        info("Keeping %d instances out of %d" % (
            len(survivors), len(instance_collection)))

    pruned = InstanceStatesCollection()
    for survivor in survivors:
        pruned.add(survivor.idx, survivor)
    return pruned
def load_from_dir(self, path, project='default', architecture=None,
                  verbose=1):
    """Load instance collection from disk or bucket

    Args:
        path (str): Local path or bucket path where instance results
            are stored
        project (str, optional): Tuning project name. Defaults to
            default.
        architecture (str, optional): Tuning architecture name.
            Defaults to None.
        verbose (int, optional): Verbose output? Default to 1.

    Returns:
        int: number of instances loaded
    """
    count = 0
    glob_path = str(Path(path) / "*-results.json")
    filenames = glob(glob_path)

    for fname in progress_bar(filenames, unit='instance',
                              desc='Loading tuning results'):
        # `results` (renamed from `config`) avoids shadowing the
        # module-level config import used elsewhere in this file.
        results = json.loads(read_file(str(fname)))

        # check fields existence
        if 'tuner' not in results:
            continue
        if 'architecture' not in results['tuner']:
            continue
        if 'project' not in results['tuner']:
            continue

        # check instance belongs to the right project / architecture
        if project != results['tuner']['project']:
            # rejection messages are now gated by verbose, like the
            # summary message below
            if verbose:
                print("Rejected %s != %s" % (
                    project, results['tuner']['project']))
            continue

        # Allowing architecture to be None allows to reload models from
        # various architecture for retrain, summary and export purpose
        if (architecture and
                architecture != results['tuner']['architecture']):
            if verbose:
                print("Rejected arch %s != %s" % (
                    architecture, results['tuner']['architecture']))
            continue

        idx = results['instance']['idx']
        instance_state = InstanceState.from_config(results['instance'])
        self._objects[idx] = instance_state
        self._last_insert_idx = idx
        count += 1

    if verbose:
        info("%s previous instances reloaded" % count)
    return count
def search(self, x, y, **kwargs):
    """Run the bracketed hyperparameter search over the configured
    epoch budget.

    Repeatedly generates a batch of candidate models, then runs a
    sequence of brackets that trains the survivors for progressively
    more epochs while pruning the worst performers.

    Args:
        x: training data.
        y: training labels.
        **kwargs: forwarded to the underlying fit call. Must NOT
            contain 'epochs' - epoch counts are derived from the
            tuner's bracket configuration.
    """
    assert 'epochs' not in kwargs, \
        "Number of epochs is controlled by the tuner."

    # remaining_batches may be fractional - a partial last loop uses a
    # reduced model sequence (see below).
    remaining_batches = self.config.num_batches

    while remaining_batches > 0:
        info('Budget: %s/%s - Loop %.2f/%.2f' % (
            self.epoch_budget_expensed, self.state.epoch_budget,
            remaining_batches, self.config.num_batches))

        # Last (fractional) loop
        if remaining_batches < 1.0:
            # Reduce the number of models for the last fractional loop
            model_sequence = self.config.partial_batch_epoch_sequence
            if model_sequence is None:
                break
            info('Partial Batch Model Sequence %s' % model_sequence)
        else:
            model_sequence = self.config.model_sequence

        # Generate N models, and perform the initial training.
        subsection('Generating %s models' % model_sequence[0])
        candidates = InstanceStatesCollection()
        num_models = model_sequence[0]
        for idx in tqdm(range(num_models), desc='Generating models',
                        unit='model'):
            if self.state.dry_run:
                # dry-run: placeholder entries, no model is built
                candidates.add(idx, None)
            else:
                # new_instance() may return None (e.g. duplicate or
                # failed model) - only record real instances
                instance = self.new_instance()
                if instance is not None:
                    candidates.add(instance.state.idx, instance.state)

        if not candidates:
            info("No models were generated.")
            break

        subsection("Training models.")
        for bracket_idx, num_models in enumerate(model_sequence):
            # epochs to train in THIS bracket vs cumulative epochs the
            # survivors will have seen in total after it
            num_epochs = self.config.delta_epoch_sequence[bracket_idx]
            total_num_epochs = self.config.epoch_sequence[bracket_idx]

            num_to_keep = 0
            if bracket_idx < len(model_sequence) - 1:
                # keep as many models as the next bracket will train
                num_to_keep = model_sequence[bracket_idx + 1]
                info("Running a bracket to reduce from %d to %d models "
                     "in %d epochs" % (num_models, num_to_keep,
                                       num_epochs))
            else:
                # final bracket: no further reduction
                num_to_keep = model_sequence[bracket_idx]
                info("Running final bracket - %d models for %d epochs" %
                     (num_to_keep, num_epochs))

            info('Budget: %s/%s - Loop %.2f/%.2f - Brackets %s/%s' % (
                self.epoch_budget_expensed, self.state.epoch_budget,
                remaining_batches, self.config.num_batches,
                bracket_idx + 1, self.config.num_brackets))

            # account for the epochs spent before running the bracket
            self.epoch_budget_expensed += num_models * num_epochs

            candidates = self.bracket(candidates, num_to_keep, num_epochs,
                                      total_num_epochs, x, y, **kwargs)

        remaining_batches -= 1

    info('Final Budget Used: %s/%s' % (self.epoch_budget_expensed,
                                       self.state.epoch_budget))