示例#1
0
    def __init__(self, model_fn, objective, name, distributions, **kwargs):
        """Tuner abstract class.

        Args:
            model_fn (function): Function that returns a Keras model.
            objective (str): Which objective the tuner optimizes for.
            name (str): Name of the tuner.
            distributions (Distributions): Distributions class used to
                sample hyperparameters.

        Notes:
            All meta data and variables are stored into self.state,
            defined in ../states/tunerstate.py
        """

        # hypertuner state init
        self.state = TunerState(name, objective, **kwargs)
        self.stats = self.state.stats  # shorthand access
        self.cloudservice = CloudService()

        # check model function
        if not model_fn:
            fatal("Model function can't be empty")
        try:
            mdl = model_fn()
        except Exception:
            # `except Exception` (not a bare except) so SystemExit and
            # KeyboardInterrupt still propagate instead of being reported
            # as an invalid model function.
            traceback.print_exc()
            fatal("Invalid model function")

        if not isinstance(mdl, Model):
            t = "tensorflow.keras.models.Model"
            fatal("Invalid model function: Doesn't return a %s object" % t)

        # function is valid - recording it
        self.model_fn = model_fn

        # Initializing distributions: calling model_fn above registered the
        # hyperparameters into the current global distributions object.
        hparams = config._DISTRIBUTIONS.get_hyperparameters_config()
        if len(hparams) == 0:
            warning("No hyperparameters used in model function. Are you sure?")

        # set global distribution object to the one requested by tuner
        # !MUST be after _eval_model_fn()
        config._DISTRIBUTIONS = distributions(hparams)

        # instances management
        self.max_fail_streak = 5  # how many failures before giving up
        self.instance_states = InstanceStatesCollection()

        # previous models: reload any results already on disk so stats and
        # dedup account for earlier runs of the same project/architecture.
        print("Loading from %s" % self.state.host.results_dir)
        count = self.instance_states.load_from_dir(self.state.host.results_dir,
                                                   self.state.project,
                                                   self.state.architecture)
        self.stats.instance_states_previously_trained = count
        info("Tuner initialized")
示例#2
0
    def __train_bracket(self, instance_collection, num_epochs, x, y,
                        **fit_kwargs):
        """Train all the models that are in a given bracket.

        Args:
            instance_collection: Collection of instances to train.
            num_epochs (int): Number of epochs to train each instance for.
            x: Training features.
            y: Training targets.
            **fit_kwargs: Extra keyword arguments forwarded to the
                underlying training call.
        """
        num_instances = len(instance_collection)

        info('Training %d models for %d epochs.' % (num_instances, num_epochs))
        # Start the counter at 1 so progress reads 1/N..N/N, not 0/N..N-1/N.
        for idx, instance in enumerate(instance_collection.to_list(), 1):
            info('  Training: %d/%d' % (idx, num_instances))
            self.__train_instance(instance,
                                  x,
                                  y,
                                  epochs=num_epochs,
                                  **fit_kwargs)
示例#3
0
 def enable(self, api_key, url=None):
     """Enable the cloud service by setting the API key.

     Args:
         api_key (str): API key used to authenticate against the service.
         url (str, optional): Override for the service base URL. Defaults
             to None, which keeps the current base URL.
     """
     self.api_key = api_key
     if url:
         # Consistent 4-space nesting (original mixed 2- and 4-space).
         self.base_url = url
     if self._check_access():
         info("Cloud service enabled - Go to https://.. to track your "
              "tuning results in realtime.")
         self.status = OK
         self.is_enable = True
     else:
         warning("Invalid cloud API key")
         self.status = AUTH_ERROR
         self.is_enable = False
示例#4
0
    def save_best_models(self, export_type="keras", num_models=1):
        """ Exports the best model based on the specified metric, to the
            results directory.

            Args:
                export_type (str, optional): Defaults to "keras". What format
                    of model to export:

                    # Tensorflow 1.x/2.x
                    "keras" - Save as separate config (JSON) and weights (HDF5)
                        files.
                    "keras_bundle" - Saved in Keras's native format (HDF5), via
                        save_model()

                    # Currently only supported in Tensorflow 1.x
                    "tf" - Saved in tensorflow's SavedModel format. See:
                        https://www.tensorflow.org/alpha/guide/saved_model
                    "tf_frozen" - A SavedModel, where the weights are stored
                        in the model file itself, rather than a variables
                        directory. See:
                        https://www.tensorflow.org/guide/extend/model_files
                    "tf_optimized" - A frozen SavedModel, which has
                        additionally been transformed via tensorflow's graph
                        transform library to remove training-specific nodes
                        and operations.  See:
                        https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms
                    "tf_lite" - A TF Lite model.

                num_models (int, optional): Number of best models to export.
                    Defaults to 1.
        """

        # compile=False: the models are only serialized, never trained here.
        instance_states, execution_states, models = self.get_best_models(
            num_models=num_models, compile=False)

        zipped = zip(models, instance_states, execution_states)
        for idx, (model, instance_state, execution_state) in enumerate(zipped):
            # Unique per-model prefix: project-architecture-instance-execution.
            export_prefix = "%s-%s-%s-%s" % (
                self.state.project, self.state.architecture,
                instance_state.idx, execution_state.idx)

            export_path = os.path.join(self.state.host.export_dir,
                                       export_prefix)

            # Scratch space used by save_model during conversion/export.
            tmp_path = os.path.join(self.state.host.tmp_dir, export_prefix)
            info("Exporting top model (%d/%d) - %s" %
                 (idx + 1, len(models), export_path))
            tf_utils.save_model(model,
                                export_path,
                                tmp_path=tmp_path,
                                export_type=export_type)
示例#5
0
    def __filter_early_stops(self, instance_collection, epoch_target):
        """Return only the instances whose last execution ran the full
        epoch_target epochs.

        Instances with no recorded "loss" metric, or whose loss history is
        shorter than epoch_target (assumed to have early-stopped), are
        skipped with an informational message.
        """
        kept = []
        for inst in instance_collection:
            last_exec = inst.execution_states_collection.get_last()
            metrics = last_exec.metrics
            if not metrics or not metrics.exist("loss"):
                info("Skipping instance %s - no metrics." % inst.idx)
                continue
            history_len = len(metrics.get("loss").history)
            if history_len < epoch_target:
                info("Skipping instance %s - history is only %d epochs long - "
                     "expected %d - assuming early stop." %
                     (inst.idx, history_len, epoch_target))
                continue

            kept.append(inst)
        return kept
示例#6
0
    def results_summary(self, num_models=10, sort_metric=None):
        """Display a summary of the tuning results.

        Args:
            num_models (int, optional): Number of models to display.
            Defaults to 10.
            sort_metric (str, optional): Sorting metric, when not specified
            sort models by objective value. Defaults to None.
        """
        if self.state.dry_run:
            info("Dry-Run - no results to report.")
            return

        # FIXME API documentation
        summary_kwargs = {
            'input_dir': self.state.host.results_dir,
            'project': self.state.project,
            'architecture': self.state.architecture,
            'num_models': num_models,
            'sort_metric': sort_metric,
        }
        _results_summary(**summary_kwargs)
示例#7
0
    def bracket(self, instance_collection, num_to_keep, num_epochs,
                total_num_epochs, x, y, **fit_kwargs):
        """Train one bracket and return the best surviving instances.

        Trains every instance in the collection for num_epochs additional
        epochs, drops early-stopped instances, and keeps at most
        num_to_keep of the best (by objective).
        """
        if self.state.dry_run:
            # No training in dry-run: fabricate num_to_keep placeholders.
            dry_collection = InstanceStatesCollection()
            for placeholder_idx in range(num_to_keep):
                dry_collection.add(placeholder_idx, None)
            return dry_collection

        self.__train_bracket(instance_collection, num_epochs, x, y,
                             **fit_kwargs)
        survivors = self.__filter_early_stops(
            instance_collection.sort_by_objective(), total_num_epochs)

        if len(survivors) > num_to_keep:
            survivors = survivors[:num_to_keep]
            info("Keeping %d instances out of %d" %
                 (len(survivors), len(instance_collection)))

        output_collection = InstanceStatesCollection()
        for survivor in survivors:
            output_collection.add(survivor.idx, survivor)
        return output_collection
    def load_from_dir(self,
                      path,
                      project='default',
                      architecture=None,
                      verbose=1):
        """Load instance collection from disk or bucket.

        Args:
            path (str): Local path or bucket path where instance results
            are stored.

            project (str, optional): Tuning project name. Defaults to default.

            architecture (str, optional): Tuning architecture name.
            Defaults to None.

            verbose (int, optional): Verbose output? Default to 1.

        Returns:
            int: number of instances loaded
        """
        count = 0

        glob_path = str(Path(path) / "*-results.json")
        filenames = glob(glob_path)

        for fname in progress_bar(filenames,
                                  unit='instance',
                                  desc='Loading tuning results'):

            # Named `data` (not `config`) to avoid shadowing the module-level
            # config object used elsewhere in this file.
            data = json.loads(read_file(str(fname)))

            # check fields existence
            if 'tuner' not in data:
                continue
            if 'architecture' not in data['tuner']:
                continue
            if 'project' not in data['tuner']:
                continue

            # check instance belongs to the right project / architecture
            if project != data['tuner']['project']:
                print("Rejected %s != %s" %
                      (project, data['tuner']['project']))
                continue

            # Allowing architecture to be None allows to reload models from
            # various architecture for retrain, summary and export purpose
            if (architecture and
                    architecture != data['tuner']['architecture']):  # nopep8
                print("Rejected arch %s != %s" %
                      (architecture, data['tuner']['architecture']))
                continue

            idx = data['instance']['idx']
            instance_state = InstanceState.from_config(data['instance'])
            self._objects[idx] = instance_state
            self._last_insert_idx = idx
            count += 1

        if verbose:
            info("%s previous instances reloaded" % count)

        return count
示例#9
0
    def search(self, x, y, **kwargs):
        """Run the bracketed (hyperband-style) search loop over the epoch
        budget.

        Each outer loop generates a fresh batch of candidate models, then
        runs a sequence of brackets: every bracket trains the surviving
        candidates for some additional epochs and keeps only the best ones
        for the next bracket.

        Args:
            x: Training features, forwarded to the training calls.
            y: Training targets, forwarded to the training calls.
            **kwargs: Extra fit arguments. Must NOT contain 'epochs';
                epoch counts are derived from the tuner configuration.
        """
        assert 'epochs' not in kwargs, \
            "Number of epochs is controlled by the tuner."
        remaining_batches = self.config.num_batches

        # NOTE(review): remaining_batches appears to be fractional
        # (decremented by 1 per loop, compared against 1.0) — presumably the
        # budget allows a partial final batch; confirm in config.
        while remaining_batches > 0:
            info('Budget: %s/%s - Loop %.2f/%.2f' %
                 (self.epoch_budget_expensed, self.state.epoch_budget,
                  remaining_batches, self.config.num_batches))

            # Last (fractional) loop
            if remaining_batches < 1.0:
                # Reduce the number of models for the last fractional loop
                model_sequence = self.config.partial_batch_epoch_sequence
                if model_sequence is None:
                    # No partial sequence configured: stop searching.
                    break
                info('Partial Batch Model Sequence %s' % model_sequence)
            else:
                model_sequence = self.config.model_sequence

            # Generate N models, and perform the initial training.
            subsection('Generating %s models' % model_sequence[0])
            candidates = InstanceStatesCollection()
            num_models = model_sequence[0]

            for idx in tqdm(range(num_models),
                            desc='Generating models',
                            unit='model'):

                if self.state.dry_run:
                    # Dry run: record a placeholder instead of a real model.
                    candidates.add(idx, None)
                else:
                    # new_instance() may return None (e.g. on repeated
                    # generation failures); only add real instances.
                    instance = self.new_instance()
                    if instance is not None:
                        candidates.add(instance.state.idx, instance.state)

            if not candidates:
                info("No models were generated.")
                break

            subsection("Training models.")

            # One bracket per entry in model_sequence: each bracket trains
            # for the delta epochs and keeps the next entry's model count.
            for bracket_idx, num_models in enumerate(model_sequence):
                num_epochs = self.config.delta_epoch_sequence[bracket_idx]
                total_num_epochs = self.config.epoch_sequence[bracket_idx]

                num_to_keep = 0
                if bracket_idx < len(model_sequence) - 1:
                    # Intermediate bracket: shrink to the next bracket size.
                    num_to_keep = model_sequence[bracket_idx + 1]
                    info("Running a bracket to reduce from %d to %d models "
                         "in %d epochs" %
                         (num_models, num_to_keep, num_epochs))
                else:
                    # Final bracket: no further reduction.
                    num_to_keep = model_sequence[bracket_idx]
                    info("Running final bracket - %d models for %d epochs" %
                         (num_to_keep, num_epochs))

                info('Budget: %s/%s - Loop %.2f/%.2f - Brackets %s/%s' %
                     (self.epoch_budget_expensed, self.state.epoch_budget,
                      remaining_batches, self.config.num_batches,
                      bracket_idx + 1, self.config.num_brackets))

                # Account for the epochs this bracket will consume.
                self.epoch_budget_expensed += num_models * num_epochs

                candidates = self.bracket(candidates, num_to_keep, num_epochs,
                                          total_num_epochs, x, y, **kwargs)

            remaining_batches -= 1

        info('Final Budget Used: %s/%s' %
             (self.epoch_budget_expensed, self.state.epoch_budget))