def old_to_new_architecture(old_architecture):
  """Convert architectures defined by block_types only.

  These architectures are more restricted -- they always have one input layer
  and one logits layer, and all blocks are only connected to the previous one or
  two blocks.

  Args:
    old_architecture: List of block_type ints.

  Returns:
    Architecture.
  """

  node_list = []
  for block_type in old_architecture:
    block_type_name = blocks_builder.BlockType(block_type).name
    if ("NASNET" in block_type_name) or ("AMOEBA" in block_type_name):
      input_indices = [-2, -1]
    else:
      input_indices = [-1]
    node_list.append(
        Node(
            block_type,
            input_indices=input_indices,
            combiner_type=CombinerType.CONCAT))
  return Architecture(node_list)
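
# Usage sketch (added for illustration, not from the original source). It
# assumes blocks_builder.BlockType has a CONVOLUTION_3X3 member, as other
# examples below suggest. Plain blocks get input_indices [-1]; blocks whose
# names contain "NASNET" or "AMOEBA" get [-2, -1].
example_old = [
    blocks_builder.BlockType.CONVOLUTION_3X3,
    blocks_builder.BlockType.CONVOLUTION_3X3,
]
example_arch = old_to_new_architecture(example_old)
# Every resulting Node concatenates its inputs (CombinerType.CONCAT) and
# reads only from the immediately preceding block(s).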
Example #2
    def get_suggestion(self,
                       trials,
                       hparams,
                       my_trial_id=None,
                       model_dir=None):
        """See base class SearchAlgorithm."""

        architectures, losses = self._load_trials(trials)

        # No feasible trials yet.
        if (len(architectures) <
                self._phoenix_spec.linear_model.trials_before_fit):
            return common.encode_architecture(
                hparams.initial_architecture,
                self._phoenix_spec.problem_type), None

        _, suggestion = self._suggest_by_padding(architectures, losses)

        # Decide whether to allow growth.
        # TODO(b/172564129): refactor common behavior with other search algorithms.
        allowed_depth = common.get_allowed_depth(
            len(architectures),
            depth_thresholds=(
                self._phoenix_spec.increase_complexity_minimum_trials),
            max_depth=self._phoenix_spec.maximum_depth)
        explore_mode = common.random(
            self._phoenix_spec.increase_complexity_probability)

        new_block = blocks.BlockType[hparams.new_block_type]

        if suggestion.size <= allowed_depth and explore_mode:
            # increase_structure_depth expects that the architecture contains a
            # flatten block, which may not be true for the linear model's output.
            suggestion = np.array(
                architecture_utils.fix_architecture_order(
                    suggestion, self._phoenix_spec.problem_type))
            suggestion = architecture_utils.increase_structure_depth(
                suggestion, new_block, self._phoenix_spec.problem_type)
        elif _contains_row(architectures, suggestion):
            # The linear model suggested an architecture we've already tried
            # in a previous trial, so we mutate it.
            # TODO(b/172564129): more intelligent _contains_row check: should handle
            # when mutate_replace output has been tried, but not just a while loop,
            # since that could run forever if the number of untried
            # architectures is small.
            suggestion = common.mutate_replace(suggestion, new_block)
        else:
            # The linear model suggested a novel architecture; use it.
            pass

        suggestion = [blocks.BlockType(b) for b in suggestion]
        return np.array(
            architecture_utils.fix_architecture_order(
                suggestion, self._phoenix_spec.problem_type)), None
def fix_architecture_order(architecture, problem_type):
    """Fixes the architecture order of cnns.

  This function fixes the architecture for convolutional neural networks.
  Namely, if a dense block is before a convolutional block, then it switches
  the order. For the dnn and rnn case, the function doesn't do anything for
  the architecture as all architectures are valid.

  Args:
    architecture: an iterable of integers or `blocks.BlockType`.
    problem_type: a `PhoenixSpec.ProblemType` enum.

  Returns:
    a list of `blocks.BlockType`.
  """
    # All architectures are valid in the DNN and RNN cases.
    if problem_type != phoenix_spec_pb2.PhoenixSpec.CNN:
        return architecture

    output_architecture = []
    flattens = tuple(block for block in architecture
                     if "FLATTEN" in blocks.BlockType(block).name)
    if not flattens:
        output_architecture = [blocks.BlockType.PLATE_REDUCTION_FLATTEN]
        logging.warning("initial_architecture does not have a flattening "
                        "block.")
        logging.info("Adding a Flatten block to the architecture.")
    else:
        output_architecture = [flattens[0]]

    for block in architecture:
        if (block == blocks.BlockType.FLATTEN
                or block == blocks.BlockType.DOWNSAMPLE_FLATTEN
                or block == blocks.BlockType.PLATE_REDUCTION_FLATTEN):
            continue
        output_architecture = increase_structure_depth(
            np.array(output_architecture), block, problem_type)
        output_architecture = [i.item() for i in output_architecture]
    return [blocks.BlockType(i) for i in output_architecture]
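
# Illustrative sketch (added; not in the original source), using only
# BlockType values that appear in these examples. An architecture whose
# FLATTEN block comes before the convolution is rebuilt:
example = [blocks.BlockType.FLATTEN, blocks.BlockType.CONVOLUTION_3X3]
fixed = fix_architecture_order(example, phoenix_spec_pb2.PhoenixSpec.CNN)
# Expected, per the docstring above: convolutions precede flattening, i.e.
# [CONVOLUTION_3X3, FLATTEN]; the exact placement is delegated to
# increase_structure_depth.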
    def get_suggestion(self,
                       trials,
                       hparams,
                       my_trial_id=None,
                       model_dir=None):
        """See the base class for details."""
        if self._phoenix_spec.beam_size < 1:
            raise ValueError("phoenix_spec.beam_size must be >= 1.")
        sorted_trials = self._metadata.get_best_k(
            trials, k=int(1e10), valid_only=True) or []
        num_completed_trials = len(sorted_trials)
        best_trials = sorted_trials[:self._phoenix_spec.beam_size]

        # No feasible trials yet.
        if not best_trials:
            return common.encode_architecture(
                hparams.initial_architecture,
                self._phoenix_spec.problem_type), None

        # Increase depth if possible.
        best_architecture, best_trial = (
            common.choose_random_trial_and_get_architecture(best_trials))
        allowed_depth = common.get_allowed_depth(
            num_completed_trials,
            depth_thresholds=(
                self._phoenix_spec.increase_complexity_minimum_trials),
            max_depth=self._max_depth)
        logging.info("Maximal depth allowed: %d", allowed_depth)
        explore_mode = common.random(
            self._phoenix_spec.increase_complexity_probability)
        new_block = blocks.BlockType[hparams.new_block_type]

        if best_architecture.size < allowed_depth and explore_mode:
            common.write_fork_edge(model_dir, my_trial_id, best_trial)
            return architecture_utils.increase_structure_depth(
                best_architecture, new_block,
                self._phoenix_spec.problem_type), best_trial

        # Otherwise enter evolutionary mode.
        logging.info("using evolution")

        output_architecture = common.mutate_replace(best_architecture,
                                                    new_block)
        output_architecture = [
            blocks.BlockType(x) for x in output_architecture
        ]
        common.write_fork_edge(model_dir, my_trial_id, best_trial)
        return np.array(
            architecture_utils.fix_architecture_order(
                output_architecture,
                self._phoenix_spec.problem_type)), best_trial
Example #5
def _get_suggestion(architectures,
                    blocks_to_use,
                    losses,
                    grow=False,
                    remove_outliers=False,
                    pass_flatten=False):
  """Testing subroutine to handle boilerplate Trial construction, dirs, etc."""

  # TODO(b/172564129): Figure out how to use mock decorator for free functions.
  with mock.patch("model_search.architecture"
                  ".architecture_utils.get_architecture") as mock_get_arch:

    blocks_strs = [blocks.BlockType(b).name for b in blocks_to_use]
    spec = search_test_utils.create_spec(
        phoenix_spec_pb2.PhoenixSpec.CNN,
        blocks_to_use=blocks_strs,
    )
    spec.search_type = phoenix_spec_pb2.PhoenixSpec.LINEAR_MODEL
    spec.increase_complexity_probability = 1.0 if grow else 0.0
    spec.linear_model.remove_outliers = remove_outliers
    spec.linear_model.trials_before_fit = 1
    algorithm = linear_model.LinearModel(spec)

    mock_get_arch.side_effect = lambda idx: architectures[int(idx)]

    trials = []
    for i, loss in enumerate(losses):
      if isinstance(loss, (np.floating, np.integer)):
        loss = loss.item()
      trials.append(
          trial_module.Trial({
              "id": i,
              "model_dir": str(i),
              "status": "COMPLETED",
              "trial_infeasible": False,
              "final_measurement": {
                  "objective_value": loss
              }
          }))

    hparams = hp.HParams(new_block_type=NEW_BLOCK)
    # Second return val fork_trial is a nonsense concept for LinearModel.
    output_architecture, _ = algorithm.get_suggestion(trials, hparams)
    if not pass_flatten:
      output_architecture = np.array(
          [b for b in output_architecture if b not in blocks.FLATTEN_TYPES])
    return output_architecture
Example #6
  def test_flatten_output(self, grow):
    """Ensure we output suggestions with a flatten block correctly placed."""

    # Make trials such that the linear model will output all convolutions.
    architectures = [
        np.repeat(blocks.BlockType.EMPTY_BLOCK, 4),
        np.repeat(blocks.BlockType.CONVOLUTION_3X3, 4)
    ]
    losses = [0.1, 0.01]
    blocks_to_use = [blocks.BlockType.CONVOLUTION_3X3]

    # Make sure the model suggestion includes a flatten block,
    # despite raw model output being all convolutional.
    best = _get_suggestion(
        architectures, blocks_to_use, losses, grow=grow, pass_flatten=True)
    flattens = [b for b in best if "FLATTEN" in blocks.BlockType(b).name]
    nflat = len(flattens)
    self.assertGreater(nflat, 0)
Example #7
def mutate_replace(architecture, new_block):
    """Replaces one random block with the chosen new block.

  Returns a copy; input is not modified. The element to replace is chosen
  uniformly at random. Special care is taken not to replace the FLATTEN block.

  Args:
    architecture: An np.ndarray of integers corresponding to BlockType enum.
    new_block: Integer value of the desired BlockType to insert.

  Returns:
    An np.array of the architecture containing the new block.
  """
    output_architecture = architecture.copy()
    while True:
        block_to_replace = np.random.randint(0, architecture.size)
        blocktype = blocks.BlockType(output_architecture[block_to_replace])
        if blocktype not in blocks.FLATTEN_TYPES:
            break
    output_architecture[block_to_replace] = new_block
    return output_architecture
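
# Small sketch (added for illustration) showing that flatten blocks are
# protected: only indices 0 and 2 can be replaced here.
arch = np.array([
    blocks.BlockType.EMPTY_BLOCK,
    blocks.BlockType.PLATE_REDUCTION_FLATTEN,
    blocks.BlockType.EMPTY_BLOCK,
])
mutated = mutate_replace(arch, blocks.BlockType.CONVOLUTION_3X3)
assert (blocks.BlockType(mutated[1]) ==
        blocks.BlockType.PLATE_REDUCTION_FLATTEN)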
    def get_suggestion(self,
                       trials,
                       hparams,
                       my_trial_id=None,
                       model_dir=None):
        """See the base class for details."""
        del my_trial_id  # Unused.

        new_block = blocks.BlockType[hparams.new_block_type]
        if self._is_reduction_block(new_block):
            raise ValueError(
                "ConstrainedDescent should not have reduction blocks in "
                "its search space.")

        if self._phoenix_spec.beam_size < 1:
            raise ValueError("phoenix_spec.beam_size must be >= 1.")
        sorted_trials = self._metadata.get_best_k(
            trials, k=int(1e10), valid_only=True) or []
        num_completed_trials = len(sorted_trials)
        best_trials = sorted_trials[:self._phoenix_spec.beam_size]

        # No feasible trials yet, use initial architecture passed in from hparams.
        if not best_trials:
            best_architecture = common.encode_architecture(
                hparams.initial_architecture, self._phoenix_spec.problem_type)
            best_trial = None
        else:
            best_architecture, best_trial = (
                common.choose_random_trial_and_get_architecture(best_trials))

        # Get the architecture without reductions or replications which will be
        # grown or mutated.
        if self._phoenix_spec.replicate_cell:
            output_architecture = best_architecture[
                :self._phoenix_spec.num_blocks_in_cell]
            grow_mode = False
        else:
            output_architecture = self._remove_reduction_blocks(
                best_architecture)
            grow_mode = common.random(
                self._phoenix_spec.increase_complexity_probability)

        # Grow, mutate, and/or replicate architecture then add reductions & flatten.
        allowed_depth = self._get_allowed_depth(num_completed_trials)
        logging.info("Maximum depth allowed: %d", allowed_depth)
        if output_architecture.size < allowed_depth and grow_mode:
            logging.info("Growing the architecture.")
            output_architecture = architecture_utils.increase_structure_depth(
                output_architecture, new_block,
                self._phoenix_spec.problem_type)
        else:
            logging.info("Mutating the architecture.")
            output_architecture = common.mutate_replace(
                output_architecture, new_block)

        if self._phoenix_spec.replicate_cell:
            replication_times = (
                allowed_depth // self._phoenix_spec.num_blocks_in_cell)
            output_architecture = np.concatenate(
                [output_architecture for _ in range(replication_times)])

        output_architecture = self._add_reduction_blocks(
            output_architecture, self._phoenix_spec.num_blocks_in_cell,
            self._phoenix_spec.reduction_block_type)
        output_architecture = [
            blocks.BlockType(x) for x in output_architecture
        ]
        output_architecture = np.array(
            architecture_utils.fix_architecture_order(
                output_architecture, self._phoenix_spec.problem_type))
        return output_architecture, best_trial
    def _is_reduction_block(self, block):
        name = blocks.BlockType(block).name
        # Note: this substring check also matches DOWNSAMPLE_FLATTEN, so
        # flattening blocks that downsample are treated as reductions.
        return "REDUCTION" in name or "DOWNSAMPLE" in name or "POOL" in name
Example #10
def create_tower_spec(phoenix_spec,
                      inputs,
                      architecture,
                      dimension,
                      is_frozen,
                      lengths=None,
                      allow_auxiliary_head=False):
    """Creates the logits for the tower.

  Args:
    phoenix_spec: The trial's `phoenix_spec_pb2.PhoenixSpec` proto.
    inputs: The list of `tf.Tensors` of the tower.
    architecture: The list of `blocks.BlockType` of the tower architecture.
    dimension: int - the output tensor last axis dimension.
    is_frozen: Whether the tower should be frozen.
    lengths: A tensor of shape [batch] holding the sequence length for a
      sequential problem (rnn).
    allow_auxiliary_head: Whether to allow creating an auxiliary head if
      possible. Only applicable for CNNs.

  Returns:
    A TowerSpec holding the LogitsSpec (main and auxiliary logits), the
    tower's architecture as block names, and the tower's layer tensors.
  """

    # inputs[0] holds the raw features; the last entry feeds the logits.
    all_layer_tensors = inputs
    pre_logits = inputs[-1]
    logits_weight = 1.0
    aux_logits = None
    aux_logits_weight = None
    if (phoenix_spec.problem_type ==
            phoenix_spec_pb2.PhoenixSpec.RNN_ALL_ACTIVATIONS):
        logits = tf.compat.v1.layers.conv1d(inputs=pre_logits,
                                            filters=dimension,
                                            kernel_size=1)
    elif (phoenix_spec.problem_type ==
          phoenix_spec_pb2.PhoenixSpec.RNN_LAST_ACTIVATIONS):
        if lengths is not None:
            logits = utils.last_activations_in_sequence(
                tf.compat.v1.layers.conv1d(inputs=pre_logits,
                                           filters=dimension,
                                           kernel_size=1), lengths)
        else:
            logging.warning("Length is missing for rnn_last problem type.")
            logits = tf.compat.v1.layers.conv1d(inputs=pre_logits,
                                                filters=dimension,
                                                kernel_size=1)
    elif phoenix_spec.problem_type == phoenix_spec_pb2.PhoenixSpec.CNN:
        logits = tf.keras.layers.Dense(dimension, name="dense")(pre_logits)
        if allow_auxiliary_head and phoenix_spec.use_auxiliary_head:
            reductions = []
            flattens = []
            for i, block in enumerate(architecture):
                name = blocks.BlockType(block).name
                if "DOWNSAMPLE" in name or "REDUCTION" in name:
                    reductions.append(i)
                # Some blocks reduce and flatten.
                if "FLATTEN" in name:
                    flattens.append(i)
            if reductions:
                # Add the auxiliary head right before the reduction cell.
                idx = reductions[-1]
                aux_logits = _build_nas_aux_head(inputs[idx], dimension,
                                                 phoenix_spec.cnn_data_format)
                if aux_logits is not None:
                    aux_logits_weight = phoenix_spec.auxiliary_head_loss_weight
            if flattens and aux_logits is None:
                idx = flattens[-1]
                aux_logits = tf.keras.layers.Dense(
                    dimension, name="aux_dense")(inputs[idx])
                aux_logits_weight = phoenix_spec.auxiliary_head_loss_weight
    elif phoenix_spec.problem_type == phoenix_spec_pb2.PhoenixSpec.DNN:
        logits = tf.keras.layers.Dense(dimension, name="dense")(pre_logits)
    else:
        raise ValueError("phoenix_spec.problem_type must be either DNN, CNN, "
                         "RNN_LAST_ACTIVATIONS, or RNN_ALL_ACTIVATIONS.")

    logits = tf.identity(logits, name="logits")
    if aux_logits is not None:
        aux_logits = tf.identity(aux_logits, name="aux_logits")

    # TODO(b/172564129): Remove from eval graph.
    if is_frozen:
        logits = tf.stop_gradient(logits)
        if aux_logits is not None:
            aux_logits = tf.stop_gradient(aux_logits)

    return TowerSpec(
        logits_spec=LogitsSpec(logits, logits_weight, aux_logits,
                               aux_logits_weight),
        architecture=[blocks.BlockType(block).name for block in architecture],
        layer_tensors=all_layer_tensors)
Example #11
def construct_tower(phoenix_spec,
                    input_tensor,
                    tower_name,
                    architecture,
                    is_training,
                    lengths,
                    logits_dimension,
                    is_frozen,
                    dropout_rate=None,
                    allow_auxiliary_head=False):
    """Creates a tower giving an architecture.

  Args:
    phoenix_spec: The trial's `phoenix_spec_pb2.PhoenixSpec` proto.
    input_tensor: An input `tf.Tensor` to build the network on top of.
    tower_name: a unique name for the tower (string).
    architecture: np.array of ints (`blocks.BlockType`) with the architecture of
      the neural network to build.
    is_training: a boolean indicating if we are in training.
    lengths: A `tf.Tensor` of shape [batch_size] holding the length of each
      sequence for sequential problems. Keep as None for non-sequential
      problems.
    logits_dimension: The last axis dimension of the logits.
    is_frozen: Whether the tower is frozen; an integer, not a boolean.
    dropout_rate: a float indicating the rate of dropouts to apply between
      blocks. Applied only if the value is above zero.
    allow_auxiliary_head: Whether to allow importing the tower's auxiliary head,
      if the tower has one. Only applicable for CNNs.

  Returns:
    A `TowerSpec` for the built neural network.
  """
    blocks_builders = blocks.Blocks()
    output = [input_tensor]
    block_index = 1
    str_signature = ""
    with tf.compat.v1.variable_scope("Phoenix/{}".format(tower_name)):
        for block_type in architecture:
            str_signature += str(block_type)
            # TODO(b/172564129): Should block_index also be ignored when using
            # uniform average transfer learning? How would we handle repeated
            # blocks, e.g. two FC layers stacked on top of each other?
            scope = "{0}_{1}_{2}".format(str(block_index),
                                         blocks.BlockType(block_type).name,
                                         str_signature)
            scope = strip_scope(
                scope,
                phoenix_spec.transfer_learning_spec.transfer_learning_type,
                str_signature)
            with tf.compat.v1.variable_scope(scope):
                with (arg_scope(DATA_FORMAT_OPS,
                                data_format=phoenix_spec.cnn_data_format)):
                    output = blocks_builders[block_type].build(
                        input_tensors=output,
                        is_training=is_training,
                        lengths=lengths)
                    if dropout_rate and dropout_rate > 0:
                        output[-1] = tf.compat.v1.layers.dropout(
                            output[-1],
                            rate=dropout_rate,
                            training=is_training)
                    block_index += 1

        # Create the logits.
        scope = "last_dense_{}".format(str_signature)
        scope = strip_scope(
            scope, phoenix_spec.transfer_learning_spec.transfer_learning_type,
            str_signature)
        with tf.compat.v1.variable_scope(scope):
            tower_spec = create_tower_spec(phoenix_spec, output, architecture,
                                           logits_dimension, is_frozen,
                                           lengths, allow_auxiliary_head)

    set_architecture(architecture, tower_name)
    set_parameter(tower_name, DROPOUTS,
                  (-1.0 if dropout_rate is None else dropout_rate), tf.float32)
    set_parameter(tower_name, IS_FROZEN, int(is_frozen))
    return tower_spec
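
# Hypothetical usage sketch (added; not from the original source): builds a
# one-block DNN tower in TF1 graph mode, which also exercises
# create_tower_spec above. Assumes the proto defaults are acceptable for
# transfer_learning_spec and cnn_data_format, and that EMPTY_BLOCK passes
# its input through unchanged.
spec = phoenix_spec_pb2.PhoenixSpec()
spec.problem_type = phoenix_spec_pb2.PhoenixSpec.DNN
features = tf.compat.v1.placeholder(tf.float32, shape=[None, 32])
tower_spec = construct_tower(
    phoenix_spec=spec,
    input_tensor=features,
    tower_name="tower_0",
    architecture=np.array([blocks.BlockType.EMPTY_BLOCK]),
    is_training=True,
    lengths=None,
    logits_dimension=10,
    is_frozen=0)
logits = tower_spec.logits_spec.logits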
Example #12
    def block_name(self):
        return str(blocks_builder.BlockType(self.block_type).name)