Пример #1
0
 def test_construct_tower_with_transfer_learning(
     self,
     transfer_learning_type=transfer_learning_spec_pb2.TransferLearningSpec.
     NO_TRANSFER_LEARNING):
     """Checks tower variable names with and without transfer learning.

     Builds a small CNN tower and verifies that every tower variable's op
     name ends with the `_1334` suffix only when the transfer learning type
     is NO_TRANSFER_LEARNING; for any other type the suffix must be absent.

     Args:
       transfer_learning_type: TransferLearningSpec enum value under test.
     """
     no_transfer_learning = (
         transfer_learning_spec_pb2.TransferLearningSpec.NO_TRANSFER_LEARNING)
     signature_suffix = "_1334"
     name_of_tower = "test_tower"

     # Convolutions and then a flatten plate.
     tower_architecture = np.array([1, 3, 34])
     zeros_input = tf.zeros([100, 32, 32, 3])

     spec = phoenix_spec_pb2.PhoenixSpec(
         problem_type=phoenix_spec_pb2.PhoenixSpec.CNN,
         transfer_learning_spec=transfer_learning_spec_pb2.TransferLearningSpec(
             transfer_learning_type=transfer_learning_type))

     # Only the variables created by this call are inspected below.
     architecture_utils.construct_tower(
         phoenix_spec=spec,
         input_tensor=zeros_input,
         tower_name=name_of_tower,
         architecture=tower_architecture,
         is_training=True,
         lengths=None,
         logits_dimension=10,
         hparams=hp.HParams(),
         model_directory=self.get_temp_dir(),
         is_frozen=False,
         dropout_rate=None)

     # The expectation is the same for every variable, so pick it once.
     if transfer_learning_type == no_transfer_learning:
         check_name = self.assertEndsWith
     else:
         check_name = self.assertNotEndsWith
     for variable in architecture_utils.get_tower_variables(name_of_tower):
         check_name(variable.op.name, signature_suffix)
Пример #2
0
 def _create_checkpoint(self, towers, trial_id):
     """Builds one tower per entry of `towers` and saves a checkpoint.

     Everything is created in a fresh graph, and the checkpoint is written
     to `<test_tmpdir>/<trial_id>/ckpt`.

     Args:
       towers: iterable of tower name prefixes; each tower is built under
         the name `<prefix>_0` and registered with a tower count of 1.
       trial_id: identifier used as the name of the checkpoint sub-directory.
     """
     with self.test_session(graph=tf.Graph()) as session:
         tower_architecture = np.array([1, 3, 34])
         zeros_input = tf.zeros([100, 32, 32, 3])
         spec = phoenix_spec_pb2.PhoenixSpec(
             problem_type=phoenix_spec_pb2.PhoenixSpec.CNN)

         # The trial directory hosts both the tower artifacts and the ckpt.
         trial_dir = os.path.join(flags.FLAGS.test_tmpdir, str(trial_id))
         if trial_dir and not tf.io.gfile.exists(trial_dir):
             tf.io.gfile.makedirs(trial_dir)

         for prefix in towers:
             architecture_utils.construct_tower(
                 phoenix_spec=spec,
                 input_tensor=zeros_input,
                 tower_name="{}_0".format(prefix),
                 architecture=tower_architecture,
                 is_training=True,
                 lengths=None,
                 logits_dimension=10,
                 hparams=hp.HParams(),
                 model_directory=trial_dir,
                 is_frozen=False,
                 dropout_rate=None)
             architecture_utils.set_number_of_towers(prefix, 1)
         architecture_utils.set_number_of_towers('replay_generator', 0)

         # The saver is created after all towers so it sees their variables.
         checkpoint_saver = tf.compat.v1.train.Saver()
         for make_initializer in (tf.compat.v1.global_variables_initializer,
                                  tf.compat.v1.local_variables_initializer):
             session.run(make_initializer())
         checkpoint_saver.save(session, trial_dir + '/ckpt')
Пример #3
0
    def test_construct_network(self, dropout, expected_logits):
        """Builds a CNN tower in graph mode and checks its logits.

        Args:
          dropout: dropout rate forwarded to `construct_tower`.
          expected_logits: values the tower's logits are compared against
            (relative tolerance 1e-3) for the seeded random input.
        """
        # Force graph mode
        graph = tf.compat.v1.Graph()
        with graph.as_default():
            tf.random.set_seed(1234)
            # Convolutions and then a flatten plate.
            tower_architecture = np.array([1, 3, 34])
            image_placeholder = tf.compat.v1.placeholder(
                dtype=tf.float32, shape=[None, 32, 32, 3], name="input")
            spec = phoenix_spec_pb2.PhoenixSpec(
                problem_type=phoenix_spec_pb2.PhoenixSpec.CNN)
            tower = architecture_utils.construct_tower(
                phoenix_spec=spec,
                input_tensor=image_placeholder,
                tower_name="test_tower",
                architecture=tower_architecture,
                is_training=True,
                lengths=None,
                logits_dimension=10,
                hparams=hp.HParams(),
                model_directory=self.get_temp_dir(),
                is_frozen=False,
                dropout_rate=dropout)

            # Seed numpy so the fed input is reproducible across runs.
            np.random.seed(42)
            feed = {image_placeholder: np.random.random([1, 32, 32, 3])}

            with tf.compat.v1.Session() as session:
                initializers = [
                    tf.compat.v1.global_variables_initializer(),
                    tf.compat.v1.local_variables_initializer(),
                ]
                session.run(initializers)
                actual_logits = session.run(tower.logits_spec.logits,
                                            feed_dict=feed)

            self.assertAllClose(expected_logits, actual_logits, rtol=1e-3)
Пример #4
0
    def test_init_variables(self,
                            new_architecture,
                            expected_output,
                            new_tower_name="test_tower"):
        """Checks which variables `init_variables` reports for a new tower.

        A reference tower named "test_tower" is built and checkpointed in one
        graph. A second tower is then built in another graph and
        `init_variables` is asked to map "Phoenix/test_tower" onto the new
        tower's scope using that checkpoint.

        Args:
          new_architecture: architecture of the second tower.
          expected_output: items `init_variables` is expected to return
            (compared ignoring order).
          new_tower_name: name of the second tower.
        """
        # Force graph mode
        with tf.compat.v1.Graph().as_default():
            checkpoint_dir = self.get_temp_dir()
            spec = phoenix_spec_pb2.PhoenixSpec(
                problem_type=phoenix_spec_pb2.PhoenixSpec.CNN)

            def _build_tower(name, tower_architecture):
                # Builds a tower on an all-zero input in the current graph.
                architecture_utils.construct_tower(
                    phoenix_spec=spec,
                    input_tensor=tf.zeros([100, 32, 32, 3]),
                    tower_name=name,
                    architecture=tower_architecture,
                    is_training=True,
                    lengths=None,
                    logits_dimension=10,
                    hparams=hp.HParams(),
                    model_directory=self.get_temp_dir(),
                    is_frozen=False,
                    dropout_rate=None)

            # First graph: build the reference tower and checkpoint it.
            with self.test_session(graph=tf.Graph()) as session:
                _build_tower("test_tower", np.array([1, 3, 34]))
                checkpoint_saver = tf.compat.v1.train.Saver()
                session.run(tf.compat.v1.global_variables_initializer())
                session.run(tf.compat.v1.local_variables_initializer())
                checkpoint_saver.save(session, checkpoint_dir + "/ckpt")

            # Second graph: build the new tower and map the checkpoint onto it.
            with self.test_session(graph=tf.Graph()):
                _build_tower(new_tower_name, new_architecture)
                restored = architecture_utils.init_variables(
                    tf.train.latest_checkpoint(checkpoint_dir),
                    "Phoenix/test_tower", "Phoenix/{}".format(new_tower_name))
                self.assertCountEqual(restored, expected_output)
  def _create_new_architecture(self, features, input_layer_fn,
                               shared_input_tensor, architecture, run_config,
                               my_id, is_training, shared_lengths, hparams,
                               logits_dimension, dropout_rate, prev_trial,
                               trials):
    """Builds a new tower for `architecture`, optionally from a snapshot.

    The architecture is saved under `run_config.model_dir`, a tower named
    `<generator_name>_0` is constructed, and, when snapshot transfer learning
    is configured and `prev_trial` is positive, its variables are initialized
    from the latest checkpoint of that previous trial.

    Args:
      features: input features; only used when the input is not shared.
      input_layer_fn: callable building `(input_tensor, lengths)` from
        `features`; only called when the input is not shared.
      shared_input_tensor: input tensor used when the input is shared.
      architecture: architecture to build.
      run_config: run configuration; `model_dir` is read from it.
      my_id: id forwarded to `_save_architecture`.
      is_training: whether the tower is built for training.
      shared_lengths: lengths used when the input is shared.
      hparams: hyperparameters forwarded to tower construction.
      logits_dimension: dimension of the tower's logits.
      dropout_rate: dropout rate forwarded to tower construction.
      prev_trial: id of the trial to snapshot from; falsy or non-positive
        values disable snapshotting.
      trials: trials passed to `_get_trial_from_id` to resolve `prev_trial`.

    Returns:
      A pair of one-element lists: the tower's logits spec and the tower's
      architecture.
    """
    logging.info("Creating new architecture: ")
    logging.info(architecture)

    if self._phoenix_spec.is_input_shared:
      input_tensor, lengths = shared_input_tensor, shared_lengths
    else:
      lengths_feature_name = self._phoenix_spec.lengths_feature_name
      # Drop the lengths feature when the features dict does not carry it.
      lengths_missing = (
          isinstance(features, dict) and lengths_feature_name not in features)
      if lengths_missing:
        lengths_feature_name = ""
      input_tensor, lengths = input_layer_fn(
          features=features,
          is_training=is_training,
          scope_name="Phoenix/{}_0/Input".format(self.generator_name()),
          lengths_feature_name=lengths_feature_name)

    self._save_architecture(architecture, run_config.model_dir, my_id)

    new_tower = architecture_utils.construct_tower(
        phoenix_spec=self._phoenix_spec,
        input_tensor=input_tensor,
        tower_name="{}_0".format(self.generator_name()),
        architecture=architecture,
        is_training=is_training,
        lengths=lengths,
        logits_dimension=logits_dimension,
        is_frozen=False,
        hparams=hparams,
        model_directory=run_config.model_dir,
        dropout_rate=dropout_rate,
        allow_auxiliary_head=self._allow_auxiliary_head)

    snapshot_type = (
        transfer_learning_spec_pb2.TransferLearningSpec
        .SNAPSHOT_TRANSFER_LEARNING)
    configured_type = (
        self._phoenix_spec.transfer_learning_spec.transfer_learning_type)
    apply_snapshot = configured_type == snapshot_type
    if prev_trial and prev_trial > 0 and apply_snapshot:
      previous_trial_dir = architecture_utils.DirectoryHandler.trial_dir(
          self._get_trial_from_id(prev_trial, trials))
      # Source and destination scopes are the same tower scope.
      architecture_utils.init_variables(
          checkpoint=tf.train.latest_checkpoint(previous_trial_dir),
          original_scope="Phoenix/{}_0".format(self.generator_name()),
          new_scope="Phoenix/{}_0".format(self.generator_name()))

    architecture_utils.set_number_of_towers(self.generator_name(), 1)
    return [new_tower.logits_spec], [new_tower.architecture]
Пример #6
0
    def test_import_tower(self, shared_input):
        """Checks that an imported tower reproduces the original logits.

        A tower is built and checkpointed in one graph, then brought into a
        second graph with `import_tower`. Both towers are fed the same seeded
        random input and their logits are compared (relative tolerance 1e-3).

        Args:
          shared_input: value assigned to `phoenix_spec.is_input_shared`; it
            also selects whether the second graph's input reaches
            `import_tower` through `shared_input_tensor` or through
            `features`.
        """
        np.random.seed(42)
        sample_input = np.random.random([1, 32, 32, 3])

        def _shared_input_layer_fn(features,
                                   is_training,
                                   scope_name="Phoenix/Input",
                                   lengths_feature_name=None):
            # With a shared input the input layer contributes nothing.
            del features, is_training, scope_name, lengths_feature_name
            return None, None

        def _features_input_layer_fn(features,
                                     is_training,
                                     scope_name="Phoenix/Input",
                                     lengths_feature_name=None):
            # Reads the "x" feature and casts it to float32 under scope_name.
            del is_training, lengths_feature_name
            with tf.compat.v1.variable_scope(scope_name):
                return tf.cast(features["x"], dtype=tf.float32), None

        # Force graph mode
        with tf.compat.v1.Graph().as_default():
            checkpoint_dir = self.get_temp_dir()
            tower_architecture = np.array([1, 3, 34])
            spec = phoenix_spec_pb2.PhoenixSpec(
                problem_type=phoenix_spec_pb2.PhoenixSpec.CNN)
            spec.is_input_shared = shared_input

            # First graph: build the source tower, evaluate and checkpoint it.
            with self.test_session(graph=tf.Graph()) as session:
                source_input = tf.compat.v1.placeholder(
                    dtype=tf.float32, shape=[None, 32, 32, 3], name="input_1")
                tf.random.set_seed(1234)
                source_tower = architecture_utils.construct_tower(
                    phoenix_spec=spec,
                    input_tensor=source_input,
                    tower_name="test_tower",
                    architecture=tower_architecture,
                    is_training=True,
                    lengths=None,
                    logits_dimension=10,
                    hparams=hp.HParams(),
                    model_directory=self.get_temp_dir(),
                    is_frozen=False,
                    dropout_rate=None)
                checkpoint_saver = tf.compat.v1.train.Saver()
                session.run(tf.compat.v1.global_variables_initializer())
                session.run(tf.compat.v1.local_variables_initializer())
                source_logits = session.run(
                    source_tower.logits_spec.logits,
                    feed_dict={source_input: sample_input})
                checkpoint_saver.save(session, checkpoint_dir + "/ckpt")

            # Second graph: import the tower and evaluate it on the same input.
            with self.test_session(graph=tf.Graph()) as session:
                target_input = tf.compat.v1.placeholder(
                    dtype=tf.float32, shape=[None, 32, 32, 3], name="input_2")
                if shared_input:
                    features = {}
                    shared_input_tensor = target_input
                    input_layer_fn = _shared_input_layer_fn
                else:
                    features = {"x": target_input}
                    shared_input_tensor = None
                    input_layer_fn = _features_input_layer_fn

                tf.random.set_seed(1234)
                imported_tower = architecture_utils.import_tower(
                    features=features,
                    input_layer_fn=input_layer_fn,
                    phoenix_spec=spec,
                    shared_input_tensor=shared_input_tensor,
                    original_tower_name="test_tower",
                    new_tower_name="imported_tower",
                    model_directory=checkpoint_dir,
                    new_model_directory=self.get_temp_dir(),
                    is_training=True,
                    logits_dimension=10,
                    shared_lengths=None,
                    force_snapshot=False,
                    force_freeze=False)
                session.run(tf.compat.v1.global_variables_initializer())
                session.run(tf.compat.v1.local_variables_initializer())
                imported_logits = session.run(
                    imported_tower.logits_spec.logits,
                    feed_dict={target_input: sample_input})

            self.assertAllClose(source_logits, imported_logits, rtol=1e-3)
Пример #7
0
    def create_model_spec(self,
                          features,
                          params,
                          learning_rate_spec,
                          use_tpu,
                          train_logits_specs,
                          eval_logits_spec,
                          labels,
                          mode,
                          lengths,
                          loss_fn,
                          predictions_fn,
                          optimizer_fn=None):
        """Creates model spec for all tasks.

        With at most one entry in `multi_task_spec`, a single "Trainer" task
        is built directly on the given logits specs. Otherwise one task is
        built per entry, each on its own task tower (or projection) stacked on
        the first train logits spec, and the primary task supplies the
        returned loss.

        Args:
          features: feature mapping; read only to look up weight features.
          params: hyperparameters; `params.optimizer` is read when no
            `optimizer_fn` is supplied in TRAIN mode.
          learning_rate_spec: mapping with a required "learning_rate" key and
            optional "exponential_decay_steps", "exponential_decay_rate",
            "l2_regularization" and "gradient_max_norm" keys (each defaulting
            to -1 when absent).
          use_tpu: forwarded to the optimizer factory.
          train_logits_specs: list of logits specs used for training. The
            multi-task path only uses the first one.
          eval_logits_spec: logits spec used for the task and the predictions
            in the single-task path.
          labels: labels for the task(s). In the multi-task path it is indexed
            by label name (and by weight name for weights stored in labels).
          mode: a `tf.estimator.ModeKeys` value.
          lengths: forwarded to the construction of task towers.
          loss_fn: forwarded to `_create_task_spec`.
          predictions_fn: callable invoked as
            `predictions_fn(logits, mode=..., temperature=...)`. In the
            multi-task path its result must provide `.items()`.
          optimizer_fn: optional optimizer factory. When omitted in TRAIN mode
            one is built from `params` and `learning_rate_spec`.

        Returns:
          A `ModelSpec` holding the loss, the train op (None unless in TRAIN
          mode), the predictions and the train hooks.

        Raises:
          ValueError: if `primary_task_name` is set but matches none of the
            label names in `multi_task_spec`.
        """
        is_training = mode == tf.estimator.ModeKeys.TRAIN

        if not optimizer_fn and is_training:
            optimizer_fn = _get_optimizer_fn(
                optimizer=params.optimizer,
                learning_rate=learning_rate_spec["learning_rate"],
                use_tpu=use_tpu,
                exponential_decay_steps=learning_rate_spec.get(
                    "exponential_decay_steps", -1),
                exponential_decay_rate=learning_rate_spec.get(
                    "exponential_decay_rate", -1),
                lr_warmup_steps=self._phoenix_spec.learning_spec.
                lr_warmup_steps)

        # Outside of TRAIN mode no train op is built (train_op stays None
        # below), so there is no train-op factory to pass along either.
        train_op_fn = None
        if is_training:
            train_op_fn = functools.partial(
                _train_op_fn,
                optimizer_fn=optimizer_fn,
                use_synchronous_optimizer=self._phoenix_spec.
                use_synchronous_optimizer,
                l2_regularization=learning_rate_spec.get(
                    "l2_regularization", -1),
                gradient_max_norm=learning_rate_spec.get(
                    "gradient_max_norm", -1))

        # One label / One task - Old Phoenix behavior
        if (self._phoenix_spec.multi_task_spec is None
                or len(self._phoenix_spec.multi_task_spec) <= 1):
            with tf.compat.v1.variable_scope("Phoenix/Trainer"):
                weights = 1.0
                if (self._phoenix_spec.weight_feature_name
                        and mode != tf.estimator.ModeKeys.PREDICT):
                    weights = features[self._phoenix_spec.weight_feature_name]

                task_spec = self._create_task_spec(
                    labels=labels,
                    weights=weights,
                    train_logits_specs=train_logits_specs,
                    eval_logits_spec=eval_logits_spec,
                    train_op_fn=train_op_fn,
                    name="Trainer",
                    mode=mode,
                    loss_fn=loss_fn)

                train_op = None
                if is_training:
                    train_op = _merge_train_op_list(
                        task_spec.train_op_list,
                        self._ensemble_spec.no_train_speedup)

                # create predictions here.
                predictions = predictions_fn(
                    eval_logits_spec.logits,
                    mode=mode,
                    temperature=self._phoenix_spec.temperature)

                return ModelSpec(loss=task_spec.loss,
                                 train_op=train_op,
                                 predictions=predictions,
                                 train_hooks=task_spec.train_hooks_list)

        # MultiTask. New Phoenix Behavior
        # Details in:
        # go/phoenix-multitask
        # Weights that are not features are expected to travel inside labels.
        num_weights_in_labels = sum(
            1 for task_spec in self._phoenix_spec.multi_task_spec
            if not task_spec.weight_is_a_feature)

        # In predict mode we don't have labels
        if labels:
            assert (len(labels) == len(self._phoenix_spec.multi_task_spec) +
                    num_weights_in_labels)
        if len(train_logits_specs) > 1:
            logging.warning(
                "Using ensembling in a multi-task training. If there is "
                "no task that restrict the searchable logits from "
                "rotating, then training is going to produce bad "
                "non-aligned ensembles.")

        primary_task = None
        task_spec_list = []
        for task_spec in self._phoenix_spec.multi_task_spec:
            logits_spec = train_logits_specs[0]
            # Build tower on top of searched model for the specific task
            if task_spec.architecture:
                tower_architecture = [
                    blocks.BlockType[block_type]
                    for block_type in task_spec.architecture
                ]
                task_tower_spec = architecture_utils.construct_tower(
                    phoenix_spec=self._phoenix_spec,
                    input_tensor=tf.nn.relu(logits_spec.logits),
                    tower_name="task_{}_tower".format(task_spec.label_name),
                    architecture=np.array(tower_architecture),
                    is_training=is_training,
                    lengths=lengths,
                    logits_dimension=task_spec.number_of_classes,
                    is_frozen=False,
                    # TODO(b/172564129): add dropouts.
                    dropout_rate=None)
                # Ignore auxiliary heads for task towers, if any.
                task_logits = task_tower_spec.logits_spec.logits
            else:
                with tf.compat.v1.variable_scope(
                        "Phoenix/task_{}_tower".format(task_spec.label_name)):
                    task_logits = self._add_projection_if_needed(
                        logits_spec.logits, task_spec.number_of_classes)

            # Replace the base tower logits with those of the task tower.
            logits_spec = logits_spec._replace(logits=task_logits)
            # Compare against None explicitly: truth-testing a graph-mode
            # tensor raises instead of reporting presence.
            if logits_spec.aux_logits is not None:
                aux_logits = self._add_projection_if_needed(
                    logits_spec.aux_logits, task_spec.number_of_classes)
                logits_spec = logits_spec._replace(aux_logits=aux_logits)
            with tf.compat.v1.variable_scope("Phoenix/trainer_{}".format(
                    task_spec.label_name)):
                if mode != tf.estimator.ModeKeys.PREDICT:
                    task_labels = labels[task_spec.label_name]
                else:
                    task_labels = None

                weights = 1.0
                if (task_spec.weight_feature_name
                        and mode != tf.estimator.ModeKeys.PREDICT):
                    if task_spec.weight_is_a_feature:
                        weights = features[task_spec.weight_feature_name]
                    else:
                        weights = labels[task_spec.weight_feature_name]

                task = self._create_task_spec(labels=task_labels,
                                              weights=weights,
                                              train_logits_specs=[logits_spec],
                                              eval_logits_spec=logits_spec,
                                              train_op_fn=train_op_fn,
                                              name=task_spec.label_name,
                                              mode=mode,
                                              loss_fn=loss_fn)
                task_spec_list.append(task)
                if task_spec.label_name == self._phoenix_spec.primary_task_name:
                    primary_task = task

        # Without an explicit primary task the first task takes that role.
        if not self._phoenix_spec.primary_task_name:
            primary_task = task_spec_list[0]
        if primary_task is None:
            # Fail with a clear message rather than an AttributeError on None.
            raise ValueError(
                "primary_task_name '{}' does not match any task in "
                "multi_task_spec.".format(
                    self._phoenix_spec.primary_task_name))

        model_spec_predictions = {}
        for task in task_spec_list:
            task_predictions = predictions_fn(
                task.logits,
                mode=mode,
                temperature=self._phoenix_spec.temperature)
            for prediction_key, prediction_value in task_predictions.items():
                prediction_key_name = prediction_key + "/" + task.name
                model_spec_predictions[prediction_key_name] = prediction_value
                # The primary task is also exposed under the un-suffixed key.
                if task.name == primary_task.name:
                    model_spec_predictions[prediction_key] = prediction_value
        logging.info(model_spec_predictions)

        train_op_list = []
        train_hooks_list = []
        for task in task_spec_list:
            train_op_list.extend(task.train_op_list)
            train_hooks_list.extend(task.train_hooks_list)

        train_op = None
        if is_training:
            train_op = _merge_train_op_list(
                train_op_list, self._ensemble_spec.no_train_speedup)

        return ModelSpec(loss=primary_task.loss,
                         train_op=train_op,
                         predictions=model_spec_predictions,
                         train_hooks=train_hooks_list)