Example #1
    def _CreateLayerVariables(self):
        super()._CreateLayerVariables()
        p = self.params

        load_op_list = []
        retrieve_op_list = []

        # At the feature level, track which are associated
        # with "sequence embeddings".
        self._sequence_features = {}

        if py_utils.use_tpu():
            num_cores = self.cluster.params.worker.tpus_per_replica
            global_batch_size = (self.params.batch_size *
                                 self.cluster.num_splits_per_client)
            table_to_config_dict = {}
            feature_to_config_dict = {}
            for table in self.tables:
                table_to_config_dict[table.table_name] = table.table_config
                load_op_list += table.load_op_list
                retrieve_op_list += table.retrieve_op_list
                for feature in table.input_keys:
                    if table.max_sequence_length > 0:
                        self._sequence_features[feature] = True
                    feature_to_config_dict[
                        feature] = tpu_embedding_lib.FeatureConfig(
                            table.table_name,
                            max_sequence_length=table.max_sequence_length)

            tpu_embedding = self._tpu_embedding_collection.tpu_embedding
            if tpu_embedding:
                self._CheckTPUEmbeddingConfig(tpu_embedding,
                                              table_to_config_dict,
                                              feature_to_config_dict,
                                              global_batch_size)
                tf.logging.info(
                    'TPUEmbedding API singleton already exists, reusing')
                self._tpu_embedding = tpu_embedding
            else:
                tf.logging.info('adding load and retrieve ops to collection.')
                self._tpu_embedding_collection.AddLoadOps(load_op_list)
                self._tpu_embedding_collection.AddRetrieveOps(retrieve_op_list)

                mode = tpu_embedding_lib.TRAINING
                device_config = tpu_embedding_lib.DeviceConfig(
                    num_cores=num_cores,
                    num_hosts=self.params.tables[0].num_tpu_hosts,
                    job_name=self.cluster.params.worker.name)
                self._tpu_embedding = tpu_embedding_lib.TPUEmbedding(
                    table_to_config_dict,
                    feature_to_config_dict,
                    global_batch_size,
                    mode,
                    master=None,
                    pipeline_execution_with_tensor_core=(
                        self.params.pipeline_execution_with_tensor_core),
                    partition_strategy=p.partition_strategy,
                    device_config=device_config)
                self._tpu_embedding_collection.tpu_embedding = self._tpu_embedding
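
A note on the reuse branch above: _CheckTPUEmbeddingConfig is not shown in this example, but its job is only to verify that the freshly built configs agree with the singleton being reused. Below is a minimal, purely illustrative sketch of such a check; the table_to_config_dict, feature_to_config_dict, batch_size_per_core and num_cores properties on the TPUEmbedding object are assumptions, not taken from this example.

def _CheckTPUEmbeddingConfigSketch(tpu_embedding, table_to_config_dict,
                                   feature_to_config_dict, global_batch_size):
    # Hypothetical sketch only; the real _CheckTPUEmbeddingConfig is not shown here.
    if set(tpu_embedding.table_to_config_dict) != set(table_to_config_dict):
        raise ValueError('Tables do not match the existing TPUEmbedding API.')
    if set(tpu_embedding.feature_to_config_dict) != set(feature_to_config_dict):
        raise ValueError('Features do not match the existing TPUEmbedding API.')
    if (tpu_embedding.batch_size_per_core * tpu_embedding.num_cores !=
            global_batch_size):
        raise ValueError('Global batch size does not match the existing '
                         'TPUEmbedding API.')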
Example #2
    def _CreateLayerVariables(self):
        super()._CreateLayerVariables()

        load_op_list = []
        retrieve_op_list = []

        # At the feature level, track which are associated
        # with "sequence embeddings".
        self._sequence_features = {}

        if py_utils.use_tpu():
            num_cores = self.cluster.params.worker.tpus_per_replica
            global_batch_size = (self.params.batch_size *
                                 self.cluster.num_splits_per_client)
            table_to_config_dict = {}
            feature_to_config_dict = {}
            for table in self.tables:
                table_to_config_dict[table.table_name] = table.table_config
                load_op_list += table.load_op_list
                retrieve_op_list += table.retrieve_op_list
                for feature in table.input_keys:
                    if table.max_sequence_length > 0:
                        self._sequence_features[feature] = True
                    feature_to_config_dict[
                        feature] = tpu_embedding_lib.FeatureConfig(
                            table.table_name,
                            max_sequence_length=table.max_sequence_length)
            tf.logging.info('adding load and retrieve ops to collection.')
            tf.add_to_collection(py_utils.TPU_EMBEDDING_LOAD_OPS, load_op_list)
            tf.add_to_collection(py_utils.TPU_EMBEDDING_RETRIEVE_OPS,
                                 retrieve_op_list)

            tpu_embedding_collection = tf.get_collection(
                py_utils.TPU_EMBEDDING)
            assert len(tpu_embedding_collection) <= 1
            if len(tpu_embedding_collection) == 1:
                tf.logging.info(
                    'TPUEmbedding API singleton already exists, reusing')
                self._tpu_embedding = tpu_embedding_collection[0]
            else:
                mode = tpu_embedding_lib.TRAINING
                device_config = tpu_embedding_lib.DeviceConfig(
                    num_cores=num_cores,
                    num_hosts=self.params.tables[0].num_tpu_hosts,
                    job_name=self.cluster.params.worker.name)
                self._tpu_embedding = tpu_embedding_lib.TPUEmbedding(
                    table_to_config_dict,
                    feature_to_config_dict,
                    global_batch_size,
                    mode,
                    master=None,
                    pipeline_execution_with_tensor_core=(
                        self.params.pipeline_execution_with_tensor_core),
                    device_config=device_config)
                tf.add_to_collection(py_utils.TPU_EMBEDDING,
                                     self._tpu_embedding)
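
In this older variant everything lives in ordinary TF graph collections, so any other layer or the trainer can recover the shared objects without holding a reference to this layer. A minimal consumer-side sketch, assuming only the py_utils collection keys used above:

# Each embedding layer appends its own list of ops, so the collections hold
# lists of lists; flatten them before handing them to Session.run if needed.
load_ops = tf.get_collection(py_utils.TPU_EMBEDDING_LOAD_OPS)
retrieve_ops = tf.get_collection(py_utils.TPU_EMBEDDING_RETRIEVE_OPS)
flat_load_ops = [op for op_list in load_ops for op in op_list]
flat_retrieve_ops = [op for op_list in retrieve_ops for op in op_list]

# At most one TPUEmbedding API object is expected per graph.
singletons = tf.get_collection(py_utils.TPU_EMBEDDING)
assert len(singletons) <= 1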
Example #3
def get_configs_from_feature_columns(feature_columns):
    """Create configs for TPUEmbedding etc from a list of feature columns.

  Args:
    feature_columns: a list of supported feature columns.

  Returns:
    A tuple of dicts: the first maps tables to their config, the second maps
    features to their config, and the third maps each learning rate key to a
    callback that takes the global step and returns a dynamic learning rate.
  """

    allowed = (
        tpu_fc_v2._TPUEmbeddingColumnV2,  # pylint: disable=protected-access
        tpu_fc_v2._TPUSharedEmbeddingColumnV2)  # pylint: disable=protected-access
    warn = (tpu_fc._TPUEmbeddingColumn, tpu_fc._TPUSharedEmbeddingColumn)  # pylint: disable=protected-access

    for column in feature_columns:
        if not isinstance(column, allowed + warn):
            raise TypeError(
                'Unsupported feature column {}. Supported types are {}.'.
                format(type(column), allowed))
        if isinstance(column, warn):
            logging.warn(
                'Columns of type {} are deprecated. Supported types are {}.'.
                format(type(column), allowed))

    table_to_config = {}
    feature_to_config = {}
    lr_key_to_fn, lr_fn_to_key = get_lr(feature_columns)
    lr_fn_to_key[None] = None
    for column in feature_columns:
        feature_name = column.get_feature_key_name()
        table_name = _get_table_name_from_embedding_var_name(
            column.get_embedding_var_name())
        if feature_name in feature_to_config:
            raise ValueError(
                'Feature column {} is used with multiple embeddings and this is '
                'not supported.'.format(feature_name))
        feature_to_config[feature_name] = tpu_embedding.FeatureConfig(
            table_id=table_name,
            max_sequence_length=column.get_max_sequence_length(),
            weight_key=column.get_weight_key_name())
        vocabulary_size, dimension = column.get_embedding_table_size()
        table_to_config[table_name] = tpu_embedding.TableConfig(
            vocabulary_size=vocabulary_size,
            dimension=dimension,
            initializer=column.get_initializer(),
            combiner=column.get_combiner(),
            learning_rate_key=lr_fn_to_key[column.get_learning_rate_fn()])

    return table_to_config, feature_to_config, lr_key_to_fn
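
A hedged usage sketch for the function above. It assumes TF's public tf.tpu.experimental.embedding_column helper (which produces the V2 column type accepted here) and the module-level helpers get_lr and _get_table_name_from_embedding_var_name that the function relies on; the feature names and sizes are made up for illustration.

import tensorflow.compat.v1 as tf

# Hypothetical input features wrapped as V2 TPU embedding columns.
cat_a = tf.feature_column.categorical_column_with_identity('feature_a', num_buckets=1000)
cat_b = tf.feature_column.categorical_column_with_identity('feature_b', num_buckets=500)
columns = [
    tf.tpu.experimental.embedding_column(cat_a, dimension=16),
    tf.tpu.experimental.embedding_column(cat_b, dimension=8),
]

table_to_config, feature_to_config, lr_key_to_fn = (
    get_configs_from_feature_columns(columns))
# table_to_config and feature_to_config can be passed straight to the
# TPUEmbedding constructor shown in the other examples.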
Example #4
        def _BuildTpuEmbeddingApi():
            load_op_list = []
            retrieve_op_list = []

            num_cores = self.cluster.params.worker.tpus_per_replica
            global_batch_size = (self.params.batch_size *
                                 self.cluster.num_splits_per_client)
            table_to_config_dict = {}
            feature_to_config_dict = {}
            for table in self.tables:
                table_to_config_dict[table.table_name] = table.table_config
                load_op_list += table.load_op_list
                retrieve_op_list += table.retrieve_op_list
                for feature in table.input_keys:
                    feature_to_config_dict[
                        feature] = tpu_embedding_lib.FeatureConfig(
                            table.table_name,
                            max_sequence_length=table.max_sequence_length)

            mode = tpu_embedding_lib.TRAINING
            device_config = tpu_embedding_lib.DeviceConfig(
                num_cores=num_cores,
                num_hosts=self.params.tables[0].num_tpu_hosts,
                job_name=self.cluster.params.worker.name)
            tpu_embedding = tpu_embedding_lib.TPUEmbedding(
                table_to_config_dict,
                feature_to_config_dict,
                global_batch_size,
                mode,
                master=None,
                pipeline_execution_with_tensor_core=(
                    self.params.pipeline_execution_with_tensor_core),
                partition_strategy=p.partition_strategy,
                device_config=device_config)

            with tf.init_scope():
                dummy_variables, dummy_variables_init = (
                    tpu_embedding_gradient.create_dummy_table_variables(
                        tpu_embedding))
            load_op_list += [dummy_variables_init]

            tf.add_to_collection(py_utils.TPU_EMBEDDING, tpu_embedding)
            tf.add_to_collection(py_utils.TPU_EMBEDDING_DUMMY_VARS,
                                 dummy_variables)
            tf.add_to_collection(py_utils.TPU_EMBEDDING_LOAD_OPS, load_op_list)
            tf.add_to_collection(py_utils.TPU_EMBEDDING_RETRIEVE_OPS,
                                 retrieve_op_list)
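
The collections populated above are typically consumed once per training job: the load ops (including the dummy-variable initializer appended here) copy the host-side embedding tables onto the TPU after variable initialization, and the retrieve ops copy them back before a checkpoint is written. A minimal TF1-style sketch, assuming the same collection keys and an already configured TPU session:

load_ops = tf.get_collection(py_utils.TPU_EMBEDDING_LOAD_OPS)
retrieve_ops = tf.get_collection(py_utils.TPU_EMBEDDING_RETRIEVE_OPS)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(load_ops)       # push embedding tables (and dummy-var init) to the TPU
    # ... run training steps ...
    sess.run(retrieve_ops)   # pull updated tables back before saving a checkpoint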
Example #5
  def _CreateLayerVariables(self):
    super()._CreateLayerVariables()
    p = self.params

    # At the feature level, track which are associated
    # with "sequence embeddings".
    self._sequence_features = {}

    if _ShouldUseTpu(p):
      num_cores = self.cluster.params.worker.tpus_per_replica
      global_batch_size = (
          self.params.batch_size * self.cluster.num_splits_per_client)
      table_to_config_dict = {}
      feature_to_config_dict = {}
      for table in self.tables:
        table_to_config_dict[table.table_name] = table.table_config
        for feature in table.input_keys:
          if table.max_sequence_length > 0:
            self._sequence_features[feature] = True
          feature_to_config_dict[feature] = tpu_embedding_lib.FeatureConfig(
              table.table_name, max_sequence_length=table.max_sequence_length)

      tpu_embedding = self._tpu_embedding_collection.tpu_embedding
      if tpu_embedding:
        self._CheckTPUEmbeddingConfig(tpu_embedding, table_to_config_dict,
                                      feature_to_config_dict, global_batch_size)
        tf.logging.info('TPUEmbedding API singleton already exists, reusing')
        self._tpu_embedding = tpu_embedding
      else:
        mode = tpu_embedding_lib.TRAINING
        device_config = tpu_embedding_lib.DeviceConfig(
            num_cores=num_cores,
            num_hosts=self.params.tables[0].num_tpu_hosts,
            job_name=self.cluster.params.worker.name)
        self._tpu_embedding = tpu_embedding_lib.TPUEmbedding(
            table_to_config_dict,
            feature_to_config_dict,
            global_batch_size,
            mode,
            master=None,
            pipeline_execution_with_tensor_core=(
                self.params.pipeline_execution_with_tensor_core),
            partition_strategy=p.partition_strategy,
            device_config=device_config)
        self._tpu_embedding_collection.tpu_embedding = self._tpu_embedding
        self._tpu_embedding_collection.SetGradientMultiplierSchedule(
            self.gradient_multiplier_schedule)
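
_ShouldUseTpu is not shown in this example. Example #1 gates the same block on py_utils.use_tpu() alone, so a plausible, purely hypothetical sketch is a thin wrapper around that check, here assuming an optional override flag on the params:

def _ShouldUseTpu(p):
  # Hypothetical sketch only: the p.use_tpu override is an assumption, not
  # taken from the example; py_utils.use_tpu() mirrors Example #1.
  return py_utils.use_tpu() or getattr(p, 'use_tpu', False)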
Example #6
def get_configs_from_feature_columns(feature_columns):
    """Create configs for TPUEmbedding etc from a list of feature columns.

  Args:
    feature_columns: a list of supported feature columns.

  Returns:
    A tuple of dicts, the first maps tables to their config, the second maps
    features to their config, and the third maps features to weight key names.
  """

    allowed = (tpu_fc._TPUEmbeddingColumn, tpu_fc._TPUSharedEmbeddingColumn)  # pylint: disable=protected-access

    for column in feature_columns:
        if not isinstance(column, allowed):
            raise TypeError(
                'Unsupported feature column {}. Supported types are {}.'.
                format(type(column), allowed))

    table_to_config = {}
    feature_to_config = {}
    feature_to_weight_key_name = {}
    for column in feature_columns:
        feature_name = column.get_feature_key_name()
        table_name = _get_table_name_from_embedding_var_name(
            column.get_embedding_var_name())
        if feature_name in feature_to_config:
            raise ValueError(
                'Feature column {} is used with multiple embeddings and this is '
                'not supported.'.format(feature_name))
        feature_to_config[feature_name] = tpu_embedding.FeatureConfig(
            table_id=table_name,
            max_sequence_length=column.get_max_sequence_length())
        feature_to_weight_key_name[feature_name] = column.get_weight_key_name()
        vocabulary_size, dimension = column.get_embedding_table_size()
        table_to_config[table_name] = tpu_embedding.TableConfig(
            vocabulary_size=vocabulary_size,
            dimension=dimension,
            initializer=column.get_initializer(),
            combiner=column.get_combiner())

    return table_to_config, feature_to_config, feature_to_weight_key_name
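
The returned dicts mirror the positional arguments of the TPUEmbedding constructor used throughout the earlier examples; a brief hedged wiring sketch with placeholder inputs:

feature_columns = [...]   # placeholder: a list of supported _TPUEmbeddingColumn objects
global_batch_size = 4096  # placeholder: total batch size across all replicas

table_to_config, feature_to_config, feature_to_weight_key_name = (
    get_configs_from_feature_columns(feature_columns))

# The weight-key map is consumed later, when enqueue data is built for
# weighted (multivalent) features.
embedding = tpu_embedding.TPUEmbedding(
    table_to_config,
    feature_to_config,
    global_batch_size,
    tpu_embedding.TRAINING,
    master=None)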
Example #7
  def get_feature_tbl_config(self):
    """Creates table configuration data structures.

    For all tables, vocab size and width are given by params.

    Table setup:
    tbl0 - categorical-feature-14
    tbl1 - categorical-feature-15
    ..

    Feature setup:
    categorical-feature-14 -- tbl0 (first sparse feature)
    categorical-feature-15 -- tbl1 (second sparse feature)

    Returns:
      A tuple of dicts, one for feature_to_config and one for table_to_config.
    """

    def lr_fn(global_step):
      """Learning function for the embeddings. Linear warmup and poly decay."""
      decay_exp = 2
      scal = self._batch_size / 2048
      adj_lr = self._learning_rate * scal
      if self._lr_warmup_steps == 0:
        return adj_lr
      if self._optimizer == "adagrad":
        return self._learning_rate
      warmup_lr = tf.cast(
          global_step, dtype=tf.float32) / self._lr_warmup_steps * adj_lr

      global_step = tf.cast(global_step, tf.float32)
      decay_steps = tf.cast(self._decay_steps, tf.float32)
      decay_start_step = tf.cast(self._decay_start_steps, tf.float32)
      steps_since_decay_start = global_step - decay_start_step
      already_decayed_steps = tf.minimum(steps_since_decay_start, decay_steps)
      decay_lr = adj_lr * (
          (decay_steps - already_decayed_steps) / decay_steps)**decay_exp
      decay_lr = tf.maximum(0.0000001, decay_lr)

      lr = tf.where(
          global_step < self._lr_warmup_steps, warmup_lr,
          tf.where(
              tf.logical_and(decay_steps > 0, global_step > decay_start_step),
              decay_lr, adj_lr))

      return lr

    table_to_config_dict = {}
    for i in range(self._num_tables_in_ec):
      vocab_size = self._table_size_sorted[i]
      table_to_config_dict["tbl%02d" % i] = tpu_embedding.TableConfig(
          vocabulary_size=vocab_size,
          dimension=self._de,
          # NOTE: Default weight initializer uses trunc_normal,
          #       stddev=1/dimension. This is changed to match the mlperf
          #       reference model.
          initializer=tf.random_uniform_initializer(
              minval=-1 / math.sqrt(vocab_size),
              maxval=1 / math.sqrt(vocab_size)),
          combiner=None,
          learning_rate_fn=lr_fn,

          # TODO(tayo): Using the utils lr_fn leads to problems with embedding
          # table size. The embedding table stops being able to fit.
          # learning_rate_fn=functools.partial(utils.lr_fn, params)
      )

    # Use an offset so that the categorical feature numbering follows the
    # integer (dense) feature numbering.
    offset = 1 + self._num_dense_features
    feature_to_config_dict = {}
    feature_to_config_dict.update([
        ("categorical-feature-%02d" % i,
         tpu_embedding.FeatureConfig(table_id="tbl%02d" % (i - offset)))
        for i in range(offset, offset + self._num_tables_in_ec)
    ])

    return feature_to_config_dict, table_to_config_dict
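
As a concrete check of the offset arithmetic (the docstring pairs categorical-feature-14 with tbl0, which matches the 13 dense and 26 categorical features assumed below):

# Worked example of the feature-to-table offset; the 13/26 split is an
# assumption consistent with the docstring, not taken from this code.
num_dense_features = 13
num_tables_in_ec = 26
offset = 1 + num_dense_features
mapping = {
    "categorical-feature-%02d" % i: "tbl%02d" % (i - offset)
    for i in range(offset, offset + num_tables_in_ec)
}
assert mapping["categorical-feature-14"] == "tbl00"
assert mapping["categorical-feature-39"] == "tbl25"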