Example 1
 def test_gam_model_builder_with_spec(self):
   """Builds a GAM-scored model via ModelBuilder and checks the score shape."""
   scorer = model_lib.GAMScorer(
       example_hidden_layer_dims=[10, 10], context_hidden_layer_dims=[10, 10])
   builder = model_lib.ModelBuilder(
       input_creator=self._input_creator,
       preprocessor=self._preprocessor,
       mask_feature_name="mask",
       scorer=scorer,
       name="test_model")
   model = builder.build()
   # A single list of three examples, all valid according to the mask.
   example_feature = tf.convert_to_tensor([[[0.], [1], [2]]])
   scores = model({
       "context_1": tf.convert_to_tensor([[1]]),
       "feature_1": example_feature,
       "feature_2": example_feature,
       "mask": tf.convert_to_tensor([[True, True, True]]),
   })
   # One score per example in the list: [batch=1, list_size=3].
   self.assertAllEqual(scores.shape.as_list(), [1, 3])
Example 2
 def test_dnn_model_builder_with_spec(self):
   """Builds a DNN-scored model via ModelBuilder and checks the score shape."""
   builder = model_lib.ModelBuilder(
       input_creator=self._input_creator,
       preprocessor=self._preprocessor,
       scorer=model_lib.DNNScorer(hidden_layer_dims=[10, 10], output_units=1),
       mask_feature_name="mask",
       name="test_model",
   )
   model = builder.build()
   # A single list of three examples; the context feature is ragged.
   example_feature = tf.convert_to_tensor([[[0.], [1], [2]]])
   scores = model({
       "context_1": tf.ragged.constant([[3, 1, 4, 1]]),
       "feature_1": example_feature,
       "feature_2": example_feature,
       "mask": tf.convert_to_tensor([[True, True, True]]),
   })
   # One score per example in the list: [batch=1, list_size=3].
   self.assertAllEqual(scores.shape.as_list(), [1, 3])
Example 3
    def test_model_to_saved_model_dense_inputs(self):
        """Exports a built model to SavedModel and checks that the imported
        serving signature reproduces the in-memory model's scores.

        Fix: export into a test-owned temporary directory instead of the
        hard-coded "/tmp/keras_model" path, which leaks state between runs
        and races when tests execute in parallel.
        """
        dnn_scorer = model_lib.DNNScorer(hidden_layer_dims=[10, 10],
                                         output_units=1)
        dnn_model = model_lib.ModelBuilder(
            input_creator=self._input_creator,
            preprocessor=self._preprocessor,
            scorer=dnn_scorer,
            mask_feature_name="mask",
            name="test_model",
        ).build()

        @tf.function(
            input_signature=[tf.TensorSpec(shape=(1, ), dtype=tf.string)])
        def _predict(serialized):
            # Parse serialized ExampleListWithContext protos into ranking
            # features, then score them in inference mode.
            features = data.parse_from_example_list(
                serialized,
                context_feature_spec=self._context_feature_spec,
                example_feature_spec=self._example_feature_spec,
                mask_feature_name="mask")
            scores = dnn_model(inputs=features, training=False)
            return {"predictions": scores}

        dnn_model.infer_from_proto = _predict

        # Export the model to a SavedModel in a per-test temp directory.
        export_dir = os.path.join(self.create_tempdir(), "keras_model")
        tf.saved_model.save(dnn_model,
                            export_dir=export_dir,
                            signatures={"predict": dnn_model.infer_from_proto})

        # Import ranker from SavedModel.
        imported = tf.saved_model.load(export_dir)
        imported_model = imported.signatures["predict"]
        imported_output = imported_model(
            tf.convert_to_tensor([ELWC_PROTO.SerializeToString()
                                  ]))["predictions"]

        # Score the same list directly with the in-memory model; both paths
        # must agree.
        output = dnn_model({
            "context_1":
            tf.ragged.constant([[3, 1, 4, 1]]),
            "feature_1":
            tf.convert_to_tensor([[[0.], [1], [2]]]),
            "feature_2":
            tf.convert_to_tensor([[[0.], [1], [2]]]),
            "mask":
            tf.convert_to_tensor([[True, True, True]]),
        })
        self.assertAllClose(output.numpy(), imported_output.numpy())
Example 4
 def test_multi_task_scorer(self):
   """Checks that a multi-task scorer yields one score tensor per task."""
   model = model_lib.ModelBuilder(
       input_creator=self._input_creator,
       preprocessor=self._preprocessor,
       scorer=DummyMultiTaskScorer(),
       mask_feature_name="mask",
       name="test_model",
   ).build()
   # A single list of three examples; the context feature is ragged.
   example_feature = tf.convert_to_tensor([[[0.], [1], [2]]])
   outputs = model({
       "context_1": tf.ragged.constant([[3, 1, 4, 1]]),
       "feature_1": example_feature,
       "feature_2": example_feature,
       "mask": tf.convert_to_tensor([[True, True, True]]),
   })
   # One entry per task, each holding per-example scores.
   self.assertEqual(len(outputs), 2)
   self.assertAllEqual(outputs["task1"].numpy(), [[1., 2., 3.]])
   self.assertAllEqual(outputs["task2"].numpy(), [[2., 4., 6.]])
Example 5
  def test_pipeline_with_multi_task(self):
    """Trains a multi-task ranking pipeline end to end and checks exports."""
    tmp_dir = self.create_tempdir()
    record_path = os.path.join(tmp_dir, "elwc.tfrecord")
    if tf.io.gfile.exists(record_path):
      tf.io.gfile.remove(record_path)

    # Write a small synthetic training/validation dataset of ELWC records.
    with tf.io.TFRecordWriter(record_path) as record_writer:
      for _ in range(256):
        record_writer.write(ELWC.SerializeToString())

    model_dir = os.path.join(tmp_dir, "model")

    dataset_hparams = pipeline.DatasetHparams(
        train_input_pattern=record_path,
        valid_input_pattern=record_path,
        train_batch_size=128,
        valid_batch_size=128,
        list_size=2,
        dataset_reader=tf.data.TFRecordDataset,
        convert_labels_to_binary=False)
    # Two tasks with distinct losses; task2 is weighted twice as heavily.
    pipeline_hparams = pipeline.PipelineHparams(
        model_dir=model_dir,
        num_epochs=2,
        steps_per_epoch=5,
        validation_steps=2,
        learning_rate=0.01,
        loss={"task1": "softmax_loss", "task2": "pairwise_logistic_loss"},
        loss_weights={"task1": 1.0, "task2": 2.0},
        export_best_model=True,
        strategy="MirroredStrategy")

    context_spec = {
        "cf_1":
            tf.io.FixedLenFeature(
                shape=(1,), dtype=tf.float32, default_value=0.0),
    }
    example_spec = {
        "custom_features_{}".format(i):
        tf.io.FixedLenFeature(shape=(1,), dtype=tf.float32, default_value=0.0)
        for i in range(1, 4)
    }
    # Both tasks read the same label feature.
    per_task_label_spec = (_LABEL_FEATURE,
                           tf.io.FixedLenFeature(
                               shape=(1,),
                               dtype=tf.float32,
                               default_value=_PADDING_LABEL))
    label_spec = {"task1": per_task_label_spec, "task2": per_task_label_spec}
    weight_spec = ("weight",
                   tf.io.FixedLenFeature(
                       shape=(1,), dtype=tf.float32, default_value=1.))

    model_builder = model_lib.ModelBuilder(
        input_creator=model_lib.FeatureSpecInputCreator(context_spec,
                                                        example_spec),
        preprocessor=model_lib.PreprocessorWithSpec({}),
        scorer=DummyMultiTaskScorer(),
        mask_feature_name=_MASK,
        name="multi_task_model",
    )

    dataset_builder = pipeline.MultiLabelDatasetBuilder(
        context_spec,
        example_spec,
        _MASK,
        label_spec,
        dataset_hparams,
        sample_weight_spec=weight_spec)
    ranking_pipeline = pipeline.MultiTaskPipeline(
        model_builder,
        dataset_builder=dataset_builder,
        hparams=pipeline_hparams)

    ranking_pipeline.train_and_validate(verbose=1)

    saved_model_path = os.path.join(model_dir, "export/latest_model")
    self.assertTrue(tf.saved_model.contains_saved_model(saved_model_path))

    exported = tf.saved_model.load(export_dir=saved_model_path)

    # The listwise signature scores whole serialized lists.
    listwise_fn = exported.signatures[tf.saved_model.PREDICT_METHOD_NAME]
    listwise_logits = listwise_fn(
        tf.convert_to_tensor([ELWC.SerializeToString()] * 2))["task1"]
    self.assertAllEqual([2, 2], listwise_logits.get_shape().as_list())

    # The pointwise signature scores individual serialized examples.
    pointwise_fn = exported.signatures[tf.saved_model.REGRESS_METHOD_NAME]
    pointwise_logits = pointwise_fn(
        tf.convert_to_tensor([
            EXAMPLE_PROTO_1.SerializeToString(),
            EXAMPLE_PROTO_2.SerializeToString()
        ]))["task1"]
    self.assertAllEqual([2], pointwise_logits.get_shape().as_list())

    # Pointwise scores must match the corresponding listwise logits.
    self.assertAllClose(pointwise_logits, listwise_logits[0])
Example 6
    def test_pipeline_with_feature_specs(self, convert_labels_to_binary):
        """Builds a RankingTask from feature specs and runs one train step and
        one validation step.

        Fix: pass the SGD step size via ``learning_rate`` — the ``lr`` alias
        is deprecated and has been removed in newer Keras releases.
        """
        data_dir = self.create_tempdir()
        data_file = os.path.join(data_dir, "elwc.tfrecord")
        if tf.io.gfile.exists(data_file):
            tf.io.gfile.remove(data_file)

        # Write a small synthetic dataset of ELWC records used for both
        # training and validation.
        with tf.io.TFRecordWriter(data_file) as writer:
            for _ in range(256):
                writer.write(ELWC.SerializeToString())

        context_feature_spec = {
            "cf_1":
            tf.io.FixedLenFeature(shape=(1, ),
                                  dtype=tf.float32,
                                  default_value=0.0),
        }
        example_feature_spec = {
            "custom_features_{}".format(i + 1):
            tf.io.FixedLenFeature(shape=(1, ),
                                  dtype=tf.float32,
                                  default_value=0.0)
            for i in range(3)
        }
        label_spec = (_LABEL_FEATURE,
                      tf.io.FixedLenFeature(shape=(1, ),
                                            dtype=tf.float32,
                                            default_value=_PADDING_LABEL))

        train_data_config = task_lib.RankingDataConfig(
            input_path=data_file,
            is_training=True,
            global_batch_size=128,
            list_size=2,
            mask_feature_name=_MASK,
            dataset_fn="tfrecord",
            convert_labels_to_binary=convert_labels_to_binary)
        validation_data_config = task_lib.RankingDataConfig(
            input_path=data_file,
            is_training=False,
            global_batch_size=128,
            list_size=2,
            mask_feature_name=_MASK,
            dataset_fn="tfrecord",
            convert_labels_to_binary=convert_labels_to_binary)

        dnn_scorer = model_lib.DNNScorer(hidden_layer_dims=[16, 8],
                                         output_units=1)
        model_builder = model_lib.ModelBuilder(
            input_creator=model_lib.FeatureSpecInputCreator(
                context_feature_spec, example_feature_spec),
            preprocessor=model_lib.PreprocessorWithSpec({}),
            scorer=dnn_scorer,
            mask_feature_name=_MASK,
            name="test_model",
        )

        ranking_task_config = task_lib.RankingTaskConfig(
            train_data=train_data_config,
            validation_data=validation_data_config,
            loss="softmax_loss")
        task = task_lib.RankingTask(ranking_task_config,
                                    model_builder=model_builder,
                                    context_feature_spec=context_feature_spec,
                                    example_feature_spec=example_feature_spec,
                                    label_spec=label_spec)

        model = task.build_model()
        metrics = task.build_metrics()
        train_dataset = task.build_inputs(ranking_task_config.train_data)
        vali_dataset = task.build_inputs(ranking_task_config.validation_data)

        # One optimizer step on a training batch, then one validation step.
        task.initialize(model)
        train_iterator = iter(train_dataset)
        vali_iterator = iter(vali_dataset)
        optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)
        task.train_step(next(train_iterator),
                        model,
                        optimizer,
                        metrics=metrics)
        task.validation_step(next(vali_iterator), model, metrics=metrics)