Пример #1
0
  def _annotate_dataset_statistics(self, model_card: ModelCard) -> ModelCard:
    """Adds plots built from TFDV dataset statistics to a model card.

    Every set of statistics that is read is handed, together with the model
    card, to `graphics.annotate_dataset_feature_statistics_plots`.

    Statistics come from two independent places, each consulted only when it
    is configured:
      * the TFDV source (`self._source.tfdv`): every path listed in
        `dataset_statistics_paths` is read, with feature filtering applied
        when `features_include` or `features_exclude` is set;
      * the MLMD store (`self._store`): the statistics artifacts returned for
        the model artifact are read.

    Configuring both may cause duplicates to be recorded. When neither is
    configured, this function is a no-op.

    Args:
      model_card: The model card object to annotate with TFDV dataset
        statistics.

    Returns:
      The same model_card object, with dataset statistics annotated.
    """
    tfdv_source = self._source.tfdv if self._source else None
    if tfdv_source:
      for stats_path in tfdv_source.dataset_statistics_paths:
        include = tfdv_source.features_include
        exclude = tfdv_source.features_exclude
        if not (include or exclude):
          # No feature filter requested: load the statistics unfiltered.
          stats_protos = tfx_util.read_stats_protos(stats_path)
        else:
          stats_protos = tfx_util.read_stats_protos_and_filter_features(
              stats_path, include, exclude)
        graphics.annotate_dataset_feature_statistics_plots(
            model_card, stats_protos)

    if self._store:
      model_artifact_id = self._artifact_with_model_uri.id
      for artifact in tfx_util.get_stats_artifacts_for_model(
          self._store, model_artifact_id):
        graphics.annotate_dataset_feature_statistics_plots(
            model_card, tfx_util.read_stats_protos(artifact.uri))
    return model_card
 def test_get_stats_artifacts_for_model(self):
   """Checks the stats artifact ids returned for the TFX 0.21 model artifact."""
   mlmd_store = testdata_utils.get_tfx_pipeline_metadata_store(
       self.tmp_db_path)
   stats_artifacts = tfx_util.get_stats_artifacts_for_model(
       mlmd_store, testdata_utils.TFX_0_21_MODEL_ARTIFACT_ID)
   actual_ids = [artifact.id for artifact in stats_artifacts]
   expected_ids = [testdata_utils.TFX_0_21_STATS_ARTIFACT_ID]
   # assertCountEqual compares the ids regardless of their order.
   self.assertCountEqual(actual_ids, expected_ids)
    def _scaffold_model_card(self) -> ModelCard:
        """Builds the model card used during the scaffold_assets phase.

        Without an MLMD store (`self._store`), an empty ModelCard is returned.
        With a store, the card is generated from the model artifact and then
        annotated with:
          * eval-result plots for each metrics artifact whose eval result is
            not None;
          * dataset feature statistics plots for the 'train' and 'eval'
            statistics of each stats artifact.

        Returns:
          A ModelCard representing the given model.
        """
        model_card = ModelCard()
        if not self._store:
            return model_card

        model_id = self._artifact_with_model_uri.id
        model_card = tfx_util.generate_model_card_for_model(
            self._store, model_id)
        # Both artifact lists are fetched up front, before any plot is made.
        metrics_artifacts = tfx_util.get_metrics_artifacts_for_model(
            self._store, model_id)
        stats_artifacts = tfx_util.get_stats_artifacts_for_model(
            self._store, model_id)

        for artifact in metrics_artifacts:
            eval_result = tfx_util.read_metrics_eval_result(artifact.uri)
            if eval_result is None:
                continue
            graphics.annotate_eval_result_plots(model_card, eval_result)

        for artifact in stats_artifacts:
            split_stats = [
                tfx_util.read_stats_proto(artifact.uri, 'train'),
                tfx_util.read_stats_proto(artifact.uri, 'eval'),
            ]
            graphics.annotate_dataset_feature_statistics_plots(
                model_card, split_stats)
        return model_card
Пример #4
0
    def scaffold_assets(self) -> ModelCard:
        """Generates the model card toolkit assets.

        The assets are the model card JSON file and the customizable model
        card UI templates; both are written through `self._write_file`.

        If the MCT is initialized with a `mlmd_store`, the model card
        properties are auto-populated from it and related plots (model
        performance and data distributions) are generated as well.

        Returns:
          A ModelCard representing the given model.

        Raises:
          FileNotFoundError: if a UI template file cannot be found in the
            `model_card_toolkit` package data.
        """
        model_card = ModelCard()
        if self._store:
            model_id = self._artifact_with_model_uri.id
            model_card = tfx_util.generate_model_card_for_model(
                self._store, model_id)
            # Both artifact lists are fetched before any plot is generated.
            metrics_artifacts = tfx_util.get_metrics_artifacts_for_model(
                self._store, model_id)
            stats_artifacts = tfx_util.get_stats_artifacts_for_model(
                self._store, model_id)

            for artifact in metrics_artifacts:
                eval_result = tfx_util.read_metrics_eval_result(artifact.uri)
                if eval_result is None:
                    continue
                graphics.annotate_eval_result_plots(model_card, eval_result)

            for artifact in stats_artifacts:
                train_stats = tfx_util.read_stats_proto(artifact.uri, 'train')
                eval_stats = tfx_util.read_stats_proto(artifact.uri, 'eval')
                graphics.annotate_dataset_feature_statistics_plots(
                    model_card, train_stats, eval_stats)

        # Persist the model card as JSON.
        self._write_file(self._mcta_json_file, model_card.to_json())

        # Copy each packaged UI template into the output directory.
        for template_path in _UI_TEMPLATES:
            raw_template = pkgutil.get_data('model_card_toolkit',
                                            template_path)
            if raw_template is None:
                raise FileNotFoundError(f"Cannot find file: '{template_path}'")
            template_text = raw_template.decode('utf8')
            destination = os.path.join(self.output_dir, template_path)
            self._write_file(destination, template_text)

        return model_card
Пример #5
0
    def scaffold_assets(self) -> model_card_module.ModelCard:
        """Generates the model card toolkit assets.

        The assets are the model card JSON file, written through
        `self._write_file`, and the customizable model card UI templates,
        copied from `_UI_TEMPLATE_DIR` to `self._mcta_template_dir`.

        If the MCT is initialized with a `mlmd_store`, the model card
        properties are auto-populated from it and related plots (model
        performance and data distributions) are generated as well.

        Returns:
          A ModelCard representing the given model.
        """
        model_card = model_card_module.ModelCard()
        if self._store:
            model_id = self._artifact_with_model_uri.id
            model_card = tfx_util.generate_model_card_for_model(
                self._store, model_id)
            # Both artifact lists are fetched before any plot is generated.
            metrics_artifacts = tfx_util.get_metrics_artifacts_for_model(
                self._store, model_id)
            stats_artifacts = tfx_util.get_stats_artifacts_for_model(
                self._store, model_id)

            for artifact in metrics_artifacts:
                eval_result = tfx_util.read_metrics_eval_result(artifact.uri)
                if eval_result is None:
                    continue
                graphics.annotate_eval_result_plots(model_card, eval_result)

            for artifact in stats_artifacts:
                train_stats = tfx_util.read_stats_proto(artifact.uri, 'train')
                eval_stats = tfx_util.read_stats_proto(artifact.uri, 'eval')
                graphics.annotate_dataset_feature_statistics_plots(
                    model_card, train_stats, eval_stats)

        # Persist the model card as JSON.
        self._write_file(self._mcta_json_file, model_card.to_json())

        # Copy the UI templates.
        # NOTE(review): shutil.copytree raises FileExistsError when the
        # destination directory already exists, so a second call against the
        # same template directory would presumably fail here -- confirm
        # whether re-scaffolding is meant to be supported.
        shutil.copytree(_UI_TEMPLATE_DIR, self._mcta_template_dir)
        return model_card
 def test_get_stats_artifacts_for_model_with_invalid_db(self):
   """An empty metadata store must be rejected with a ValueError."""
   invalid_store = self._get_empty_metadata_store()
   model_id = testdata_utils.TFX_0_21_MODEL_ARTIFACT_ID
   with self.assertRaisesRegex(ValueError, '`store` is invalid'):
     tfx_util.get_stats_artifacts_for_model(invalid_store, model_id)
 def test_get_stats_artifacts_for_model_with_invalid_model(self):
   """An id that is not a Model artifact must be rejected with a ValueError."""
   mlmd_store = testdata_utils.get_tfx_pipeline_metadata_store(
       self.tmp_db_path)
   # Deliberately pass the id held in TFX_0_21_MODEL_DATASET_ID instead of
   # the model artifact id used by the happy-path test.
   non_model_id = testdata_utils.TFX_0_21_MODEL_DATASET_ID
   with self.assertRaisesRegex(ValueError, 'not an instance of Model'):
     tfx_util.get_stats_artifacts_for_model(mlmd_store, non_model_id)
 def test_get_stats_artifacts_for_model_with_model_not_found(self):
   """An artifact id that is absent from the store must raise a ValueError.

   The lookup uses the id of a freshly constructed, never-stored Artifact
   (presumably the proto default id -- confirm against the proto definition).
   """
   store = testdata_utils.get_tfx_pipeline_metadata_store(self.tmp_db_path)
   # Build the fixture outside the assertRaisesRegex context so that only the
   # call under test can satisfy the expected ValueError.
   model = metadata_store_pb2.Artifact()
   with self.assertRaisesRegex(ValueError, 'model_id cannot be found'):
     tfx_util.get_stats_artifacts_for_model(store, model.id)