def custom_extractors(
    eval_shared_model: tfma.MaybeMultipleEvalSharedModels,
    eval_config: tfma.EvalConfig,
    tensor_adapter_config: tensor_adapter.TensorAdapterConfig,
) -> List[tfma.extractors.Extractor]:
    """Build the extractor list by delegating to `tfma.default_extractors`.

    Args:
        eval_shared_model: Shared model(s), forwarded unchanged.
        eval_config: Evaluation config, forwarded unchanged.
        tensor_adapter_config: Tensor adapter config, forwarded unchanged.

    Returns:
        Whatever `tfma.default_extractors` returns for these arguments.
    """
    extractors = tfma.default_extractors(
        eval_shared_model=eval_shared_model,
        eval_config=eval_config,
        tensor_adapter_config=tensor_adapter_config,
    )
    return extractors
def custom_extractors(
    eval_config, eval_shared_model, tensor_adapter_config
) -> List[tfma.extractors.Extractor]:
    """Return the default extractors with a `BatchedPredictExtractor` plugged in.

    Args:
        eval_config: Evaluation config; forwarded to both the predict
            extractor and `tfma.default_extractors`.
        eval_shared_model: Shared model(s); forwarded to both as well.
        tensor_adapter_config: Tensor adapter config; forwarded to both as well.

    Returns:
        The result of `tfma.default_extractors` called with the
        `BatchedPredictExtractor` as `custom_predict_extractor`.
    """
    # Build the custom predict step first so the call below stays readable.
    batched_predictor = BatchedPredictExtractor(
        eval_config, eval_shared_model, tensor_adapter_config
    )
    return tfma.default_extractors(
        eval_config=eval_config,
        eval_shared_model=eval_shared_model,
        tensor_adapter_config=tensor_adapter_config,
        custom_predict_extractor=batched_predictor,
    )
def custom_extractors(
    eval_shared_model: tfma.MaybeMultipleEvalSharedModels,
    eval_config: tfma.EvalConfig,
    tensor_adapter_config: tensor_adapter.TensorAdapterConfig,
) -> List[tfma.extractors.Extractor]:
    """Return the default extractors with a custom predict extractor swapped in.

    The predict extractor is built from `eval_shared_model` by
    `_make_sklearn_predict_extractor` and handed to
    `tfma.default_extractors` as `custom_predict_extractor`.

    Args:
        eval_shared_model: Shared model(s); used to build the predict
            extractor and forwarded to `tfma.default_extractors`.
        eval_config: Evaluation config, forwarded unchanged.
        tensor_adapter_config: Tensor adapter config, forwarded unchanged.

    Returns:
        Whatever `tfma.default_extractors` returns for these arguments.
    """
    return tfma.default_extractors(
        eval_shared_model=eval_shared_model,
        eval_config=eval_config,
        tensor_adapter_config=tensor_adapter_config,
        custom_predict_extractor=_make_sklearn_predict_extractor(
            eval_shared_model
        ),
    )
def process_tfma(
    schema_file,
    big_query_table=None,
    eval_model_dir=None,
    max_eval_rows=None,
    pipeline_args=None,
    publish_to_bq=False,
    project=None,
    metrics_table=None,
    metrics_dataset=None,
):
    """Run a batch Beam job that evaluates the eval model on BigQuery data.

    Args:
        schema_file: A file containing a text-serialized Schema that describes
            the eval data; read via `taxi.read_schema`.
        big_query_table: A BigQuery table name specified as DATASET.TABLE
            which is the input for evaluation. Required despite the `None`
            default.
        eval_model_dir: A directory where the eval model is located; passed
            as `eval_saved_model_path`.
        max_eval_rows: Number of rows to query from BigQuery; passed to
            `taxi.make_sql`.
        pipeline_args: Additional DataflowRunner or DirectRunner args, passed
            to the Beam pipeline as `argv`.
        publish_to_bq: When truthy, a `MetricsReader` is created and
            `publish_metrics` is called on it with the pipeline result once
            the run has finished.
        project: Project passed to `ReadFromBigQuery` and, as `project_name`,
            to the `MetricsReader`.
        metrics_table: Passed to the `MetricsReader` as `bq_table`; it also
            serves as the namespace for the `MeasureTime` steps and the
            `MetricsFilter`.
        metrics_dataset: Passed to the `MetricsReader` as `bq_dataset`.

    Raises:
        ValueError: if `big_query_table` is None.
    """
    if big_query_table is None:
        raise ValueError('--big_query_table should be provided.')

    # One overall slice plus one slice per 'trip_start_hour' value.
    slicing_specs = [
        tfma.slicer.SingleSliceSpec(),
        tfma.slicer.SingleSliceSpec(columns=['trip_start_hour']),
    ]
    # The metrics table name is reused as the namespace for timing metrics.
    namespace = metrics_table

    eval_schema = taxi.read_schema(schema_file)

    shared_model = tfma.default_eval_shared_model(
        eval_saved_model_path=eval_model_dir,
        add_metrics_callbacks=[
            tfma.post_export_metrics.calibration_plot_and_prediction_histogram(),
            tfma.post_export_metrics.auc_plots(),
        ],
    )

    # A metrics reader is only created when publishing was requested.
    monitor = (
        MetricsReader(
            publish_to_bq=publish_to_bq,
            project_name=project,
            bq_table=metrics_table,
            bq_dataset=metrics_dataset,
            filters=MetricsFilter().with_namespace(namespace),
        )
        if publish_to_bq
        else None
    )

    p = beam.Pipeline(argv=pipeline_args)

    eval_query = taxi.make_sql(big_query_table, max_eval_rows, for_eval=True)
    feature_spec = taxi.get_raw_feature_spec(eval_schema)

    # Read -> start timer -> clean, applied one step at a time.
    bq_rows = p | 'ReadBigQuery' >> ReadFromBigQuery(
        query=eval_query, project=project, use_standard_sql=True
    )
    timed_rows = bq_rows | 'Measure time: Start' >> beam.ParDo(
        MeasureTime(namespace)
    )
    cleaned_rows = timed_rows | 'CleanData' >> beam.Map(
        lambda row: taxi.clean_raw_data_dict(row, feature_spec)
    )

    # Examples must be in clean tf-example format.
    proto_coder = taxi.make_proto_coder(eval_schema)

    # Prepare arguments for Extract, Evaluate and Write steps
    extractors = tfma.default_extractors(
        eval_shared_model=shared_model,
        slice_spec=slicing_specs,
        desired_batch_size=None,
        materialize=False,
    )
    evaluators = tfma.default_evaluators(
        eval_shared_model=shared_model,
        desired_batch_size=None,
        num_bootstrap_samples=1,
    )

    # Encode -> extract/evaluate -> stop timer; the final output is unused.
    serialized = cleaned_rows | 'ToSerializedTFExample' >> beam.Map(
        proto_coder.encode
    )
    extracts = serialized | 'Extract Results' >> tfma.InputsToExtracts()
    evaluations = extracts | 'Extract and evaluate' >> tfma.ExtractAndEvaluate(
        extractors=extractors, evaluators=evaluators
    )
    eval_pcoll = (
        evaluations | 'Map Evaluations to PCollection' >> MapEvalToPCollection()
    )
    eval_pcoll | 'Measure time: End' >> beam.ParDo(MeasureTime(namespace))

    run_result = p.run()
    run_result.wait_until_finish()
    if monitor:
        monitor.publish_metrics(run_result)