@contextlib.contextmanager
def _remove_metrics(estimator: tf.estimator.Estimator,
                    metrics_to_remove: Union[List[Text], Callable[[Text], bool]]):
    """Modifies the Estimator to make its model_fn return fewer metrics in EVAL.

    Note that this only removes the metrics from the
    EstimatorSpec.eval_metric_ops. It does not remove them from the graph or
    undo any side-effects that they might have had (e.g. modifications to
    METRIC_VARIABLES collections).

    This is useful when you use py_func, streaming metrics, or other metrics
    incompatible with TFMA in your trainer. To keep these metrics in your
    trainer (so they still show up in Tensorboard) and still use TFMA, you can
    call remove_metrics on your Estimator before calling
    export_eval_savedmodel.

    This is a context manager, so it can be used like:
        with _remove_metrics(estimator, ['streaming_auc']):
            tfma.export.export_eval_savedmodel(estimator, ...)

    Args:
        estimator: tf.estimator.Estimator to modify. Will be mutated in place.
        metrics_to_remove: List of names of metrics to remove, or a callable
            that returns True for the names of metrics to remove.

    Yields:
        Nothing.
    """
    old_call_model_fn = estimator._call_model_fn  # pylint: disable=protected-access

    def wrapped_call_model_fn(unused_self, features, labels, mode, config):
        result = old_call_model_fn(features, labels, mode, config)
        if mode == tf.estimator.ModeKeys.EVAL:
            filtered_eval_metric_ops = {}
            for k, v in result.eval_metric_ops.items():
                if isinstance(metrics_to_remove, collections.abc.Iterable):
                    if k in metrics_to_remove:
                        continue
                elif callable(metrics_to_remove):
                    if metrics_to_remove(k):
                        continue
                filtered_eval_metric_ops[k] = v
            result = result._replace(eval_metric_ops=filtered_eval_metric_ops)
        return result

    estimator._call_model_fn = types.MethodType(  # pylint: disable=protected-access
        wrapped_call_model_fn, estimator)
    yield
    estimator._call_model_fn = old_call_model_fn  # pylint: disable=protected-access
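# Hedged usage sketch (not part of the original code): shows how _remove_metrics
# might be combined with TFMA's export, following the pattern in the docstring
# above. It assumes tensorflow_model_analysis is imported as `tfma`; the metric
# name 'streaming_auc' and the `eval_input_receiver_fn` argument are illustrative.
def _example_export_eval_savedmodel_without_streaming_metrics(
        estimator, export_dir_base, eval_input_receiver_fn):
    """Sketch: drop TFMA-incompatible metrics from EVAL before exporting."""
    with _remove_metrics(estimator, ['streaming_auc']):
        return tfma.export.export_eval_savedmodel(
            estimator=estimator,
            export_dir_base=export_dir_base,
            eval_input_receiver_fn=eval_input_receiver_fn)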
def get_predictions(model: tf.estimator.Estimator, ds: tf.data.Dataset):
    '''Retrieve predictions from model.'''
    # train_fn builds the input_fn from the dataset (no shuffling, single pass).
    preds = model.predict(train_fn(ds, shuffle=False, repeat=1))
    preds = list(preds)
    probabilities = np.vstack([pred["probabilities"] for pred in preds])
    class_ids = np.hstack([pred["class_ids"] for pred in preds])
    return probabilities, class_ids
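# Hedged usage sketch (not part of the original code): computes a simple accuracy
# from get_predictions. `make_dataset` is the dataset helper assumed to exist
# alongside these functions (it is also used by `evaluate` further below).
def _example_accuracy(model: tf.estimator.Estimator,
                      features: pd.DataFrame,
                      labels: pd.DataFrame) -> float:
    """Sketch: fraction of examples whose predicted class id matches the label."""
    ds = make_dataset(features, labels)
    _, class_ids = get_predictions(model, ds)
    return float(np.mean(class_ids == labels.values.ravel()))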
def get_final_predictions(in_contexts, in_last_sentences, tokenizer,
                          estimator: tf.estimator.Estimator, label_list):
    """
    Return the log probabilities based on the story context and the endings proposed.

    Parameters
    ----------
    in_contexts: story contexts, one string per example
    in_last_sentences: proposed last sentences, one string per example
    tokenizer: BERT tokenizer
    estimator: tf.estimator.Estimator used for prediction
    label_list: possible label values
    """
    # guid="" and label=0 are just dummies; they are unused at prediction time.
    input_examples = [
        run_classifier.InputExample(guid="", text_a=x, text_b=y, label=0)
        for x, y in zip(in_contexts, in_last_sentences)
    ]
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, flags.max_seq_length, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features,
        seq_length=flags.max_seq_length,
        is_training=False,
        drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)
    predictions = [prediction['probabilities'] for prediction in predictions]
    return predictions
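# Hedged usage sketch (not part of the original code): scoring two candidate
# endings for one story context. It assumes the google-research/bert repository's
# `tokenization` module is importable and that `vocab_file` points at the model's
# vocab.txt; the story text and the binary label_list are illustrative.
def _example_score_story_endings(estimator: tf.estimator.Estimator, vocab_file: str):
    """Sketch: score two candidate endings and return the more plausible one."""
    import tokenization  # from the google-research/bert repository
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=True)
    contexts = ["Anna packed her bags and drove to the airport."] * 2
    endings = ["She caught her flight on time.", "She planted a garden instead."]
    label_list = [0, 1]
    probs = get_final_predictions(contexts, endings, tokenizer, estimator, label_list)
    # probs[i] holds the class probabilities for ending i; pick the ending with
    # the highest probability for the "plausible" class (index 1 here).
    best = max(range(len(endings)), key=lambda i: probs[i][1])
    return endings[best]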
def fit_model_on_fold(self, compiled_model: tf.estimator.Estimator, curr_fold_indices,
                      train_sequences, test_sequences):
    """
    trains compiled (but previously unfitted) model against given indices
    :param compiled_model: unfitted estimator to train on this fold
    :param curr_fold_indices: tuple of (train_indices, val_indices) for this fold
    :param train_sequences: training sequences, indexable by the fold indices
    :param test_sequences: test sequences to predict on
    :return: validation ROC-AUC score and test set probabilities for this fold
    """
    def train_input_fn(features, labels, batch_size):
        dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
        dataset = dataset.shuffle(10000).repeat().batch(batch_size)
        return dataset

    def eval_input_fn(features, labels, batch_size):
        """use for both validation and prediction"""
        features = dict(features)
        if labels is None:
            inputs = features
        else:
            inputs = (features, labels)
        dataset = tf.data.Dataset.from_tensor_slices(inputs)
        return dataset.batch(batch_size)

    train_indices, val_indices = curr_fold_indices
    x_train = {'sequence': train_sequences[train_indices]}
    y_train = self.raw_train_df[self.target_cols].iloc[train_indices].values
    x_val = {'sequence': train_sequences[val_indices]}
    y_val = self.raw_train_df[self.target_cols].iloc[val_indices].values

    compiled_model.train(
        input_fn=lambda: train_input_fn(x_train, y_train, self.batch_size),
        steps=self.epochs * len(train_indices) // self.batch_size)

    lambda_input_fn = lambda: eval_input_fn(x_val, None, self.batch_size)
    val_predictions = compiled_model.predict(lambda_input_fn)
    val_prob = np.array([x['probabilities'] for x in val_predictions])
    val_roc_auc_score = roc_auc_score(y_val, val_prob)
    print('ROC-AUC val score: {0:.4f}'.format(val_roc_auc_score))

    x_test = {'sequence': test_sequences}
    lambda_input_fn = lambda: eval_input_fn(x_test, None, self.batch_size)
    test_predictions = compiled_model.predict(input_fn=lambda_input_fn)
    test_prob = np.array([x['probabilities'] for x in test_predictions])

    return val_roc_auc_score, test_prob
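# Hedged usage sketch (not part of the original code): fit_model_on_fold expects
# curr_fold_indices to be a (train_indices, val_indices) pair, which is exactly
# what scikit-learn's KFold yields. `build_estimator_fn` is a hypothetical factory
# returning a fresh, unfitted estimator for each fold.
def run_cross_validation(self, build_estimator_fn, train_sequences, test_sequences,
                         n_splits: int = 5):
    """Sketch: average validation ROC-AUC and test probabilities over K folds."""
    from sklearn.model_selection import KFold
    val_scores, test_probs = [], []
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    for fold_indices in kfold.split(train_sequences):
        val_auc, test_prob = self.fit_model_on_fold(
            build_estimator_fn(), fold_indices, train_sequences, test_sequences)
        val_scores.append(val_auc)
        test_probs.append(test_prob)
    return np.mean(val_scores), np.mean(test_probs, axis=0)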
def _export_model(self, estimator: tf.estimator.Estimator, save_location: str) -> None:
    """
    Used to export your model in a format that can be used with TF Serving
    :param estimator: your estimator
    :param save_location: directory to write the SavedModel to
    """
    # this should match the input shape of your model
    # TODO: update this to your input used in prediction/serving
    x1 = tf.feature_column.numeric_column("input", shape=[480, 640, 1])
    # create a list in case you have more than one input
    feature_columns = [x1]
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    # export the saved model
    estimator.export_savedmodel(save_location, export_input_fn)
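# Hedged companion sketch (not part of the original code): the parsing serving
# input receiver built above expects serialized tf.train.Example protos carrying an
# "input" float feature of shape [480, 640, 1]; this shows how such a request
# payload could be built. The function name and the numpy-array argument are
# illustrative assumptions.
def _example_serialized_request(image) -> bytes:
    """Sketch: serialize one [480, 640, 1] float image (a numpy array) as a tf.Example."""
    example = tf.train.Example(features=tf.train.Features(feature={
        "input": tf.train.Feature(
            float_list=tf.train.FloatList(value=image.reshape(-1).tolist()))
    }))
    return example.SerializeToString()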
def _predict(self, estimator: tf.estimator.Estimator, pred_fn: Callable) -> list:
    """
    Function to yield prediction results from the model
    :param estimator: your estimator
    :param pred_fn: input_fn associated with the prediction dataset
    :return: a list containing one prediction per example in the dataset
    """
    return list(estimator.predict(input_fn=pred_fn))
def export(self, estimator: tf.estimator.Estimator):
    """Save the selected estimator variables as Parquet datasets."""
    for variable_name in self.variable_names:
        variable_export_dir = Path(self.path_variables, variable_name)
        LOGGER.info(f"Saving variable {variable_name} to {variable_export_dir}")
        with ParquetDataset(variable_export_dir).open() as ds:
            variable_value = estimator.get_variable_value(variable_name)
            ds.write_pandas(
                pd.DataFrame(variable_value),
                compression=self.compression,
                chunk_size=self.chunk_size)
def export(self, estimator: tf.estimator.Estimator):
    # Reload summaries and select best step
    LOGGER.info(f"Reloading summaries from {estimator.model_dir}")
    summaries = read_eval_metrics(estimator.eval_dir()).items()
    for step, metrics in sorted(summaries):
        LOGGER.info(f"- {step}: {metrics}")
    sorted_summaries = sorted(summaries, key=lambda t: t[1][self.metric])
    if self.mode == BestMode.INCREASE:
        best_step, best_metrics = sorted_summaries[-1]
    elif self.mode == BestMode.DECREASE:
        best_step, best_metrics = sorted_summaries[0]
    else:
        raise ValueError(f"Mode {self.mode} not recognized.")
    LOGGER.info(f"Best summary at step {best_step}: {best_metrics}")

    # List available checkpoints and select the one closest to best_step
    checkpoints = Path(estimator.model_dir).glob(_CHEKPOINT_PATTERN)
    checkpoint_steps = [
        int(re.findall(r"-(\d+).index", str(path))[0]) for path in checkpoints
    ]
    selected_step = sorted(checkpoint_steps, key=lambda step: abs(step - best_step))[0]
    LOGGER.info(f"Selected checkpoint {selected_step}")

    # Change checkpoint information
    with Path(estimator.model_dir, "checkpoint").open("r") as file:
        lines = file.read().split("\n")
        lines[0] = f'model_checkpoint_path: "model.ckpt-{selected_step}"'
    with Path(estimator.model_dir, "checkpoint").open("w") as file:
        file.write("\n".join(lines))

    # Check that the change is effective
    global_step = estimator.get_variable_value("global_step")
    if global_step != selected_step:
        msg = (f"Changed checkpoint file to use step {selected_step}, "
               f"but estimator uses {global_step}")
        raise ValueError(msg)

    # Log to MLFlow
    if self.use_mlflow:
        mlflow.log_metric(key=self.tag, value=global_step)
def _export_model(self, estimator: tf.estimator.Estimator, save_location: str) -> None:
    """
    Used to export your model in a format that can be used with TF Serving
    :param estimator: your estimator
    :param save_location: directory to write the SavedModel to
    """
    # this should match the input shape of your model
    x1 = tf.feature_column.numeric_column(
        "input",
        shape=[self.config["train_batch_size"], conf.c_maxnum, conf.c_maxlen])
    # make_parse_example_spec expects a list of feature columns
    # (add more columns here if you have more than one input)
    feature_columns = [x1]
    feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
    export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    # export the saved model
    estimator.export_savedmodel(save_location, export_input_fn)
def evaluate(model: tf.estimator.Estimator, features: pd.DataFrame,
             labels: pd.DataFrame, steps: int = None):
    '''Check the mse on the validation set.'''
    ds = make_dataset(features, labels)
    results = model.evaluate(train_fn(ds, shuffle=False, repeat=1), steps=steps)
    for stat_name, stat_value in results.items():
        print(f"{stat_name:>20} | {stat_value}")
    return results
def train_estimator(estimator: tf.estimator.Estimator, input_config, train_config,
                    export_config, task_config: TrainTaskConfig):
    """Train and evaluate the estimator, then export a SavedModel from the chief."""
    example_config = input_config['example_config']
    label_col = input_config['label_col']
    feature_specs = parse_feature_specs(example_config)
    dataset_fn = get_dataset_fn(
        feature_specs=feature_specs, label_col=label_col, **train_config)

    train_spec = tf.estimator.TrainSpec(dataset_fn)
    eval_spec = tf.estimator.EvalSpec(dataset_fn, steps=1)
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

    feature_placeholders = get_feature_placeholders(**export_config)
    serving_input_receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
        feature_placeholders)

    # Only the chief (or the single worker) exports the SavedModel.
    if ((task_config.task_type == 'chief' and task_config.task_index == 0)
            or task_config.num_workers == 1):
        logging.info("Start exporting...")
        estimator.export_saved_model(
            task_config.saved_model_dir,
            serving_input_receiver_fn=serving_input_receiver_fn)
        logging.info("Finish exporting.")
def __call__(self, estimator: tf.estimator.Estimator) -> tf.estimator.SessionRunHook:
    """Return an early-stopping hook: the chief checks for metric improvement,
    the other workers poll for the stop signal."""
    if estimator.config.is_chief:
        return _StopOnPredicateHook(
            partial(
                _no_metric_improvement_fn,
                eval_dir=estimator.eval_dir(),
                min_steps=self.min_steps,
                metric=self.metric,
                max_steps_without_improvement=self.max_steps_without_improvement,
                mode=self.mode,
            ),
            run_every_secs=self.run_every_secs,
            run_every_steps=self.run_every_steps,
            final_step=self.final_step,
        )
    else:
        return _CheckForStoppingHook()
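# Hedged usage sketch (not part of the original code): the factory above is meant
# to be called with the estimator and its result attached to training, e.g. via
# TrainSpec hooks. `make_hook` stands in for an instance of the surrounding class,
# and the two input_fn arguments are assumed to exist in the calling code.
def _example_train_with_early_stopping(estimator, make_hook, train_input_fn, eval_input_fn):
    """Sketch: attach the early-stopping hook to training via TrainSpec."""
    hook = make_hook(estimator)
    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, hooks=[hook])
    eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
    return tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)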
def _get_predictions(self, estimator: tf.estimator.Estimator,
                     eval_fn: Callable[[], Dict[str, tf.Tensor]]) -> MoleculePredictions:
    """Run prediction and collect inputs, embeddings and reconstructed molecules."""
    collect_edges = CollectTensorHook('adjacency_in:0')
    collect_nodes = CollectTensorHook('features:0')
    predictions = estimator.predict(eval_fn, hooks=[collect_edges, collect_nodes])
    pred = collect_predictions(predictions)
    feat = np.stack(pred['reconstructed/features'], axis=0)
    adj = np.stack(pred['reconstructed/adjacency'], axis=0)
    feat, adj = onehot_to_dense(feat, adj)
    mols_recon = MoleculeGraph(nodes=feat, edges=adj)
    mols_real = MoleculeGraph(
        nodes=np.row_stack(collect_nodes.data),
        edges=np.row_stack(collect_edges.data))
    return MoleculePredictions(
        inputs=mols_real,
        embeddings=np.row_stack(pred['embedding']),
        reconstructions=mols_recon)
def export(self, estimator: tf.estimator.Estimator):
    features = {field.name: field.as_placeholder(batch=True) for field in self.fields}
    return estimator.export_saved_model(
        self.path_saved_model,
        tf.estimator.export.build_raw_serving_input_receiver_fn(features))