def predict(datasource, select, data_table, result_table, label_column,
            oss_model_path):
    """PAI TensorFlow prediction wrapper

    This function do some preparation for the local prediction, say,
    download the model from OSS, extract metadata and so on.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save prediction result
        label_column: prediction label column
        oss_model_path: the model path on OSS
    """
    try:
        tf.enable_eager_execution()
    except Exception:
        # Best-effort: eager mode may already be enabled (TF2) or the API may
        # be absent; prediction works either way. Narrowed from a bare
        # `except:` so SystemExit/KeyboardInterrupt are not swallowed.
        pass

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")

    # NOTE(security): feature_columns_code was generated by SQLFlow and saved
    # with the model; eval() here assumes the OSS model directory is trusted.
    feature_columns = eval(feature_columns_code)

    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because predicting do not need these parameters.

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/model_save" % oss_model_path)
    else:
        oss.load_file(oss_model_path, "model_save")

    _predict(datasource=datasource,
             estimator_string=estimator,
             select=select,
             result_table=result_table,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_column_names_map=feature_column_names_map,
             train_label_name=label_meta["feature_name"],
             result_col_name=label_column,
             feature_metas=feature_metas,
             model_params=model_params,
             save="model_save",
             batch_size=1,
             pai_table=data_table)
def explain_step(datasource, select, data_table, result_table, label_column,
                 oss_model_path):
    """PAI TensorFlow explain wrapper

    Download the model and its metadata from OSS, rebuild the feature
    columns from the stored IR, then run the local explain step.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save explain result
        label_column: label column name (not used directly here; kept for a
            uniform step signature)
        oss_model_path: the model path on OSS
    """
    try:
        tf.enable_eager_execution()
    except Exception as e:
        # Best-effort only: eager mode may already be on (TF2); warn and
        # continue. (Removed a redundant `pass` after this write.)
        sys.stderr.write("warning: failed to enable_eager_execution: %s" % e)

    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params,
     feature_columns_code) = oss.load_metas(oss_model_path,
                                            "tensorflow_model_desc")

    fc_map_ir = feature_columns_code
    feature_columns = compile_ir_feature_columns(fc_map_ir,
                                                 EstimatorType.TENSORFLOW)
    field_descs = get_ordered_field_descs(fc_map_ir)
    # Recompute names/metas from the IR so they match the compiled columns,
    # overriding the values unpacked from the saved metadata above.
    feature_column_names = [fd.name for fd in field_descs]
    feature_metas = {fd.name: fd.to_dict() for fd in field_descs}

    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because predicting do not need these parameters.

    is_estimator = is_tf_estimator(import_model(estimator))

    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    model_name = oss_model_path.split("/")[-1]
    if is_estimator:
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/%s" % (oss_model_path, model_name))
    else:
        oss.load_dir(os.path.join(oss_model_path, "model_save"))

    # (TODO: lhw) use oss to store result image
    _explain(datasource=datasource,
             estimator_string=estimator,
             select=select,
             feature_columns=feature_columns,
             feature_column_names=feature_column_names,
             feature_metas=feature_metas,
             label_meta=label_meta,
             model_params=model_params,
             save="model_save",
             result_table=result_table,
             pai_table=data_table,
             oss_dest=None,
             oss_ak=None,
             oss_sk=None,
             oss_endpoint=None,
             oss_bucket_name=None)
def load_oss_model(oss_model_dir, estimator): is_estimator = is_tf_estimator(estimator) # Keras single node is using h5 format to save the model, no need to deal # with export model format. Keras distributed mode will use estimator, so # this is also needed. if is_estimator: load_file(oss_model_dir, "exported_path") # NOTE(typhoonzero): directory "model_save" is hardcoded in # codegen/tensorflow/codegen.go load_dir(os.path.join(oss_model_dir, "model_save"))
def evaluate(datasource, select, data_table, result_table, oss_model_path,
             metrics):
    """PAI Tensorflow evaluate wrapper

    Prepares a local evaluation run: pulls the model and its metadata down
    from OSS, rebuilds the feature columns, then delegates to _evaluate.

    Args:
        datasource: the datasource from which to get data
        select: data selection SQL statement
        data_table: tmp table which holds the data from select
        result_table: table to save prediction result
        oss_model_path: the model path on OSS
        metrics: metrics to evaluate
    """
    metas = oss.load_metas(oss_model_path, "tensorflow_model_desc")
    (estimator, feature_column_names, feature_column_names_map, feature_metas,
     label_meta, model_params, feature_columns_code) = metas

    feature_columns = eval(feature_columns_code)

    # NOTE(typhoonzero): No need to eval model_params["optimizer"] and
    # model_params["loss"] because predicting do not need these parameters.

    # Keras single node is using h5 format to save the model, no need to deal
    # with export model format. Keras distributed mode will use estimator, so
    # this is also needed.
    if is_tf_estimator(import_model(estimator)):
        oss.load_file(oss_model_path, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        oss.load_dir("%s/model_save" % oss_model_path)
    else:
        oss.load_file(oss_model_path, "model_save")

    _evaluate(datasource=datasource,
              estimator_string=estimator,
              select=select,
              result_table=result_table,
              feature_columns=feature_columns,
              feature_column_names=feature_column_names,
              feature_metas=feature_metas,
              label_meta=label_meta,
              model_params=model_params,
              validation_metrics=metrics,
              save="model_save",
              batch_size=1,
              validation_steps=None,
              verbose=0,
              is_pai=True,
              pai_table=data_table)
def load_oss_model(oss_model_dir, estimator):
    """Fetch a previously saved model from OSS into the local filesystem.

    Args:
        oss_model_dir: OSS directory the model was saved under.
        estimator: the model class; decides which files to download.
    """
    set_oss_environs(FLAGS)

    # Estimator models (and distributed Keras, which also exports through
    # the estimator path) ship a saved-model directory; single-node Keras
    # saves a single h5 file instead.
    if is_tf_estimator(estimator):
        model.load_file(oss_model_dir, "exported_path")
        # NOTE(typhoonzero): directory "model_save" is hardcoded in
        # codegen/tensorflow/codegen.go
        model.load_dir(oss_model_dir + "/model_save")
    else:
        model.load_file(oss_model_dir, "model_save")
def _evaluate(datasource,
              estimator_string,
              select,
              result_table,
              feature_columns,
              feature_column_names,
              feature_metas=None,
              label_meta=None,
              model_params=None,
              validation_metrics=None,
              save="",
              batch_size=1,
              validation_steps=None,
              verbose=0,
              pai_table=""):
    """Run evaluation of a trained model on a PAI table.

    Args:
        datasource: datasource to read the evaluation data from.
        estimator_string: model class name, resolved via import_model().
        select: data selection SQL statement.
        result_table: table to write evaluation metrics to ("" skips writing).
        feature_columns: dict of feature column definitions, merged into
            model_params.
        feature_column_names: ordered feature column names.
        feature_metas: per-feature metadata dict (default: empty).
        label_meta: label metadata dict (default: empty).
        model_params: extra model constructor params (default: empty).
        validation_metrics: metric names to compute (default: ["Accuracy"]).
        save: directory/file with saved Keras weights.
        batch_size: evaluation batch size.
        validation_steps: unused here; kept for signature compatibility.
        verbose: logging verbosity passed to set_log_level.
        pai_table: PAI input table name.
    """
    # BUGFIX: the old defaults ({} / ["Accuracy"]) were shared mutable
    # objects; model_params is mutated below via .update(), which leaked
    # state between calls. Use None sentinels instead.
    feature_metas = {} if feature_metas is None else feature_metas
    label_meta = {} if label_meta is None else label_meta
    model_params = {} if model_params is None else model_params
    if validation_metrics is None:
        validation_metrics = ["Accuracy"]

    estimator_cls = import_model(estimator_string)
    is_estimator = is_tf_estimator(estimator_cls)
    set_log_level(verbose, is_estimator)

    eval_dataset = get_dataset_fn(select,
                                  datasource,
                                  feature_column_names,
                                  feature_metas,
                                  label_meta,
                                  is_pai=True,
                                  pai_table=pai_table,
                                  batch_size=batch_size)

    model_params.update(feature_columns)
    if is_estimator:
        FLAGS = tf.app.flags.FLAGS
        model_params["model_dir"] = FLAGS.checkpointDir
        estimator = estimator_cls(**model_params)
        result_metrics = estimator_evaluate(estimator, eval_dataset,
                                            validation_metrics)
    else:
        # BUGFIX: was `estimator`, which is undefined on this branch
        # (NameError); the Keras model must be built from estimator_cls.
        keras_model = init_model_with_feature_column(estimator_cls,
                                                     model_params)
        keras_model_pkg = sys.modules[estimator_cls.__module__]
        result_metrics = keras_evaluate(keras_model, eval_dataset, save,
                                        keras_model_pkg, validation_metrics)

    if result_table:
        metric_name_list = ["loss"] + validation_metrics
        write_result_metrics(result_metrics,
                             metric_name_list,
                             result_table,
                             "pai_maxcompute",
                             None,
                             hdfs_namenode_addr="",
                             hive_location="",
                             hdfs_user="",
                             hdfs_pass="")
def explain(datasource,
            estimator_string,
            select,
            feature_columns,
            feature_column_names,
            feature_metas=None,
            label_meta=None,
            model_params=None,
            save="",
            pai_table="",
            plot_type='bar',
            result_table="",
            oss_dest=None,
            oss_ak=None,
            oss_sk=None,
            oss_endpoint=None,
            oss_bucket_name=None):
    """Explain a trained model with SHAP-style feature attribution.

    Args:
        datasource: datasource to read the explain data from.
        estimator_string: model class name, resolved via import_model().
        select: data selection SQL statement.
        feature_columns: dict of feature column definitions, merged into
            model_params.
        feature_column_names: ordered feature column names.
        feature_metas: per-feature metadata dict (default: empty).
        label_meta: label metadata dict (default: empty).
        model_params: extra model constructor params (default: empty).
        save: model_dir used for estimator models.
        pai_table: unused here; kept for signature compatibility.
        plot_type: plot kind handed to the explainers.
        result_table: table to save explain result to.
        oss_dest/oss_ak/oss_sk/oss_endpoint/oss_bucket_name: OSS target for
            the rendered plot image.
    """
    # BUGFIX: the old {} defaults were shared mutable objects; model_params
    # is mutated below via .update() and item assignment, leaking state
    # between calls. Use None sentinels instead.
    feature_metas = {} if feature_metas is None else feature_metas
    label_meta = {} if label_meta is None else label_meta
    model_params = {} if model_params is None else model_params

    estimator_cls = import_model(estimator_string)
    if is_tf_estimator(estimator_cls):
        model_params['model_dir'] = save
    model_params.update(feature_columns)
    pop_optimizer_and_loss(model_params)

    def _input_fn():
        dataset = input_fn(select, datasource, feature_column_names,
                           feature_metas, label_meta)
        return dataset.batch(1).cache()

    estimator = init_model_with_feature_column(estimator_cls, model_params)
    conn = connect_with_data_source(datasource)
    try:
        if estimator_cls in (tf.estimator.BoostedTreesClassifier,
                             tf.estimator.BoostedTreesRegressor):
            explain_boosted_trees(datasource, estimator, _input_fn, plot_type,
                                  result_table, feature_column_names, conn,
                                  oss_dest, oss_ak, oss_sk, oss_endpoint,
                                  oss_bucket_name)
        else:
            shap_dataset = pd.DataFrame(columns=feature_column_names)
            for i, (features, label) in enumerate(_input_fn()):
                shap_dataset.loc[i] = [
                    item.numpy()[0][0] for item in features.values()
                ]
            explain_dnns(datasource, estimator, shap_dataset, plot_type,
                         result_table, feature_column_names, conn, oss_dest,
                         oss_ak, oss_sk, oss_endpoint, oss_bucket_name)
    finally:
        # BUGFIX: close the connection even if explanation raises, to avoid
        # leaking the DB connection.
        conn.close()
def _predict(datasource,
             estimator_string,
             select,
             result_table,
             feature_columns,
             feature_column_names,
             feature_column_names_map,
             train_label_name,
             result_col_name,
             feature_metas=None,
             model_params=None,
             save="",
             batch_size=1,
             pai_table=""):
    """Run local prediction against a PAI table and write the results.

    Args:
        datasource: unused here; kept for signature compatibility.
        estimator_string: model class name, resolved via import_model().
        select: unused here; data comes from pai_table.
        result_table: table to save prediction result to.
        feature_columns: dict of feature column definitions, merged into
            model_params.
        feature_column_names: ordered feature column names.
        feature_column_names_map: mapping of column-group name -> columns.
        train_label_name: label column used at training time.
        result_col_name: output column name for predictions.
        feature_metas: per-feature metadata dict (default: empty).
        model_params: extra model constructor params (default: empty).
        save: model_dir used for estimator models / Keras weights path.
        batch_size: unused here; kept for signature compatibility.
        pai_table: PAI input table name.
    """
    # BUGFIX: the old {} defaults were shared mutable objects; model_params
    # is mutated below via .update() and item assignment, and feature_metas
    # is stored into model_params, leaking state between calls.
    feature_metas = {} if feature_metas is None else feature_metas
    model_params = {} if model_params is None else model_params

    estimator = import_model(estimator_string)
    model_params.update(feature_columns)
    is_estimator = is_tf_estimator(estimator)

    conn = PaiIOConnection.from_table(pai_table)
    selected_cols = db.selected_cols(conn, None)
    predict_generator = db.db_generator(conn, None)

    pop_optimizer_and_loss(model_params)

    if not is_estimator:
        if not issubclass(estimator, tf.keras.Model):
            # functional model need field_metas parameter
            model_params["field_metas"] = feature_metas
        print("Start predicting using keras model...")
        keras_predict(estimator, model_params, save, result_table,
                      feature_column_names, feature_metas, train_label_name,
                      result_col_name, conn, predict_generator, selected_cols)
    else:
        model_params['model_dir'] = save
        print("Start predicting using estimator model...")
        estimator_predict(result_table, feature_column_names, feature_metas,
                          train_label_name, result_col_name, conn,
                          predict_generator, selected_cols)

    print("Done predicting. Predict table : %s" % result_table)
def save_oss_model(oss_model_dir, estimator, num_workers, feature_column_names, feature_column_names_map, feature_metas, label_meta, model_params, feature_columns_code): is_estimator = is_tf_estimator(estimator) # Keras single node is using h5 format to save the model, no need to deal with export model format. # Keras distributed mode will use estimator, so this is also needed. if is_estimator: if FLAGS.task_index == 0: with open("exported_path", "r") as fn: saved_model_path = fn.read() model.save_dir(oss_model_dir, saved_model_path) model.save_file(oss_model_dir, "exported_path") else: if len(FLAGS.worker_hosts.split(",")) > 1: if FLAGS.task_index == 0: model.save_file(oss_model_dir, "exported_path") else: model.save_file(oss_model_dir, "model_save") model.save_metas(oss_model_dir, num_workers, "tensorflow_model_desc", estimator, feature_column_names, feature_column_names_map, feature_metas, label_meta, model_params, feature_columns_code)
def explain(datasource,
            estimator_string,
            select,
            feature_columns,
            feature_column_names,
            feature_metas=None,
            label_meta=None,
            model_params=None,
            save="",
            pai_table="",
            plot_type='bar',
            result_table="",
            oss_dest=None,
            oss_ak=None,
            oss_sk=None,
            oss_endpoint=None,
            oss_bucket_name=None):
    """Explain a trained model, locally or on PAI depending on pai_table.

    Args:
        datasource: datasource to read the explain data from (local mode).
        estimator_string: model class name, resolved via import_model().
        select: data selection SQL statement (ignored in PAI mode).
        feature_columns: dict of feature column definitions, merged into
            model_params.
        feature_column_names: ordered feature column names.
        feature_metas: per-feature metadata dict (default: empty).
        label_meta: label metadata dict (default: empty).
        model_params: extra model constructor params (default: empty).
        save: model_dir (local estimator) / Keras weights path.
        pai_table: PAI input table; non-empty switches to PAI mode.
        plot_type: plot kind handed to the explainers.
        result_table: table to save explain result to ("" skips writing).
        oss_dest/oss_ak/oss_sk/oss_endpoint/oss_bucket_name: OSS target for
            the rendered plot image.
    """
    # BUGFIX: the old {} defaults were shared mutable objects; model_params
    # is mutated below via .update() and item assignment, leaking state
    # between calls. Use None sentinels instead.
    feature_metas = {} if feature_metas is None else feature_metas
    label_meta = {} if label_meta is None else label_meta
    model_params = {} if model_params is None else model_params

    estimator_cls = import_model(estimator_string)
    is_pai = bool(pai_table)
    if is_pai:
        FLAGS = tf.app.flags.FLAGS
        model_params["model_dir"] = FLAGS.checkpointDir
        select = ""
    else:
        if is_tf_estimator(estimator_cls):
            model_params['model_dir'] = save
    model_params.update(feature_columns)
    pop_optimizer_and_loss(model_params)

    def _input_fn():
        dataset = input_fn(select,
                           datasource,
                           feature_column_names,
                           feature_metas,
                           label_meta,
                           is_pai=is_pai,
                           pai_table=pai_table)
        return dataset.batch(1).cache()

    estimator = init_model_with_feature_column(estimator_cls, model_params)
    if not is_tf_estimator(estimator_cls):
        load_keras_model_weights(estimator, save)

    if is_pai:
        conn = PaiIOConnection.from_table(
            result_table) if result_table else None
    else:
        conn = connect_with_data_source(datasource)

    try:
        if estimator_cls in (tf.estimator.BoostedTreesClassifier,
                             tf.estimator.BoostedTreesRegressor):
            explain_boosted_trees(datasource, estimator, _input_fn, plot_type,
                                  result_table, feature_column_names, conn,
                                  oss_dest, oss_ak, oss_sk, oss_endpoint,
                                  oss_bucket_name)
        else:
            shap_dataset = pd.DataFrame(columns=feature_column_names)
            for i, (features, label) in enumerate(_input_fn()):
                shap_dataset.loc[i] = [
                    item.numpy()[0][0] for item in features.values()
                ]
            explain_dnns(datasource, estimator, shap_dataset, plot_type,
                         result_table, feature_column_names, conn, oss_dest,
                         oss_ak, oss_sk, oss_endpoint, oss_bucket_name)
    finally:
        # BUGFIX: close the connection even if explanation raises, to avoid
        # leaking the DB connection.
        if conn is not None:
            conn.close()
def _predict(datasource,
             estimator_string,
             select,
             result_table,
             feature_columns,
             feature_column_names,
             feature_column_names_map,
             train_label_name,
             result_col_name,
             feature_metas=None,
             model_params=None,
             save="",
             batch_size=1,
             pai_table=""):
    """Run local prediction against a MaxCompute (odps) table.

    Args:
        datasource: unused here; kept for signature compatibility.
        estimator_string: model class name, resolved via import_model().
        select: unused here; data comes from pai_table.
        result_table: table to save prediction result to.
        feature_columns: dict of feature column definitions, merged into
            model_params.
        feature_column_names: ordered feature column names.
        feature_column_names_map: mapping of column-group name -> columns.
        train_label_name: label column used at training time.
        result_col_name: output column name for predictions.
        feature_metas: per-feature metadata dict (default: empty).
        model_params: extra model constructor params (default: empty).
        save: model_dir used for estimator models / Keras weights path.
        batch_size: unused here; kept for signature compatibility.
        pai_table: PAI input table name in "project.table" form.
    """
    # BUGFIX: the old {} defaults were shared mutable objects; model_params
    # is mutated below via .update() and item assignment, and feature_metas
    # is stored into model_params, leaking state between calls.
    feature_metas = {} if feature_metas is None else feature_metas
    model_params = {} if model_params is None else model_params

    estimator = import_model(estimator_string)
    model_params.update(feature_columns)
    is_estimator = is_tf_estimator(estimator)

    conn = None
    driver = "pai_maxcompute"
    # NOTE(review): assumes pai_table is always "project.table" — an
    # unqualified name would raise IndexError here; confirm with callers.
    pai_table_parts = pai_table.split(".")
    formatted_pai_table = "odps://%s/tables/%s" % (pai_table_parts[0],
                                                   pai_table_parts[1])
    selected_cols = db.pai_selected_cols(formatted_pai_table)
    predict_generator = db.pai_maxcompute_db_generator(formatted_pai_table)

    if not is_estimator:
        if not issubclass(estimator, tf.keras.Model):
            # functional model need field_metas parameter
            model_params["field_metas"] = feature_metas
        print("Start predicting using keras model...")
        keras_predict(estimator,
                      model_params,
                      save,
                      result_table,
                      feature_column_names,
                      feature_metas,
                      train_label_name,
                      result_col_name,
                      driver,
                      conn,
                      predict_generator,
                      selected_cols,
                      hdfs_namenode_addr="",
                      hive_location="",
                      hdfs_user="",
                      hdfs_pass="")
    else:
        model_params['model_dir'] = save
        print("Start predicting using estimator model...")
        estimator_predict(estimator,
                          model_params,
                          save,
                          result_table,
                          feature_column_names,
                          feature_column_names_map,
                          feature_columns,
                          feature_metas,
                          train_label_name,
                          result_col_name,
                          driver,
                          conn,
                          predict_generator,
                          selected_cols,
                          hdfs_namenode_addr="",
                          hive_location="",
                          hdfs_user="",
                          hdfs_pass="")

    print("Done predicting. Predict table : %s" % result_table)