def test_for_scalar(self):
    import tensorflow as tf
    with tf.Graph().as_default():
        input1 = tf.placeholder(dtype=tf.float32, shape=())
        output = input1 + 1
        sess = tf.Session()
        net = TFNet.from_session(sess, [input1], [output])
        sess.close()
    out_value = net.forward(np.array(1.0))
    assert len(out_value.shape) == 0
def predict(model_path, img_path, partition_num=4):
    inputs = ["ToFloat:0"]
    outputs = ["num_detections:0", "detection_boxes:0",
               "detection_scores:0", "detection_classes:0"]
    model = TFNet(model_path, inputs, outputs)
    image_set = ImageSet.read(img_path, sc, partition_num)
    transformer = ChainedPreprocessing([ImageResize(256, 256),
                                        ImageMatToTensor(format="NHWC"),
                                        ImageSetToSample()])
    transformed_image_set = image_set.transform(transformer)
    output = model.predict_image(transformed_image_set.to_image_frame(),
                                 batch_per_partition=1)
    # Print the detection result of the first image.
    result = ImageSet.from_image_frame(output).get_predict().first()
    print(result)
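A short sketch of driving the helper above. Both paths are hypothetical placeholders, and the frozen graph is assumed to expose the tensor names used in `predict`:

from zoo.common.nncontext import init_nncontext

sc = init_nncontext("TFNet Object Detection Example")
# Hypothetical paths: a frozen detection graph and a folder of images.
predict("/path/to/frozen_inference_graph.pb",
        "/path/to/images",
        partition_num=4)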
def test_tfdataset_with_string_rdd(self):
    string_rdd = self.sc.parallelize(["123", "456"], 1)
    ds = TFDataset.from_string_rdd(string_rdd, batch_per_thread=1)
    input_tensor = tf.placeholder(dtype=tf.string, shape=(None,))
    output_tensor = tf.string_to_number(input_tensor)
    with tf.Session() as sess:
        tfnet = TFNet.from_session(sess,
                                   inputs=[input_tensor],
                                   outputs=[output_tensor])
    result = tfnet.predict(ds).collect()
    assert result[0] == 123
    assert result[1] == 456
def predict(self, input_fn, checkpoint_path=None):
    """Outputs predictions for given features.

    :param input_fn: A function that constructs the features. The function
        should construct and return one of the following:
        * A `TFDataset` object, each element of which is a tuple
          `(features, None)`.
        * A `tf.data.Dataset` object: Outputs of the `Dataset` object must
          satisfy the same constraints as below.
        * features: A `tf.Tensor` or a dictionary of string feature name to
          `Tensor`. features are consumed by `model_fn`. They should satisfy
          the expectation of `model_fn` from inputs.
        * A tuple, in which case the first item is extracted as features.
    :param checkpoint_path: Path of a specific checkpoint to predict. If
        `None`, the latest checkpoint in `model_dir` is used. If there are
        no checkpoints in `model_dir`, prediction is run with newly
        initialized `Variables` instead of ones restored from checkpoint.

    Returns:
        Evaluated values of `predictions` tensors.
    """
    with tf.Graph().as_default() as g:
        result = self.estimator._call_input_fn(input_fn,
                                               tf.estimator.ModeKeys.PREDICT)
        if isinstance(result, TFDataset):
            spec = self._call_model_fn(result.feature_tensors, None,
                                       tf.estimator.ModeKeys.PREDICT,
                                       self.config)
            latest_checkpoint = self.estimator.latest_checkpoint()

            if latest_checkpoint:
                checkpoint_path = latest_checkpoint

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())
                inputs = nest.flatten(result._original_tensors[0])
                outputs = nest.flatten(spec.predictions)
                tfnet = TFNet.from_session(sess, inputs=inputs, outputs=outputs)

                rdd = result.get_prediction_data()
                results = tfnet.predict(rdd, result.batch_per_thread)
                return results

    # Fall back to the standard tf.estimator path for non-TFDataset input.
    return list(self.estimator.predict(input_fn,
                                       checkpoint_path=checkpoint_path))
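For orientation, a minimal sketch of how the `TFDataset` branch above might be driven. The `model_fn`, the `TFDataset.from_rdd` keyword arguments, and the `estimator` wrapper instance are assumptions for illustration, not verbatim API:

import numpy as np
import tensorflow as tf

def model_fn(features, labels, mode):
    # PREDICT mode only requires the predictions tensor.
    logits = tf.layers.dense(features, 2)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=tf.argmax(logits, axis=1))

def input_fn():
    rdd = sc.parallelize(np.random.rand(16, 4).astype(np.float32))
    # Assumed from_rdd signature: a (dtype, shape) tuple per tensor.
    return TFDataset.from_rdd(rdd,
                              features=(tf.float32, [4]),
                              batch_per_thread=4)

# `estimator` is assumed to be an instance of the wrapper class above.
predictions = estimator.predict(input_fn)  # an RDD for TFDataset input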
def evaluate(self, input_fn, eval_methods, steps=None, checkpoint_path=None):
    if not all(isinstance(metric, six.string_types)
               for metric in eval_methods):
        raise ValueError("All metrics should be string types")
    with tf.Graph().as_default() as g:
        result = self.estimator._call_input_fn(input_fn,
                                               tf.estimator.ModeKeys.EVAL)
        if isinstance(result, TFDataset):
            # model_fn is called in PREDICT mode: only the prediction
            # tensors are needed here, since the metrics are computed by
            # BigDL in tfnet.evaluate below.
            spec = self._call_model_fn(result.feature_tensors,
                                       result.label_tensors,
                                       tf.estimator.ModeKeys.PREDICT,
                                       self.config)
            latest_checkpoint = self.estimator.latest_checkpoint()

            if latest_checkpoint:
                checkpoint_path = latest_checkpoint

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())
                inputs = nest.flatten(result._original_tensors[0])
                outputs = nest.flatten(spec.predictions)
                tfnet = TFNet.from_session(sess, inputs=inputs, outputs=outputs)
                rdd = result.rdd.map(lambda t: Sample.from_ndarray(
                    nest.flatten(t[0]), nest.flatten(t[1])))
                if result.batch_per_thread < 0:
                    batch_size = result.batch_size
                else:
                    batch_size = (result.batch_per_thread
                                  * result.rdd.getNumPartitions())
                eval_methods = [self._to_bigdl_metric(m)
                                for m in eval_methods]
                results = tfnet.evaluate(rdd, batch_size, eval_methods)
                final_result = dict([(r.method, r.result) for r in results])
                return final_result

    return self.estimator.evaluate(input_fn, steps,
                                   checkpoint_path=checkpoint_path)
def test_tf_net_predict(self):
    resource_path = os.path.join(os.path.split(__file__)[0],
                                 "../../resources")
    tfnet_path = os.path.join(resource_path, "tfnet")
    import tensorflow as tf
    tf_session_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                       intra_op_parallelism_threads=1)
    net = TFNet.from_export_folder(tfnet_path,
                                   tf_session_config=tf_session_config)
    output = net.predict(np.random.rand(16, 4),
                         batch_per_thread=5,
                         distributed=False)
    assert output.shape == (16, 2)
def test_init_tfnet_from_session(self):
    import tensorflow as tf
    input1 = tf.placeholder(dtype=tf.float32, shape=(None, 2))
    label1 = tf.placeholder(dtype=tf.float32, shape=(None, 2))
    hidden = tf.layers.dense(input1, 4)
    output = tf.layers.dense(hidden, 1)
    loss = tf.reduce_mean(tf.square(output - label1))
    train_op = tf.train.GradientDescentOptimizer(1e-3).minimize(loss)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    data = np.random.rand(2, 2)
    output_value_ref = sess.run(output, feed_dict={input1: data})
    net = TFNet.from_session(sess, [input1], [output])
    output_value = net.forward(data)
    self.assert_allclose(output_value, output_value_ref)
def _evaluate_distributed(self, dataset):
    tfnet = TFNet.from_session(K.get_session(),
                               inputs=self.model.inputs,
                               outputs=self.model.outputs)
    data = dataset.get_evaluation_data()
    if dataset.batch_per_thread < 0:
        batch_size = dataset.batch_size
    else:
        batch_size = (dataset.batch_per_thread
                      * dataset.get_num_partitions())
    eval_methods = [to_bigdl_metric(m, self.model.loss)
                    for m in self.metrics_names]
    results = tfnet.evaluate(data, batch_size, eval_methods)
    final_result = [r.result for r in results]
    return final_result
def evaluate(self, input_fn, eval_methods, steps=None, checkpoint_path=None):
    """Evaluates the model given evaluation data `input_fn`.

    :param input_fn: A function that constructs the input data for
        evaluation. The function should construct and return one of the
        following:
        * A `TFDataset` object, each element of which is a tuple
          `(features, labels)`.
        * A `tf.data.Dataset` object: Outputs of the `Dataset` object must
          be a tuple `(features, labels)` with the same constraints as below.
        * A tuple `(features, labels)`: Where `features` is a `tf.Tensor`
          or a dictionary of string feature name to `Tensor` and `labels`
          is a `Tensor` or a dictionary of string label name to `Tensor`.
          Both `features` and `labels` are consumed by `model_fn`. They
          should satisfy the expectation of `model_fn` from inputs.
    :param eval_methods: a list of strings specifying the evaluation
        metrics to be used for this model
    :param steps: Number of steps for which to evaluate the model.
    :param checkpoint_path: Path of a specific checkpoint to evaluate. If
        `None`, the latest checkpoint in `model_dir` is used. If there are
        no checkpoints in `model_dir`, evaluation is run with newly
        initialized `Variables` instead of ones restored from checkpoint.

    Returns:
        A dict containing the evaluation metrics specified in `model_fn`
        keyed by name.
    """
    if not all(isinstance(metric, six.string_types)
               for metric in eval_methods):
        raise ValueError("All metrics should be string types")
    with tf.Graph().as_default() as g:
        result = self.estimator._call_input_fn(input_fn,
                                               tf.estimator.ModeKeys.EVAL)
        if isinstance(result, TFDataset):
            # model_fn is called in PREDICT mode: only the prediction
            # tensors are needed, since the metrics are computed by BigDL.
            spec = self._call_model_fn(result.feature_tensors,
                                       result.label_tensors,
                                       tf.estimator.ModeKeys.PREDICT,
                                       self.config)
            latest_checkpoint = self.estimator.latest_checkpoint()

            if latest_checkpoint:
                checkpoint_path = latest_checkpoint

            with tf.Session() as sess:
                if checkpoint_path:
                    saver = tf.train.Saver()
                    saver.restore(sess, checkpoint_path)
                else:
                    sess.run(tf.global_variables_initializer())
                inputs = nest.flatten(result._original_tensors[0])
                outputs = nest.flatten(spec.predictions)
                tfnet = TFNet.from_session(sess, inputs=inputs, outputs=outputs)
                data = result.get_evaluation_data()
                if result.batch_per_thread < 0:
                    batch_size = result.batch_size
                else:
                    batch_size = (result.batch_per_thread
                                  * result.get_num_partitions())
                eval_methods = [self._to_bigdl_metric(m)
                                for m in eval_methods]
                results = tfnet.evaluate(data, batch_size, eval_methods)
                final_result = dict([(r.method, r.result) for r in results])
                return final_result

    return self.estimator.evaluate(input_fn, steps,
                                   checkpoint_path=checkpoint_path)
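A companion sketch for the evaluation path above. The dataset construction and the metric string are assumptions; string metrics are mapped to BigDL validation methods by `_to_bigdl_metric`:

import numpy as np
import tensorflow as tf

def eval_input_fn():
    rdd = sc.parallelize(range(100)).map(
        lambda i: (np.random.rand(4).astype(np.float32),
                   np.array(i % 2, dtype=np.int32)))
    # Assumed from_rdd signature: (dtype, shape) tuples for features/labels.
    return TFDataset.from_rdd(rdd,
                              features=(tf.float32, [4]),
                              labels=(tf.int32, []),
                              batch_per_thread=4)

metrics = estimator.evaluate(eval_input_fn, eval_methods=["accuracy"])
print(metrics)  # a dict keyed by metric name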
logits, end_points = inception_v1(images, num_classes=1001)
sess = tf.Session()
saver = tf.train.Saver()
# Edit this path to point to the checkpoint you downloaded.
saver.restore(sess, "file:///home/hduser/slim/checkpoint/inception_v1.ckpt")
# saver.restore(sess, "hdfs:///slim/checkpoint/inception_v1.ckpt")

# Export the graph up to the 'Mixed_3c' endpoint as a TFNet folder.
from zoo.util.tf import export_tf
avg_pool = end_points['Mixed_3c']
export_tf(sess, "file:///home/hduser/slim/tfnet/",
          inputs=[images], outputs=[avg_pool])

from zoo.pipeline.api.net import TFNet
amodel = TFNet.from_export_folder("file:///home/hduser/slim/tfnet/")

from bigdl.nn.layer import Sequential, Transpose, Contiguous, Linear, ReLU, \
    SoftMax, Reshape, View, MulConstant, SpatialAveragePooling

full_model = Sequential()
# Transpose the NCHW input to the NHWC layout expected by the TensorFlow
# graph, and rescale pixel values to [0, 1].
full_model.add(Transpose([(2, 4), (2, 3)]))
scalar = 1. / 255
full_model.add(MulConstant(scalar))
full_model.add(Contiguous())
full_model.add(amodel)
full_model.add(View([1024]))
full_model.add(Linear(1024, 5))

import re
from bigdl.nn.criterion import CrossEntropyCriterion
from pyspark import SparkConf
from pyspark.ml import Pipeline
from pyspark.sql import SQLContext
from pyspark.sql.functions import col, udf
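A quick local smoke test of the exported TFNet can catch layout mistakes before the Spark pipeline below; the input size is an assumption (inception_v1 is typically fed 224x224x3 NHWC images):

import numpy as np

dummy = np.random.rand(1, 224, 224, 3).astype(np.float32)
features = amodel.forward(dummy)
print(features.shape)  # feature map from the 'Mixed_3c' endpoint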
if __name__ == '__main__':
    sparkConf = init_spark_conf().setAppName("testNNClassifer") \
        .setMaster('local[1]')
    sc = init_nncontext(sparkConf)
    spark = SparkSession \
        .builder \
        .getOrCreate()

    with tf.Graph().as_default():
        input1 = tf.placeholder(dtype=tf.float32, shape=(None, 2))
        hidden = tf.layers.dense(input1, 4)
        output = tf.sigmoid(tf.layers.dense(hidden, 1))
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # generate_backward=True makes the TFNet trainable with BigDL.
            net = TFNet.from_session(sess, [input1], [output],
                                     generate_backward=True)

    df = spark.createDataFrame(
        [(Vectors.dense([2.0, 1.0]), 1.0),
         (Vectors.dense([1.0, 2.0]), 0.0),
         (Vectors.dense([2.0, 1.0]), 1.0),
         (Vectors.dense([1.0, 2.0]), 0.0)],
        ["features", "label"])

    print("before training:")
    NNModel(net).transform(df).show()

    classifier = NNClassifier(net, MSECriterion()) \
        .setBatchSize(4) \
        .setOptimMethod(Adam()) \
        .setLearningRate(0.1) \
model = Model(config)

# init or get SparkContext
sc = init_nncontext()

# model_dir = config.model_dir
model_dir = find_latest_dir(os.path.join(config.model, 'model_save/'))

# Freeze the TensorFlow model from a restored session into a TFNet.
with tf.Session() as sess:
    saver = tf.train.Saver()
    saver.restore(sess, os.path.join(model_dir, config.model))
    tfnet = TFNet.from_session(
        sess,
        inputs=[model.input_x, model.memories],  # dropout is never used
        outputs=[model.predictions])

data_x_rdd = sc.parallelize(test_x, PARALLELISM)
data_m_rdd = sc.parallelize(test_m, PARALLELISM)

# create an RDD of Sample
sample_rdd = data_x_rdd.zip(data_m_rdd).map(
    lambda x: Sample.from_ndarray(features=x, labels=np.zeros([1])))

# distributed inference on Spark; this returns an RDD, so the work only
# runs when an action is triggered
outputs = tfnet.predict(sample_rdd,
                        batch_per_thread=config.batch_size,
                        distributed=True)
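Since `predict` returns a lazy RDD, timing is only meaningful once an action runs. A minimal sketch, assuming the predictions fit in driver memory for `collect`:

import time

start = time.time()
predictions = outputs.collect()  # the action that triggers inference
print("distributed inference took %.1f s for %d samples"
      % (time.time() - start, len(predictions)))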
def test_init_tf_net(self):
    resource_path = os.path.join(os.path.split(__file__)[0],
                                 "../../resources")
    tfnet_path = os.path.join(resource_path, "tfnet")
    net = TFNet.from_export_folder(tfnet_path)
    output = net.forward(np.random.rand(2, 4))
    assert output.shape == (2, 2)