Пример #1
0
 def test_for_scalar(self):
     """Check that a TFNet built on a scalar placeholder yields a scalar.

     A graph computing ``input + 1`` on a shape-``()`` float32 placeholder
     is wrapped with ``TFNet.from_session``; forwarding a rank-0 array
     through it must produce an output whose shape is empty.
     """
     import tensorflow as tf
     with tf.Graph().as_default():
         input1 = tf.placeholder(dtype=tf.float32, shape=())
         output = input1 + 1
         # The context manager closes the session even when from_session
         # raises, which the manual sess.close() call could not guarantee.
         with tf.Session() as sess:
             net = TFNet.from_session(sess, [input1], [output])
     # The net is used after the session has been closed.
     out_value = net.forward(np.array(1.0))
     assert len(out_value.shape) == 0
Пример #2
0
def predict(model_path, img_path, partition_num=4):
    """Run a TFNet detection model over a set of images and print one result.

    The model is loaded from ``model_path`` with ``ToFloat:0`` as its input
    tensor and the four detection tensors as outputs. Images read from
    ``img_path`` are resized to 256x256, converted to NHWC tensors and
    turned into samples before prediction.

    :param model_path: location passed to ``TFNet`` as the model to load.
    :param img_path: location passed to ``ImageSet.read``.
    :param partition_num: partition count passed to ``ImageSet.read``.

    NOTE(review): ``sc`` is not defined in this function; it is presumably a
    module-level SparkContext - confirm it exists before calling.
    """
    input_name = "ToFloat:0"
    output_names = [
        "num_detections:0",
        "detection_boxes:0",
        "detection_scores:0",
        "detection_classes:0",
    ]

    model = TFNet(model_path, input_name, output_names)
    images = ImageSet.read(img_path, sc, partition_num)

    steps = [
        ImageResize(256, 256),
        ImageMatToTensor(format="NHWC"),
        ImageSetToSample(),
    ]
    prepared = images.transform(ChainedPreprocessing(steps))
    frame = prepared.to_image_frame()
    predicted = model.predict_image(frame, batch_per_partition=1)

    # Print the detection result of the first image.
    first_prediction = ImageSet.from_image_frame(predicted).get_predict().first()
    print(first_prediction)
Пример #3
0
 def test_tfdataset_with_string_rdd(self):
     """Predict over a TFDataset built from an RDD of strings.

     Two numeric strings are parallelized into a single partition, fed
     through a graph that applies ``tf.string_to_number``, and the
     collected predictions are compared with the expected numbers.
     """
     strings = self.sc.parallelize(["123", "456"], 1)
     dataset = TFDataset.from_string_rdd(strings, batch_per_thread=1)

     text_in = tf.placeholder(dtype=tf.string, shape=(None, ))
     number_out = tf.string_to_number(text_in)
     with tf.Session() as sess:
         tfnet = TFNet.from_session(sess,
                                    inputs=[text_in],
                                    outputs=[number_out])

     # Prediction happens after the session block has exited.
     predictions = tfnet.predict(dataset).collect()
     assert predictions[0] == 123
     assert predictions[1] == 456
Пример #4
0
    def predict(self, input_fn, checkpoint_path=None):
        """Outputs predictions for given features.

        :param input_fn: A function that constructs the features. The function
            should construct and return one of the following:
              * A `TFDataset` object, each element of which is a tuple `(features, None)`.
              * A `tf.data.Dataset` object: Outputs of `Dataset` object must have
                same constraints as below.
              * features: A `tf.Tensor` or a dictionary of string feature name to
                `Tensor`. features are consumed by `model_fn`. They should satisfy
                the expectation of `model_fn` from inputs.
              * A tuple, in which case the first item is extracted as features.

        :param checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
            latest checkpoint in `model_dir` is used.  If there are no checkpoints
            in `model_dir`, prediction is run with newly initialized `Variables`
            instead of ones restored from checkpoint.

        Return:
          Evaluated values of `predictions` tensors. When `input_fn` yields a
          `TFDataset`, this is whatever `TFNet.predict` returns for the
          dataset's prediction data; otherwise it is a list built from
          `self.estimator.predict`.

        """
        with tf.Graph().as_default():
            result = self.estimator._call_input_fn(
                input_fn, tf.estimator.ModeKeys.PREDICT)
            if isinstance(result, TFDataset):
                spec = self._call_model_fn(result.feature_tensors, None,
                                           tf.estimator.ModeKeys.PREDICT,
                                           self.config)

                # Honour an explicitly requested checkpoint. Fall back to the
                # latest one only when the caller did not provide a path;
                # previously the latest checkpoint always won, so the
                # argument was silently ignored.
                if not checkpoint_path:
                    checkpoint_path = self.estimator.latest_checkpoint()

                with tf.Session() as sess:
                    if checkpoint_path:
                        saver = tf.train.Saver()
                        saver.restore(sess, checkpoint_path)
                    else:
                        # No checkpoint available: use freshly initialized
                        # variables.
                        sess.run(tf.global_variables_initializer())
                    inputs = nest.flatten(result._original_tensors[0])
                    outputs = nest.flatten(spec.predictions)
                    tfnet = TFNet.from_session(sess,
                                               inputs=inputs,
                                               outputs=outputs)

                    rdd = result.get_prediction_data()

                    return tfnet.predict(rdd, result.batch_per_thread)

        # input_fn did not produce a TFDataset: delegate to the wrapped
        # estimator.
        return list(
            self.estimator.predict(input_fn, checkpoint_path=checkpoint_path))
Пример #5
0
    def evaluate(self,
                 input_fn,
                 eval_methods,
                 steps=None,
                 checkpoint_path=None):
        """Evaluates the model given evaluation data `input_fn`.

        :param input_fn: A function that constructs the input data for
            evaluation. When it yields a `TFDataset`, evaluation runs through
            a `TFNet`; otherwise the call is delegated to
            `self.estimator.evaluate`.
        :param eval_methods: an iterable of strings naming the evaluation
            metrics; each is converted with `self._to_bigdl_metric`.
        :param steps: Number of steps for which to evaluate. Only forwarded to
            `self.estimator.evaluate`; it is not used on the `TFDataset` path.
        :param checkpoint_path: Path of a specific checkpoint to evaluate. If
            not given, the latest checkpoint reported by the estimator is
            used; if there is none, variables are newly initialized.

        Returns:
          On the `TFDataset` path, a dict mapping each result's `method` to
          its `result`; otherwise the return value of
          `self.estimator.evaluate`.

        Raises:
          ValueError: if any entry of `eval_methods` is not a string.
        """
        if not all(
                isinstance(metric, six.string_types)
                for metric in eval_methods):
            raise ValueError("All metrics should be string types")
        with tf.Graph().as_default():
            result = self.estimator._call_input_fn(input_fn,
                                                   tf.estimator.ModeKeys.EVAL)
            if isinstance(result, TFDataset):
                # NOTE(review): model_fn is built in PREDICT mode although
                # input_fn was called in EVAL mode - presumably because only
                # spec.predictions is consumed below; confirm.
                spec = self._call_model_fn(result.feature_tensors,
                                           result.label_tensors,
                                           tf.estimator.ModeKeys.PREDICT,
                                           self.config)

                # Honour an explicitly requested checkpoint. Fall back to the
                # latest one only when the caller did not provide a path;
                # previously the latest checkpoint always won, so the
                # argument was silently ignored.
                if not checkpoint_path:
                    checkpoint_path = self.estimator.latest_checkpoint()

                with tf.Session() as sess:
                    if checkpoint_path:
                        saver = tf.train.Saver()
                        saver.restore(sess, checkpoint_path)
                    else:
                        # No checkpoint available: use freshly initialized
                        # variables.
                        sess.run(tf.global_variables_initializer())
                    inputs = nest.flatten(result._original_tensors[0])
                    outputs = nest.flatten(spec.predictions)
                    tfnet = TFNet.from_session(sess,
                                               inputs=inputs,
                                               outputs=outputs)

                    # Each RDD element is indexed as (features, labels).
                    rdd = result.rdd.map(lambda t: Sample.from_ndarray(
                        nest.flatten(t[0]), nest.flatten(t[1])))
                    if result.batch_per_thread < 0:
                        batch_size = result.batch_size
                    else:
                        batch_size = (result.batch_per_thread *
                                      result.rdd.getNumPartitions())

                    bigdl_metrics = [
                        self._to_bigdl_metric(m) for m in eval_methods
                    ]
                    results = tfnet.evaluate(rdd, batch_size, bigdl_metrics)
                    return {r.method: r.result for r in results}

        # input_fn did not produce a TFDataset: delegate to the wrapped
        # estimator.
        return self.estimator.evaluate(input_fn,
                                       steps,
                                       checkpoint_path=checkpoint_path)
Пример #6
0
 def test_tf_net_predict(self):
     """Load the exported ``tfnet`` resource and run a local prediction.

     The net is created with a single-threaded TensorFlow session config
     and asked to predict 16 random rows of width 4 in non-distributed
     mode; the result must have shape ``(16, 2)``.
     """
     test_dir = os.path.dirname(__file__)
     tfnet_path = os.path.join(test_dir, "../../resources", "tfnet")

     import tensorflow as tf
     thread_limits = {
         "inter_op_parallelism_threads": 1,
         "intra_op_parallelism_threads": 1,
     }
     session_config = tf.ConfigProto(**thread_limits)

     net = TFNet.from_export_folder(tfnet_path,
                                    tf_session_config=session_config)
     samples = np.random.rand(16, 4)
     output = net.predict(samples, batch_per_thread=5, distributed=False)
     assert output.shape == (16, 2)
Пример #7
0
    def test_init_tfnet_from_session(self):
        """Check that a TFNet built from a live session matches the session.

        A two-layer dense graph is evaluated with ``sess.run`` to obtain a
        reference output, then wrapped with ``TFNet.from_session``; forwarding
        the same data through the net must give a matching result.
        """
        import tensorflow as tf
        input1 = tf.placeholder(dtype=tf.float32, shape=(None, 2))
        label1 = tf.placeholder(dtype=tf.float32, shape=(None, 2))
        hidden = tf.layers.dense(input1, 4)
        output = tf.layers.dense(hidden, 1)
        loss = tf.reduce_mean(tf.square(output - label1))
        # The optimizer ops are added to the graph but never run here;
        # presumably this checks that TFNet copes with a graph that also
        # contains training ops - confirm.
        tf.train.GradientDescentOptimizer(1e-3).minimize(loss)

        # The context manager guarantees the session is released; it was
        # previously never closed.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            data = np.random.rand(2, 2)
            output_value_ref = sess.run(output, feed_dict={input1: data})
            net = TFNet.from_session(sess, [input1], [output])
        output_value = net.forward(data)

        self.assert_allclose(output_value, output_value_ref)
Пример #8
0
    def _evaluate_distributed(self, dataset):
        """Evaluate the wrapped model on ``dataset`` through a TFNet.

        The net is built from the current Keras backend session using the
        model's inputs and outputs. Each name in ``self.metrics_names`` is
        converted with ``to_bigdl_metric``.

        :param dataset: object providing ``get_evaluation_data()``,
            ``batch_per_thread``, ``batch_size`` and ``get_num_partitions()``.
        :return: list with the ``result`` attribute of every evaluation result.
        """
        tfnet = TFNet.from_session(K.get_session(),
                                   inputs=self.model.inputs,
                                   outputs=self.model.outputs)

        data = dataset.get_evaluation_data()

        # A negative batch_per_thread means the dataset's own batch_size is
        # used; otherwise the batch is per-thread size times partition count.
        batch_size = (dataset.batch_size
                      if dataset.batch_per_thread < 0
                      else dataset.batch_per_thread * dataset.get_num_partitions())

        eval_methods = []
        for metric_name in self.metrics_names:
            eval_methods.append(to_bigdl_metric(metric_name, self.model.loss))

        return [r.result for r in tfnet.evaluate(data, batch_size, eval_methods)]
Пример #9
0
    def evaluate(self,
                 input_fn,
                 eval_methods,
                 steps=None,
                 checkpoint_path=None):
        """Evaluates the model given evaluation data `input_fn`.

        :param input_fn: A function that constructs the input data for evaluation. The
            function should construct and return one of the following:
            * A `TFDataset` object, each element of which is a tuple `(features, labels)`.
            * A `tf.data.Dataset` object: Outputs of `Dataset` object must be a tuple
            `(features, labels)` with same constraints as below.
            * A tuple `(features, labels)`: Where `features` is a `tf.Tensor` or a dictionary
            of string feature name to `Tensor` and `labels` is a `Tensor` or a
            dictionary of string label name to `Tensor`. Both `features` and
            `labels` are consumed by `model_fn`. They should satisfy the expectation
            of `model_fn` from inputs.
        :param eval_methods: a list of strings to specify the evaluation metrics to
                            be used in this model
        :param steps: Number of steps for which to evaluate model. Only forwarded to
            `self.estimator.evaluate`; it is not used when `input_fn` yields a
            `TFDataset`.
        :param checkpoint_path: Path of a specific checkpoint to evaluate. If `None`, the
            latest checkpoint in `model_dir` is used.  If there are no checkpoints
            in `model_dir`, evaluation is run with newly initialized `Variables`
            instead of ones restored from checkpoint.

        Returns:
          A dict containing the evaluation metrics keyed by name. On the
          `TFDataset` path the keys are each result's `method` and the values
          its `result`.

        Raises:
          ValueError: if any entry of `eval_methods` is not a string.
        """
        if not all(
                isinstance(metric, six.string_types)
                for metric in eval_methods):
            raise ValueError("All metrics should be string types")
        with tf.Graph().as_default():
            result = self.estimator._call_input_fn(input_fn,
                                                   tf.estimator.ModeKeys.EVAL)
            if isinstance(result, TFDataset):
                # NOTE(review): model_fn is built in PREDICT mode although
                # input_fn was called in EVAL mode - presumably because only
                # spec.predictions is consumed below; confirm.
                spec = self._call_model_fn(result.feature_tensors,
                                           result.label_tensors,
                                           tf.estimator.ModeKeys.PREDICT,
                                           self.config)

                # Honour an explicitly requested checkpoint. Fall back to the
                # latest one only when the caller did not provide a path;
                # previously the latest checkpoint always won, which
                # contradicted the documented contract.
                if not checkpoint_path:
                    checkpoint_path = self.estimator.latest_checkpoint()

                with tf.Session() as sess:
                    if checkpoint_path:
                        saver = tf.train.Saver()
                        saver.restore(sess, checkpoint_path)
                    else:
                        # No checkpoint available: use freshly initialized
                        # variables.
                        sess.run(tf.global_variables_initializer())
                    inputs = nest.flatten(result._original_tensors[0])
                    outputs = nest.flatten(spec.predictions)
                    tfnet = TFNet.from_session(sess,
                                               inputs=inputs,
                                               outputs=outputs)

                    data = result.get_evaluation_data()
                    if result.batch_per_thread < 0:
                        batch_size = result.batch_size
                    else:
                        batch_size = (result.batch_per_thread *
                                      result.get_num_partitions())

                    bigdl_metrics = [
                        self._to_bigdl_metric(m) for m in eval_methods
                    ]
                    results = tfnet.evaluate(data, batch_size, bigdl_metrics)
                    return {r.method: r.result for r in results}

        # input_fn did not produce a TFDataset: delegate to the wrapped
        # estimator.
        return self.estimator.evaluate(input_fn,
                                       steps,
                                       checkpoint_path=checkpoint_path)
Пример #10
0
    logits, end_points = inception_v1(images, num_classes=1001)

# Restore the pretrained Inception v1 weights into a fresh session.
# NOTE(review): this session is never closed in the visible part of the script.
sess = tf.Session()
saver = tf.train.Saver()
saver.restore(sess, "file:///home/hduser/slim/checkpoint/inception_v1.ckpt")
# Alternative checkpoint location on HDFS:
# saver.restore(sess, "hdfs:///slim/checkpoint/inception_v1.ckpt")
# You need to edit the path above to point at the checkpoint you downloaded.

from zoo.util.tf import export_tf
# NOTE(review): the variable is called avg_pool but the endpoint selected is
# 'Mixed_3c'; confirm its output size matches the View([1024]) /
# Linear(1024, 5) layers added below.
avg_pool = end_points['Mixed_3c']
# Export the graph from `images` to the chosen endpoint into a folder.
export_tf(sess,
          "file:///home/hduser/slim/tfnet/",
          inputs=[images],
          outputs=[avg_pool])
from zoo.pipeline.api.net import TFNet
# Load the exported folder back as a TFNet layer.
amodel = TFNet.from_export_folder("file:///home/hduser/slim/tfnet/")
from bigdl.nn.layer import Sequential, Transpose, Contiguous, Linear, ReLU, SoftMax, Reshape, View, MulConstant, SpatialAveragePooling
# Assemble the full model: preprocessing layers, the TFNet feature extractor,
# then a 5-output linear layer.
full_model = Sequential()
# Swap dimensions 2<->4, then 2<->3 (presumably channel-first to
# channel-last - TODO confirm).
full_model.add(Transpose([(2, 4), (2, 3)]))
# Scale values by 1/255.
scalar = 1. / 255
full_model.add(MulConstant(scalar))
full_model.add(Contiguous())
full_model.add(amodel)
full_model.add(View([1024]))
full_model.add(Linear(1024, 5))
import re
from bigdl.nn.criterion import CrossEntropyCriterion
from pyspark import SparkConf
from pyspark.ml import Pipeline
from pyspark.sql import SQLContext
from pyspark.sql.functions import col, udf
Пример #11
0
if __name__ == '__main__':

    # Create a local single-core Spark context and a SparkSession.
    sparkConf = init_spark_conf().setAppName("testNNClassifer").setMaster('local[1]')
    sc = init_nncontext(sparkConf)
    spark = SparkSession \
        .builder \
        .getOrCreate()

    # Build a small dense network with a sigmoid output and wrap it as a
    # TFNet with backward-pass generation enabled.
    with tf.Graph().as_default():
        input1 = tf.placeholder(dtype=tf.float32, shape=(None, 2))
        hidden = tf.layers.dense(input1, 4)
        output = tf.sigmoid(tf.layers.dense(hidden, 1))
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            net = TFNet.from_session(sess, [input1], [output], generate_backward=True)

    # Four rows, each a 2-element feature vector with a 0/1 label.
    df = spark.createDataFrame(
        [(Vectors.dense([2.0, 1.0]), 1.0),
         (Vectors.dense([1.0, 2.0]), 0.0),
         (Vectors.dense([2.0, 1.0]), 1.0),
         (Vectors.dense([1.0, 2.0]), 0.0)],
        ["features", "label"])

    print("before training:")
    NNModel(net).transform(df).show()

    # NOTE(review): the chained call below ends with a dangling
    # line-continuation backslash, so the `model = Model(config)` line that
    # follows is joined onto it. The snippet appears to be truncated here and
    # the remaining code looks like a separate example - confirm against the
    # original sources.
    classifier = NNClassifier(net, MSECriterion()) \
        .setBatchSize(4) \
        .setOptimMethod(Adam()) \
        .setLearningRate(0.1) \
    model = Model(config)

    # init or get SparkContext
    sc = init_nncontext()

    # model_dir = config.model_dir
    model_dir = find_latest_dir(os.path.join(config.model, 'model_save/'))

    # Restore the saved TensorFlow model and wrap it as a TFNet for inference.
    with tf.Session() as sess:
        saver = tf.train.Saver()
        saver.restore(sess, os.path.join(model_dir, config.model))

        tfnet = TFNet.from_session(
            sess,
            inputs=[model.input_x, model.memories],  # dropout is never used
            outputs=[model.predictions])

    # NOTE(review): test_x, test_m and PARALLELISM are not defined in the
    # visible code - presumably module-level names; confirm.
    data_x_rdd = sc.parallelize(test_x, PARALLELISM)
    data_m_rdd = sc.parallelize(test_m, PARALLELISM)

    # Create an RDD of Sample: each zipped (x, m) pair becomes the features,
    # with a dummy zero label.
    sample_rdd = data_x_rdd.zip(data_m_rdd).map(
        lambda x: Sample.from_ndarray(features=x, labels=np.zeros([1])))

    # distributed inference on Spark and return an RDD
    outputs = tfnet.predict(sample_rdd,
                            batch_per_thread=config.batch_size,
                            distributed=True)

    # Check the elapsed time when an action is triggered (the code that does
    # so is not part of this snippet).
Пример #13
0
 def test_init_tf_net(self):
     """Load the exported ``tfnet`` resource and forward a random batch.

     Two random rows of width 4 are pushed through the net; the output
     must have shape ``(2, 2)``.
     """
     test_dir = os.path.dirname(__file__)
     tfnet_path = os.path.join(test_dir, "../../resources", "tfnet")
     net = TFNet.from_export_folder(tfnet_path)
     batch = np.random.rand(2, 4)
     output = net.forward(batch)
     assert output.shape == (2, 2)