def test_DeepImagePredictorNoReshape(self):
        """
        Compare DeepImagePredictor against Keras on manually-resized images.

        Each array in ``self.imageArray`` is converted to an image struct, the
        structs are loaded into a DataFrame typed with ``ImageSchema.imageSchema``
        and the predictor output is checked against ``self.kerasPredict``.
        """
        pixelArrays = self.imageArray
        expected = self.kerasPredict

        def toImageRow(pixels):
            struct = imageIO.imageArrayToStruct(pixels.astype('uint8'))
            imageFields = ImageSchema.imageSchema['image'].dataType
            # list the values in schema field order to avoid a pyspark bug
            return [[getattr(struct, f.name) for f in imageFields]]

        # test: predictor vs keras on resized images
        imageRows = [toImageRow(pixels) for pixels in pixelArrays]
        imageDf = self.sc.parallelize(imageRows).toDF(ImageSchema.imageSchema)
        if self.numPartitionsOverride:
            # honour the partition count requested by the test fixture
            imageDf = imageDf.coalesce(self.numPartitionsOverride)

        predictor = DeepImagePredictor(inputCol='image',
                                       modelName=self.name,
                                       outputCol="prediction")
        collected = predictor.transform(imageDf).collect()
        sparkPredict = np.array([r.prediction for r in collected])

        # shapes must agree before the element-wise comparison is meaningful
        self.assertEqual(expected.shape, sparkPredict.shape)
        np.testing.assert_array_almost_equal(expected, sparkPredict)
    def test_DeepImagePredictorNoReshape(self):
        """
        Compare DeepImagePredictor against Keras on manually-resized images.

        Each array in ``self.imageArray`` is converted to an RGB image struct,
        wrapped in a single-column ("image") DataFrame and run through the
        predictor; the output is checked against ``self.kerasPredict``.
        """
        pixelArrays = self.imageArray
        expected = self.kerasPredict

        def toImageRow(pixels):
            struct = imageIO.imageArrayToStruct(pixels.astype('uint8'),
                                                imageIO.SparkMode.RGB)
            # list the values in schema field order to avoid a pyspark bug
            orderedValues = [getattr(struct, f.name) for f in imageIO.imageSchema]
            return [orderedValues]

        # test: predictor vs keras on resized images
        imageRows = [toImageRow(pixels) for pixels in pixelArrays]
        rowsRdd = self.sc.parallelize(imageRows)
        frameSchema = StructType([StructField("image", imageIO.imageSchema)])
        imageDf = rowsRdd.toDF(frameSchema)

        predictor = DeepImagePredictor(inputCol='image', modelName=self.name,
                                       outputCol="prediction")
        collected = predictor.transform(imageDf).collect()
        sparkPredict = np.array([r.prediction for r in collected])

        # shapes must agree before the element-wise comparison is meaningful
        self.assertEqual(expected.shape, sparkPredict.shape)
        np.testing.assert_array_almost_equal(expected, sparkPredict)
    def test_DeepImagePredictor(self):
        """
        Check that the predictor output on ``self.imageDF`` is (almost) equal
        to ``self.kerasPredict``, to 6 decimal places.
        """
        expected = self.kerasPredict
        predictor = DeepImagePredictor(inputCol='image', modelName=self.name,
                                       outputCol="prediction")
        collected = predictor.transform(self.imageDF).collect()
        sparkPredict = np.array([r.prediction for r in collected])

        # shapes must agree before the element-wise comparison is meaningful
        self.assertEqual(expected.shape, sparkPredict.shape)
        np.testing.assert_array_almost_equal(expected, sparkPredict, decimal=6)
 def test_DeepImagePredictor(self):
     """
     Check that the predictor output on ``self.imageDF`` is (almost) equal to
     ``self.kerasPredict``.

     The collected rows are passed through ``self._sortByFileOrder`` before
     the comparison; the precision is ``self.featurizerCompareDigitsExact``.
     """
     expected = self.kerasPredict
     predictor = DeepImagePredictor(inputCol='image', modelName=self.name,
                                    outputCol="prediction")
     collected = predictor.transform(self.imageDF).collect()
     orderedRows = self._sortByFileOrder(collected)
     sparkPredict = np.array([r.prediction for r in orderedRows])
     # shapes must agree before the element-wise comparison is meaningful
     self.assertEqual(expected.shape, sparkPredict.shape)
     np.testing.assert_array_almost_equal(
         expected, sparkPredict, decimal=self.featurizerCompareDigitsExact)
    def test_inceptionV3_prediction_decoded(self):
        """
        Check that an InceptionV3 predictor with ``decodePredictions=True``
        yields exactly ``topK`` entries per row, using at most five rows of
        the sample image DataFrame.
        """
        topK = 10
        output_col = "prediction"
        predictor = DeepImagePredictor(inputCol="image", outputCol=output_col,
                                       modelName="InceptionV3",
                                       decodePredictions=True, topK=topK)

        sample_df = getSampleImageDF().limit(5)
        result_rows = predictor.transform(sample_df).collect()

        for result in result_rows:
            # every row must carry one decoded entry per requested rank
            self.assertEqual(len(result[output_col]), topK)
    def test_prediction_decoded(self):
        """
        Check that a predictor with ``decodePredictions=True`` yields exactly
        ``topK`` entries for every row of ``self.imageDF``.
        """
        topK = 10
        output_col = "prediction"
        predictor = DeepImagePredictor(inputCol="image", outputCol=output_col,
                                       modelName=self.name,
                                       decodePredictions=True, topK=topK)
        result_rows = predictor.transform(self.imageDF).collect()

        for result in result_rows:
            # every row must carry one decoded entry per requested rank
            self.assertEqual(len(result[output_col]), topK)
    def test_prediction_decoded(self):
        """
        Check that a predictor with ``decodePredictions=True`` yields exactly
        ``topK`` entries for every row of ``self.imageDF``.
        """
        topK = 10
        output_col = "prediction"
        predictor = DeepImagePredictor(inputCol="image", outputCol=output_col,
                                       modelName=self.name,
                                       decodePredictions=True, topK=topK)
        result_rows = predictor.transform(self.imageDF).collect()

        for result in result_rows:
            # every row must carry one decoded entry per requested rank
            self.assertEqual(len(result[output_col]), topK)
示例#8
0
    def test_inceptionV3_prediction_decoded(self):
        """
        Check that an InceptionV3 predictor with ``decodePredictions=True``
        yields exactly ``topK`` entries per row, using at most five rows of
        the sample image DataFrame.
        """
        topK = 10
        output_col = "prediction"
        predictor = DeepImagePredictor(
            inputCol="image",
            outputCol=output_col,
            modelName="InceptionV3",
            decodePredictions=True,
            topK=topK,
        )

        sample_df = getSampleImageDF().limit(5)
        result_rows = predictor.transform(sample_df).collect()

        for result in result_rows:
            # every row must carry one decoded entry per requested rank
            self.assertEqual(len(result[output_col]), topK)
    def test_DeepImagePredictor(self):
        """
        Run the sparkDL InceptionV3 predictor on the sample image DataFrame
        (raw, original-size images per the original author) and compare the
        result to ``self.kerasPredict``.
        """
        expected = self.kerasPredict
        predictor = DeepImagePredictor(inputCol='image', modelName="InceptionV3",
                                       outputCol="prediction")
        rawImageDf = getSampleImageDF()
        collected = predictor.transform(rawImageDf).collect()
        sparkPredict = np.array([r.prediction for r in collected])

        self.assertEqual(expected.shape, sparkPredict.shape)
        # NOTE(review): an earlier comment described this as a large tolerance
        # chosen because of differences in the resize step, yet decimal=6 is
        # numpy's default precision -- confirm the intended tolerance.
        # TODO: match keras resize step to get closer prediction
        np.testing.assert_array_almost_equal(expected, sparkPredict, decimal=6)
示例#10
0
    def test_DeepImagePredictor(self):
        """
        Run the sparkDL predictor for ``self.name`` on ``self.imageDF`` (raw,
        original-size images per the original author) and compare the result
        to ``self.kerasPredict``.
        """
        expected = self.kerasPredict
        predictor = DeepImagePredictor(inputCol='image',
                                       modelName=self.name,
                                       outputCol="prediction")
        collected = predictor.transform(self.imageDF).collect()
        sparkPredict = np.array([r.prediction for r in collected])

        self.assertEqual(expected.shape, sparkPredict.shape)
        # NOTE(review): an earlier comment described this as a large tolerance
        # chosen because of differences in the resize step, yet decimal=6 is
        # numpy's default precision -- confirm the intended tolerance.
        # TODO: match keras resize step to get closer prediction
        np.testing.assert_array_almost_equal(expected, sparkPredict, decimal=6)
示例#11
0
    def test_DeepImagePredictorNoReshape(self):
        """
        Compare DeepImagePredictor against Keras on manually-resized images.

        Each array in ``self.imageArray`` is turned into a row by
        ``self._rowWithImage``, the rows are loaded into a DataFrame typed with
        ``ImageSchema.imageSchema`` and the predictor output is checked against
        ``self.kerasPredict``.
        """
        pixelArrays = self.imageArray
        expected = self.kerasPredict

        # test: predictor vs keras on resized images
        imageRows = [self._rowWithImage(pixels) for pixels in pixelArrays]
        imageDf = self.sc.parallelize(imageRows).toDF(ImageSchema.imageSchema)
        if self.numPartitionsOverride:
            # honour the partition count requested by the test fixture
            imageDf = imageDf.coalesce(self.numPartitionsOverride)

        predictor = DeepImagePredictor(inputCol='image',
                                       modelName=self.name,
                                       outputCol="prediction")
        collected = predictor.transform(imageDf).collect()
        sparkPredict = np.array([r.prediction for r in collected])

        # shapes must agree before the element-wise comparison is meaningful
        self.assertEqual(expected.shape, sparkPredict.shape)
        np.testing.assert_array_almost_equal(expected, sparkPredict)
    def test_inceptionV3_prediction(self):
        """
        Test inceptionV3 using keras, tensorflow and sparkDL.

        The keras prediction on images resized to 299x299 is the baseline. It
        is compared against:

        1. the TensorFlow graph returned by ``_buildTFGraphForName``,
        2. ``DeepImagePredictor`` run on the same pre-resized images,
        3. ``DeepImagePredictor`` run on the sample image DataFrame (the
           sparkDL run without resizing beforehand).
        """
        _, images = getSampleImageList()
        imageArray = np.empty((len(images), 299, 299, 3), 'uint8')
        for i, img in enumerate(images):
            # unittest assertions are used so the checks survive ``python -O``
            self.assertIsNotNone(img)
            self.assertEqual(img.mode, "RGB")
            imageArray[i] = np.array(img.resize((299, 299)))

        # Basic keras flow
        # We predict the class probabilities for the images in our test library using keras API.
        prepedImaged = inception_v3.preprocess_input(
            imageArray.astype('float32'))
        model = inception_v3.InceptionV3()
        kerasPredict = model.predict(prepedImaged)

        # test: _buildTFGraphForName
        # Run the graph produced by _buildTFGraphForName and compare the result to above keras
        # result.
        modelGraphInfo = _buildTFGraphForName("InceptionV3", False)
        graph = modelGraphInfo["graph"]
        # Using the session as a context manager closes it on exit; the
        # previous ``sess.as_default()`` form left the session open.
        with tf.Session(graph=graph) as sess:
            inputTensor = graph.get_tensor_by_name(
                modelGraphInfo["inputTensorName"])
            outputTensor = graph.get_tensor_by_name(
                modelGraphInfo["outputTensorName"])
            tfPredict = sess.run(outputTensor, {inputTensor: imageArray})

        self.assertEqual(kerasPredict.shape, tfPredict.shape)
        np.testing.assert_array_almost_equal(kerasPredict, tfPredict)

        imageType = imageIO.pilModeLookup["RGB"]

        def rowWithImage(img):
            row = imageIO.imageArrayToStruct(img.astype('uint8'),
                                             imageType.sparkMode)
            # re-order row to avoid pyspark bug
            return [[
                getattr(row, field.name) for field in imageIO.imageSchema
            ]]

        # test: predictor vs keras on resized images
        # Run sparkDL inceptionV3 transformer on resized images and compare result to above keras
        # result.
        rdd = self.sc.parallelize([rowWithImage(img) for img in imageArray])
        dfType = StructType([StructField("image", imageIO.imageSchema)])
        imageDf = rdd.toDF(dfType)

        transformer = DeepImagePredictor(
            inputCol='image',
            modelName="InceptionV3",
            outputCol="prediction",
        )
        dfPredict = transformer.transform(imageDf).collect()
        dfPredict = np.array([i.prediction for i in dfPredict])

        self.assertEqual(kerasPredict.shape, dfPredict.shape)
        np.testing.assert_array_almost_equal(kerasPredict, dfPredict)

        # test: predictor vs keras on raw images
        # Run sparkDL inceptionV3 transformer on raw (original size) images and compare result to
        # above keras (using keras resizing) result.
        origImgDf = getSampleImageDF()
        fullPredict = transformer.transform(origImgDf).collect()
        fullPredict = np.array([i.prediction for i in fullPredict])

        self.assertEqual(kerasPredict.shape, fullPredict.shape)
        # NOTE(review): an earlier comment described this as a large tolerance
        # chosen because of differences in the resize step, yet decimal=6 is
        # numpy's default precision -- confirm the intended tolerance.
        # TODO: match keras resize step to get closer prediction
        np.testing.assert_array_almost_equal(kerasPredict,
                                             fullPredict,
                                             decimal=6)