Exemplo n.º 1
0
 def _test(array):
     """Check that `array` survives a round trip through the image struct form.

     Asserts that the struct built by ``imageIO.imageArrayToStruct`` reports the
     array's height and width and carries ``array.tobytes()`` as its data, and
     that ``imageIO.imageStructToArray`` turns the struct back into an equal
     array.

     ``self`` is not a parameter here; it is a free variable that must be
     provided by the surrounding scope.
     """
     # Unpacking into three names requires `array` to be 3-dimensional; the
     # third dimension is not checked.
     n_rows, n_cols, _ = array.shape
     struct = imageIO.imageArrayToStruct(array)
     self.assertEqual(struct.height, n_rows)
     self.assertEqual(struct.width, n_cols)
     self.assertEqual(struct.data, array.tobytes())
     np.testing.assert_array_equal(array, imageIO.imageStructToArray(struct))
Exemplo n.º 2
0
 def _test(array):
     """Round-trip `array` through the image struct and verify nothing is lost.

     The struct returned by ``imageIO.imageArrayToStruct`` must expose the
     array's height, width and raw bytes (``array.tobytes()``), and converting
     it back with ``imageIO.imageStructToArray`` must give an equal array.

     Note that ``self`` comes from outside this function (it is not passed in).
     """
     # A 3-tuple shape is required by the unpacking below; the last entry is
     # not used by any assertion.
     expected_height, expected_width, _unused = array.shape
     as_struct = imageIO.imageArrayToStruct(array)
     self.assertEqual(as_struct.height, expected_height)
     self.assertEqual(as_struct.width, expected_width)
     self.assertEqual(as_struct.data, array.tobytes())
     restored = imageIO.imageStructToArray(as_struct)
     np.testing.assert_array_equal(array, restored)
Exemplo n.º 3
0
    def _getNumpyFeaturesAndLabels(self, dataset):
        """
        Load the images referenced by `dataset` and return them, together with
        the optional labels, as local numpy arrays.

        All rows are gathered with ``collect()``, so the whole data set is
        assumed to fit in memory. Rows whose loaded-image column is null are
        dropped before collecting.

        :param dataset: passed to ``self.loadImagesInternal`` along with the
                        input column name (presumably a Spark DataFrame).
        :return: tuple ``(X, y)``. ``X`` stacks one array per collected row
                 along a new first axis. ``y`` stacks the label arrays the same
                 way, or is None when no non-empty label column is configured.
        :raises ValueError: if no row is left after loading, or if a label's
                            ``toArray()`` raises ValueError.
        :raises AssertionError: if the label column's data type is not
                                ``spla.VectorUDT``.
        """
        image_uri_col = self.getInputCol()
        has_label = self.isDefined(self.labelCol) and self.getLabelCol() != ""
        label_col = self.getLabelCol() if has_label else None
        tmp_image_col = self._loadedImageCol()
        loaded_df = self.loadImagesInternal(dataset, image_uri_col)
        image_df = loaded_df.dropna(subset=[tmp_image_col])

        # Features: one numpy array per collected image struct.
        rows = image_df.collect()
        feature_arrays = [imageStructToArray(row[tmp_image_col]) for row in rows]
        if not feature_arrays:
            raise ValueError("Cannot extract any feature from dataset!")
        features = np.stack(feature_arrays, axis=0)

        # Without a label column there is nothing more to extract.
        if label_col is None:
            return features, None

        # Labels: the column must hold vectors.
        label_dtype = image_df.schema[label_col].dataType
        assert isinstance(label_dtype, spla.VectorUDT), \
            "must encode labels in one-hot vector format, but got {}".format(label_dtype)

        label_arrays = []
        for row in rows:
            try:
                encoded = row[label_col].toArray()
            except ValueError:
                raise ValueError("Cannot extract encoded label array")
            label_arrays.append(encoded)

        if not label_arrays:
            raise ValueError(
                "Failed to load any labels from dataset, but labels are required"
            )

        labels = np.stack(label_arrays, axis=0)
        assert labels.shape[0] == features.shape[0], \
            "number of features {} != number of labels {}".format(
                features.shape[0], labels.shape[0])

        return features, labels
Exemplo n.º 4
0
    def test_image_round_trip(self):
        # Round trip under test: array -> png -> sparkImg -> array.
        # `pngData` and `array` are fixtures defined outside this method
        # (presumably `pngData` is the PNG encoding of `array` -- confirm).
        binarySchema = StructType([StructField("data", BinaryType(), False)])
        df = self.session.createDataFrame([[bytearray(pngData)]], binarySchema)

        # Decode the binary column into an image struct through a UDF.
        decImg = udf(imageIO._decodeImage, imageIO.imageSchema)
        imageDF = df.select(decImg("data").alias("image"))
        row = imageDF.first()

        testArray = imageIO.imageStructToArray(row.image)
        self.assertEqual(testArray.shape, array.shape)
        self.assertEqual(testArray.dtype, array.dtype)
        # assert_array_equal reports which elements differ when the check
        # fails, whereas assertTrue(np.all(...)) only says "False is not true".
        np.testing.assert_array_equal(array, testArray)
Exemplo n.º 5
0
    def test_image_round_trip(self):
        # Verifies the round trip array -> png -> sparkImg -> array.
        # NOTE(review): `pngData` and `array` come from outside this method;
        # `pngData` looks like the PNG-encoded form of `array` -- confirm.
        binarySchema = StructType([StructField("data", BinaryType(), False)])
        df = self.session.createDataFrame([[bytearray(pngData)]], binarySchema)

        # Convert the raw bytes to an image struct
        decImg = udf(imageIO._decodeImage, imageIO.imageSchema)
        imageDF = df.select(decImg("data").alias("image"))
        row = imageDF.first()

        testArray = imageIO.imageStructToArray(row.image)
        self.assertEqual(testArray.shape, array.shape)
        self.assertEqual(testArray.dtype, array.dtype)
        # Use numpy's array assertion rather than assertTrue(np.all(...)):
        # a failure then lists the mismatching elements instead of a bare
        # "False is not true".
        np.testing.assert_array_equal(array, testArray)
    def _getNumpyFeaturesAndLabels(self, dataset):
        """
        Collect the images of `dataset` (and its labels, when a label column is
        set) into numpy arrays held in local memory.

        The data is assumed to fit in memory: every row that still has a
        non-null loaded image is pulled in with ``collect()``.

        :param dataset: handed to ``self.loadImagesInternal`` with the input
                        column name (presumably a Spark DataFrame).
        :return: ``(X, y)`` where ``X`` is the per-row image arrays stacked on
                 a new leading axis and ``y`` is the stacked label arrays, or
                 None if the label column is undefined or empty.
        :raises ValueError: when there are no rows to extract features from,
                            or when ``toArray()`` on a label raises ValueError.
        :raises AssertionError: when the label column is not of type
                                ``spla.VectorUDT``.
        """
        image_uri_col = self.getInputCol()
        label_col = None
        if self.isDefined(self.labelCol) and self.getLabelCol() != "":
            label_col = self.getLabelCol()
        tmp_image_col = self._loadedImageCol()
        with_images = self.loadImagesInternal(dataset, image_uri_col)
        image_df = with_images.dropna(subset=[tmp_image_col])

        # Pull every row locally and convert each image struct to an array.
        collected = image_df.collect()
        per_row_features = [imageStructToArray(r[tmp_image_col]) for r in collected]
        if not per_row_features:  # empty list: nothing was loaded
            raise ValueError("Cannot extract any feature from dataset!")
        X = np.stack(per_row_features, axis=0)

        y = None
        if label_col is not None:
            # Labels are expected to be stored as vectors.
            label_dtype = image_df.schema[label_col].dataType
            assert isinstance(label_dtype, spla.VectorUDT), \
                "must encode labels in one-hot vector format, but got {}".format(label_dtype)

            per_row_labels = []
            for r in collected:
                try:
                    label_array = r[label_col].toArray()
                except ValueError:
                    raise ValueError("Cannot extract encoded label array")
                per_row_labels.append(label_array)

            if not per_row_labels:
                raise ValueError("Failed to load any labels from dataset, but labels are required")

            y = np.stack(per_row_labels, axis=0)
            n_features, n_labels = X.shape[0], y.shape[0]
            assert n_labels == n_features, \
                "number of features {} != number of labels {}".format(X.shape[0], y.shape[0])

        return X, y
Exemplo n.º 7
0
 def _executeTensorflow(self, graph, input_tensor_name, output_tensor_name,
                        df,  input_col="image"):
     """Run `graph` on every image of `df` in a local TensorFlow session.

     :param graph: graph to execute; tensors are looked up in it by name.
     :param input_tensor_name: name of the tensor that is fed each image.
     :param output_tensor_name: name of the tensor that is fetched.
     :param df: collected in full; each row must hold an image struct in
                `input_col` (presumably a Spark DataFrame).
     :param input_col: column holding the image struct. The struct's
                       'origin' field is used as the key of the results.
     :return: tuple ``(values, topK)`` of dicts keyed by image origin:
              ``values`` holds the fetched output as a numpy array and
              ``topK`` the first entry of ``decode_predictions(..., top=5)``.
     """
     with tf.Session(graph=graph) as sess:
         output_tensor = graph.get_tensor_by_name(output_tensor_name)
         # Loop-invariant, so resolve the input tensor once, not per row.
         input_tensor = graph.get_tensor_by_name(input_tensor_name)
         image_collected = df.collect()
         values = {}
         topK = {}
         for img_row in image_collected:
             image_struct = img_row[input_col]
             # Add a leading axis of size 1 so that a single image is fed.
             image = np.expand_dims(imageStructToArray(image_struct), axis=0)
             # Read the origin from the same column the image came from
             # (previously hard-coded to 'image', ignoring `input_col`).
             uri = image_struct['origin']
             output = sess.run([output_tensor],
                               feed_dict={input_tensor: image})
             values[uri] = np.array(output[0])
             topK[uri] = decode_predictions(values[uri], top=5)[0]
     return values, topK
Exemplo n.º 8
0
 def _executeTensorflow(self, graph, input_tensor_name, output_tensor_name,
                        df, id_col="filePath", input_col="image"):
     """Run `graph` on every image of `df` in a local TensorFlow session.

     :param graph: graph to execute; tensors are looked up in it by name.
     :param input_tensor_name: name of the tensor that is fed each image.
     :param output_tensor_name: name of the tensor that is fetched.
     :param df: collected in full; each row must provide `id_col` and
                `input_col` (presumably a Spark DataFrame).
     :param id_col: column whose value is used as the key of the results.
     :param input_col: column holding the image struct to convert and feed.
     :return: tuple ``(values, topK)`` of dicts keyed by the `id_col` value:
              ``values`` holds the fetched output as a numpy array and
              ``topK`` the first entry of ``decode_predictions(..., top=5)``.
     """
     with tf.Session(graph=graph) as sess:
         output_tensor = graph.get_tensor_by_name(output_tensor_name)
         # The input tensor does not change between rows; look it up once
         # instead of on every iteration.
         input_tensor = graph.get_tensor_by_name(input_tensor_name)
         image_collected = df.collect()
         values = {}
         topK = {}
         for img_row in image_collected:
             # Add a leading axis of size 1 so that a single image is fed.
             image = np.expand_dims(imageStructToArray(img_row[input_col]), axis=0)
             uri = img_row[id_col]
             output = sess.run([output_tensor],
                               feed_dict={input_tensor: image})
             values[uri] = np.array(output[0])
             topK[uri] = decode_predictions(values[uri], top=5)[0]
     return values, topK
    def test_loadImages(self):
        # Loading the sample image URIs through the transformer must yield a
        # two-column DataFrame whose loaded images all have the InceptionV3
        # input shape with 3 channels.
        uri_column = "uri"
        prediction_column = "preds"

        keras_model_file = image_utils.prepInceptionV3KerasModelFile("inceptionV3.h5")
        transformer = KerasImageFileTransformer(
            inputCol=uri_column,
            outputCol=prediction_column,
            modelFile=keras_model_file,
            imageLoader=image_utils.loadAndPreprocessKerasInceptionV3,
            outputMode="vector")

        sample_uris = image_utils.getSampleImagePathsDF(self.sql, uri_column)
        loaded = transformer.loadImagesInternal(sample_uris, uri_column)
        self.assertEqual(len(loaded.columns), 2)

        loaded_image_column = transformer._loadedImageCol()
        expected_shape = InceptionV3Constants.INPUT_SHAPE + (3,)
        for loaded_row in loaded.collect():
            pixels = imageStructToArray(loaded_row[loaded_image_column])
            self.assertEqual(pixels.shape, expected_shape)
Exemplo n.º 10
0
    def test_resize(self):
        # A size with three entries must be rejected.
        with self.assertRaises(ValueError):
            imageIO.createResizeImageUDF([1, 2, 3])

        target_height, target_width = 4, 5
        shrink = imageIO.createResizeImageUDF([target_height, target_width]).func
        original_struct = imageIO.imageArrayToStruct(array)
        shrunk_struct = shrink(original_struct)
        self.assertEqual(shrunk_struct.height, target_height)
        self.assertEqual(shrunk_struct.width, target_width)

        # Reference result: the same resize done with PIL, which takes the
        # size as (width, height). Channels are reversed on the way in and out.
        pil_image = PIL.Image.fromarray(obj=imageIO._reverseChannels(array))
        pil_resized = pil_image.resize((target_width, target_height))
        expected_pixels = imageIO._reverseChannels(np.asarray(pil_resized))
        np.testing.assert_array_equal(expected_pixels,
                                      imageIO.imageStructToArray(shrunk_struct))

        # Resizing to the current size must give back an equal image.
        keep_size = imageIO.createResizeImageUDF(
            (original_struct.height, original_struct.width)).func
        self.assertEqual(original_struct, keep_size(original_struct))

        # Every field of the image schema must be readable on the result;
        # the lookup itself is the check.
        for field_name in ImageSchema.imageSchema['image'].dataType.names:
            shrunk_struct[field_name]
    def test_loadImages(self):
        # The transformer's image loading step must produce a DataFrame with
        # two columns, and each loaded image must have the InceptionV3 input
        # shape plus 3 channels.
        uri_col_name = "uri"
        preds_col_name = "preds"

        model_file = image_utils.prepInceptionV3KerasModelFile("inceptionV3.h5")
        transformer = KerasImageFileTransformer(
            inputCol=uri_col_name, outputCol=preds_col_name, modelFile=model_file,
            imageLoader=image_utils.loadAndPreprocessKerasInceptionV3,
            outputMode="vector")

        paths_df = image_utils.getSampleImagePathsDF(self.sql, uri_col_name)
        loaded_df = transformer._loadImages(paths_df)
        self.assertEqual(len(loaded_df.columns), 2)

        image_col_name = transformer._loadedImageCol()
        expected_shape = InceptionV3Constants.INPUT_SHAPE + (3,)
        for loaded_row in loaded_df.collect():
            image_array = imageStructToArray(loaded_row[image_col_name])
            self.assertEqual(image_array.shape, expected_shape)
Exemplo n.º 12
0
    def test_resize(self):
        # Only two-element sizes are accepted.
        with self.assertRaises(ValueError):
            imageIO.createResizeImageUDF([1, 2, 3])

        new_height, new_width = 4, 5
        resize_fn = imageIO.createResizeImageUDF([new_height, new_width]).func
        source_row = imageIO.imageArrayToStruct(array)
        resized_row = resize_fn(source_row)
        self.assertEqual(resized_row.height, new_height)
        self.assertEqual(resized_row.width, new_width)

        # Cross-check against PIL, whose resize() expects (width, height).
        # The channel order is reversed before and after going through PIL.
        reversed_input = imageIO._reverseChannels(array)
        pil_resized = PIL.Image.fromarray(obj=reversed_input).resize((new_width, new_height))
        reference = imageIO._reverseChannels(np.asarray(pil_resized))
        actual = imageIO.imageStructToArray(resized_row)
        np.testing.assert_array_equal(reference, actual)

        # Asking for the size the image already has must leave it unchanged.
        same_size_fn = imageIO.createResizeImageUDF(
            (source_row.height, source_row.width)).func
        unchanged_row = same_size_fn(source_row)
        self.assertEqual(source_row, unchanged_row)

        # Accessing each schema field on the result fails if one is missing.
        for name in ImageSchema.imageSchema['image'].dataType.names:
            resized_row[name]
Exemplo n.º 13
0
 def do_nothing(imgRow):
     """Convert `imgRow` to an array and straight back to an image struct.

     :param imgRow: image struct accepted by ``imageIO.imageStructToArray``.
     :return: the struct rebuilt by ``imageIO.imageArrayToStruct``.
     """
     return imageIO.imageArrayToStruct(imageIO.imageStructToArray(imgRow))
Exemplo n.º 14
0
 def do_nothing(imgRow):
     """Convert `imgRow` to an array and back, keeping its Spark image mode.

     :param imgRow: image struct accepted by ``imageIO.imageType`` and
                    ``imageIO.imageStructToArray``.
     :return: the struct rebuilt by ``imageIO.imageArrayToStruct`` using the
              ``sparkMode`` of the input's image type.
     """
     # The image type is read from the input row before it is converted.
     source_type = imageIO.imageType(imgRow)
     return imageIO.imageArrayToStruct(
         imageIO.imageStructToArray(imgRow), source_type.sparkMode)
Exemplo n.º 15
0
 def do_nothing(imgRow):
     """Round-trip `imgRow` through its array form.

     :param imgRow: image struct accepted by ``imageIO.imageStructToArray``.
     :return: the result of ``imageIO.imageArrayToStruct`` on that array.
     """
     pixels = imageIO.imageStructToArray(imgRow)
     rebuilt = imageIO.imageArrayToStruct(pixels)
     return rebuilt
Exemplo n.º 16
0
 def do_nothing(imgRow):
     """Round-trip `imgRow` through its array form with the same Spark mode.

     :param imgRow: image struct accepted by ``imageIO.imageType`` and
                    ``imageIO.imageStructToArray``.
     :return: the result of ``imageIO.imageArrayToStruct`` on the array,
              called with the ``sparkMode`` of the input's image type.
     """
     # Look up the image type first, then convert the row.
     img_type = imageIO.imageType(imgRow)
     pixels = imageIO.imageStructToArray(imgRow)
     rebuilt = imageIO.imageArrayToStruct(pixels, img_type.sparkMode)
     return rebuilt