示例#1
0
 def _rowWithImage(self, img):
     """Return a one-row list holding the image-struct field values of ``img`` cast to uint8."""
     struct = imageIO.imageArrayToStruct(img.astype('uint8'))
     schema_fields = ImageSchema.imageSchema['image'].dataType
     # Emit the values in schema order to work around a pyspark bug
     values = [getattr(struct, schema_field.name) for schema_field in schema_fields]
     return [values]
 def check_image_round_trip(img_arr):
     """Encode ``img_arr`` as an image struct, decode it again and assert nothing changed."""
     fields = imageArrayToStruct(img_arr).asDict()
     # The decoder is fed plain bytes rather than whatever buffer type the struct holds
     fields['data'] = bytes(fields['data'])
     pixel_dtype = imageTypeByOrdinal(fields['mode']).dtype
     decoded = exec_gfn_spimg_decode(fields, pixel_dtype)
     self.assertTrue(np.all(decoded == img_arr))
    def test_pipeline(self):
        """ The spimage-converter + Xception pipeline must reproduce the Keras predictions """
        jpeg_paths = glob(os.path.join(_getSampleJPEGDir(), '*.jpg'))

        keras_model = Xception(weights="imagenet")
        converter = gfac.buildSpImageConverter(SparkMode.RGB_FLOAT32)
        classifier = GraphFunction.fromKeras(keras_model)
        composed = GraphFunction.fromList([('spimage', converter),
                                           ('xception', classifier)])

        for jpeg_path in jpeg_paths:
            size = tuple(keras_model.input.shape.as_list()[1:-1])
            pil_img = load_img(jpeg_path, target_size=size)
            batch = np.expand_dims(img_to_array(pil_img), axis=0)
            net_input = xcpt.preprocess_input(batch)
            expected = keras_model.predict(net_input)

            struct_fields = imageArrayToStruct(net_input).asDict()
            struct_fields['data'] = bytes(struct_fields['data'])
            with IsolatedSession() as issn:
                # Import with a blank scope name so that spimg fields match the input names
                feeds, fetches = issn.importGraphFunction(composed, prefix="")
                feed_dict = {}
                for tnsr in feeds:
                    feed_dict[tnsr] = struct_fields[tfx.op_name(tnsr, issn.graph)]
                actual = issn.run(fetches[0], feed_dict=feed_dict)
                # Uncomment the line below to see the graph
                # tfx.write_visualization_html(issn.graph,
                #                              NamedTemporaryFile(prefix="gdef", suffix=".html").name)

            self.assertTrue(np.all(actual == expected))
    def test_pipeline(self):
        """ Composed graph function (spimage -> Xception) must match direct Keras inference """
        sample_files = glob(os.path.join(_getSampleJPEGDir(), '*.jpg'))

        reference_model = Xception(weights="imagenet")
        pipeline = GraphFunction.fromList([
            ('spimage', gfac.buildSpImageConverter(SparkMode.RGB_FLOAT32)),
            ('xception', GraphFunction.fromKeras(reference_model)),
        ])

        for sample_file in sample_files:
            input_hw = tuple(reference_model.input.shape.as_list()[1:-1])
            loaded = load_img(sample_file, target_size=input_hw)
            as_batch = np.expand_dims(img_to_array(loaded), axis=0)
            model_input = xcpt.preprocess_input(as_batch)
            reference_preds = reference_model.predict(model_input)

            row_dict = imageArrayToStruct(model_input).asDict()
            row_dict['data'] = bytes(row_dict['data'])
            with IsolatedSession() as issn:
                # A blank import scope name keeps the spimg field names equal to the input names
                feeds, fetches = issn.importGraphFunction(pipeline, prefix="")
                feed_dict = {tnsr: row_dict[tfx.op_name(issn.graph, tnsr)] for tnsr in feeds}
                pipeline_preds = issn.run(fetches[0], feed_dict=feed_dict)
                # Uncomment the line below to see the graph
                # tfx.write_visualization_html(issn.graph,
                #                              NamedTemporaryFile(prefix="gdef", suffix=".html").name)

            self.assertTrue(np.all(pipeline_preds == reference_preds))
示例#5
0
 def rowWithImage(img):
     """Return a one-row list with the RGB image-struct field values of ``img`` cast to uint8."""
     struct = imageIO.imageArrayToStruct(img.astype('uint8'),
                                         imageIO.SparkMode.RGB)
     # Emit the values in schema order to work around a pyspark bug
     ordered = [getattr(struct, schema_field.name)
                for schema_field in imageIO.imageSchema]
     return [ordered]
 def _test(array):
     """Check that ``array`` survives an array -> struct -> array round trip."""
     rows, cols, _ = array.shape
     struct = imageIO.imageArrayToStruct(array)
     self.assertEqual(struct.height, rows)
     self.assertEqual(struct.width, cols)
     self.assertEqual(struct.data, array.tobytes())
     np.testing.assert_array_equal(array, imageIO.imageStructToArray(struct))
 def _test(array):
     """Assert the struct built from ``array`` has matching size and bytes, and decodes back."""
     n_rows, n_cols, _channels = array.shape
     as_struct = imageIO.imageArrayToStruct(array)
     self.assertEqual(as_struct.height, n_rows)
     self.assertEqual(as_struct.width, n_cols)
     self.assertEqual(as_struct.data, array.tobytes())
     restored = imageIO.imageStructToArray(as_struct)
     np.testing.assert_array_equal(array, restored)
示例#8
0
def create_image(img_id, img_in_bytes, height, width, encoding, is_bigendian):
    """Build a Spark image struct from a raw 3-channel uint8 pixel buffer.

    ``img_id``, ``encoding`` and ``is_bigendian`` are accepted but not used.

    :param img_in_bytes: bytes-like pixel buffer; it is reshaped to
        ``(height, width, 3)``, so it must hold exactly that many bytes.
    :param height: number of image rows.
    :param width: number of image columns.
    :return: the result of ``imageArrayToStruct`` on the channel-swapped array.
    """
    import numpy as np
    import cv2
    from sparkdl.image.imageIO import imageArrayToStruct

    # np.frombuffer replaces the deprecated binary mode of np.fromstring, and
    # bytes() (unlike str() on Python 3) yields the payload rather than its repr.
    a = np.frombuffer(bytes(img_in_bytes), dtype=np.uint8)
    img = a.reshape(height, width, 3)
    # Convert from RGB to BGR channel order.
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    return imageArrayToStruct(img)
    def test_imageArrayToStruct(self):
        """Exercise imageArrayToStruct: matching types, casting, and both error paths."""
        modes = imageIO.SparkMode
        rows, cols, _ = array.shape

        # Conversion with matching types keeps the size and the raw bytes
        struct = imageIO.imageArrayToStruct(array)
        self.assertEqual(struct.height, rows)
        self.assertEqual(struct.width, cols)
        self.assertEqual(struct.data, array.tobytes())

        # Casting to RGB_FLOAT32 must yield 4 bytes per array element
        struct = imageIO.imageArrayToStruct(array, modes.RGB_FLOAT32)
        self.assertEqual(struct.height, rows)
        self.assertEqual(struct.width, cols)
        self.assertEqual(len(struct.data), array.size * 4)

        # A mode with a mismatching channel count is rejected
        with self.assertRaises(ValueError):
            imageIO.imageArrayToStruct(array, modes.FLOAT32)

        # An unsafe cast (float32 data into the RGB mode) is rejected
        float_pixels = np.zeros((3, 4, 3), dtype='float32')
        with self.assertRaises(ValueError):
            imageIO.imageArrayToStruct(float_pixels, modes.RGB)
示例#10
0
def create_image_dataframe(row):
    """Load the image at ``row.uri`` and return ``(image_row, row.label)``.

    The image struct's ``mode`` field is overwritten with 16 before the new
    ``Row`` is built.
    """
    fields = imageArrayToStruct(load_image_from_uri(row.uri)).asDict()

    # Mode 16 is _OcvType(name="CV_8UC3", ord=16, nChannels=3, dtype="uint8"); reference:
    # https://github.com/databricks/spark-deep-learning/blob/master/python/sparkdl/image/imageIO.py
    fields['mode'] = 16

    return Row(**fields), row.label
示例#11
0
    def test_resize(self):
        """Check _resizeFunction: output size, schema fields, same-size resize, bad size."""
        imgAsRow = imageIO.imageArrayToStruct(array)
        smaller = imageIO._resizeFunction([4, 5])
        smallerImg = smaller(imgAsRow)
        # Every schema field must be accessible on the resized row
        for n in imageIO.imageSchema.names:
            smallerImg[n]
        self.assertEqual(smallerImg.height, 4)
        self.assertEqual(smallerImg.width, 5)

        # Resizing to the current size must give back an equal image. The previous
        # assertion compared sameImage with itself and so could never fail.
        sameImage = imageIO._resizeFunction([imgAsRow.height, imgAsRow.width])(imgAsRow)
        self.assertEqual(imgAsRow, sameImage)

        # A size that is not a (height, width) pair is rejected
        self.assertRaises(ValueError, imageIO._resizeFunction, [1, 2, 3])
示例#12
0
    def test_imageArrayToStruct(self):
        """Verify imageArrayToStruct for plain conversion, casting and rejected inputs."""
        spark_modes = imageIO.SparkMode
        n_rows, n_cols, _channels = array.shape

        def assert_size(struct):
            # Height and width must always mirror the source array
            self.assertEqual(struct.height, n_rows)
            self.assertEqual(struct.width, n_cols)

        # Matching types: the data field equals the array's raw bytes
        plain = imageIO.imageArrayToStruct(array)
        assert_size(plain)
        self.assertEqual(plain.data, array.tobytes())

        # Casting to RGB_FLOAT32: the data field holds 4 bytes per element
        as_float = imageIO.imageArrayToStruct(array, spark_modes.RGB_FLOAT32)
        assert_size(as_float)
        self.assertEqual(len(as_float.data), array.size * 4)

        # Channel mismatch raises
        with self.assertRaises(ValueError):
            imageIO.imageArrayToStruct(array, spark_modes.FLOAT32)

        # Unsafe cast raises
        float_input = np.zeros((3, 4, 3), dtype='float32')
        with self.assertRaises(ValueError):
            imageIO.imageArrayToStruct(float_input, spark_modes.RGB)
示例#13
0
    def test_resize(self):
        """Check _resizeFunction: output size, schema fields, same-size resize, bad size."""
        imgAsRow = imageIO.imageArrayToStruct(array)
        smaller = imageIO._resizeFunction([4, 5])
        smallerImg = smaller(imgAsRow)
        # Every schema field must be accessible on the resized row
        for n in imageIO.imageSchema.names:
            smallerImg[n]
        self.assertEqual(smallerImg.height, 4)
        self.assertEqual(smallerImg.width, 5)

        # Resizing to the current size must give back an equal image. The previous
        # assertion compared sameImage with itself and so could never fail.
        sameImage = imageIO._resizeFunction([imgAsRow.height,
                                             imgAsRow.width])(imgAsRow)
        self.assertEqual(imgAsRow, sameImage)

        # A size that is not a (height, width) pair is rejected
        self.assertRaises(ValueError, imageIO._resizeFunction, [1, 2, 3])
    def udf_impl(spimg):
        """Re-load ``spimg`` through ``preprocessor`` and return the result as an image struct.

        The image is written to a temporary PNG file, ``preprocessor`` is called
        with that file's path, and its output is cast to uint8 before being
        converted with ``imageArrayToStruct``.

        :param spimg: image struct providing ``width``, ``height`` and ``data``.
        :return: image struct built from the preprocessor output.
        :raises AssertionError: if ``preprocessor`` does not return a numpy array.
        """
        import numpy as np
        from PIL import Image
        from tempfile import NamedTemporaryFile
        from sparkdl.image.imageIO import imageArrayToStruct, imageType

        pil_mode = imageType(spimg).pilMode
        img_shape = (spimg.width, spimg.height)
        img = Image.frombytes(pil_mode, img_shape, bytes(spimg.data))
        # Warning: must use lossless format to guarantee consistency
        # The context manager closes (and thereby removes) the temp file
        # deterministically instead of leaving that to garbage collection.
        with NamedTemporaryFile(suffix='.png') as temp_fp:
            img.save(temp_fp, 'PNG')
            # Make sure all bytes are on disk before the file is re-opened by name
            temp_fp.flush()
            img_arr_reloaded = preprocessor(temp_fp.name)
        assert isinstance(img_arr_reloaded, np.ndarray), \
            "expect preprocessor to return a numpy array"
        img_arr_reloaded = img_arr_reloaded.astype(np.uint8)
        return imageArrayToStruct(img_arr_reloaded)
def create_image(img_in_bytes, height, width, encoding, is_bigendian):
    """Build a Spark image struct from base64-encoded 3-channel uint8 pixel data.

    ``encoding`` and ``is_bigendian`` are accepted but not used.

    :param img_in_bytes: base64-encoded pixel buffer; may be empty or ``None``.
    :param height: number of image rows.
    :param width: number of image columns.
    :return: the result of ``imageArrayToStruct`` on the channel-swapped array,
        or ``None`` when ``img_in_bytes`` is falsy.
    """
    import numpy as np
    import cv2
    from sparkdl.image.imageIO import imageArrayToStruct

    result = None

    # Potentially, due to the time alignment, there are image entries with empty data
    if img_in_bytes:

        # np.frombuffer replaces the deprecated binary mode of np.fromstring
        a = np.frombuffer(base64.standard_b64decode(img_in_bytes),
                          dtype=np.uint8)
        img = a.reshape(height, width, 3)
        # Convert from RGB to BGR channel order.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        result = imageArrayToStruct(img)

    return result
    def udf_impl(spimg):
        """Re-load ``spimg`` through ``preprocessor`` and return the result as an image struct.

        The image is written to a temporary PNG file, ``preprocessor`` is called
        with that file's path, and its output is cast to uint8, passed through
        ``imageIO.fixColorChannelOrdering`` and converted with ``imageArrayToStruct``.

        :param spimg: image struct accepted by ``imageIO.imageStructToPIL``.
        :return: image struct built from the preprocessor output.
        :raises AssertionError: if ``preprocessor`` does not return a numpy array.
        """
        import numpy as np
        from tempfile import NamedTemporaryFile
        from sparkdl.image.imageIO import imageArrayToStruct

        img = imageIO.imageStructToPIL(spimg)
        # Warning: must use lossless format to guarantee consistency
        # The context manager closes (and thereby removes) the temp file
        # deterministically instead of leaving that to garbage collection.
        with NamedTemporaryFile(suffix='.png') as temp_fp:
            img.save(temp_fp, 'PNG')
            # Make sure all bytes are on disk before the file is re-opened by name
            temp_fp.flush()
            img_arr_reloaded = preprocessor(temp_fp.name)
        assert isinstance(img_arr_reloaded, np.ndarray), \
            "expect preprocessor to return a numpy array"
        img_arr_reloaded = img_arr_reloaded.astype(np.uint8)
        # Keras works in RGB order, need to fix the order
        img_arr_reloaded = imageIO.fixColorChannelOrdering(
            currentOrder='RGB', imgAry=img_arr_reloaded)
        return imageArrayToStruct(img_arr_reloaded)
    def udf_impl(spimg):
        """Round-trip ``spimg`` through a PNG file and ``preprocessor``; return an image struct.

        ``preprocessor`` receives the path of a temporary PNG file holding the
        image. Its output is cast to uint8, passed through
        ``imageIO.fixColorChannelOrdering`` and converted with ``imageArrayToStruct``.

        :param spimg: image struct accepted by ``imageIO.imageStructToPIL``.
        :return: image struct built from the preprocessor output.
        :raises AssertionError: if ``preprocessor`` does not return a numpy array.
        """
        import numpy as np
        from tempfile import NamedTemporaryFile
        from sparkdl.image.imageIO import imageArrayToStruct

        img = imageIO.imageStructToPIL(spimg)
        # Warning: must use lossless format to guarantee consistency
        # Using the file as a context manager releases the handle (and deletes
        # the file) as soon as the preprocessor is done with it.
        with NamedTemporaryFile(suffix='.png') as temp_fp:
            img.save(temp_fp, 'PNG')
            # Flush so the preprocessor sees the complete file when opening it by name
            temp_fp.flush()
            img_arr_reloaded = preprocessor(temp_fp.name)
        assert isinstance(img_arr_reloaded, np.ndarray), \
            "expect preprocessor to return a numpy array"
        img_arr_reloaded = img_arr_reloaded.astype(np.uint8)
        # Keras works in RGB order, need to fix the order
        img_arr_reloaded = imageIO.fixColorChannelOrdering(
            currentOrder='RGB', imgAry=img_arr_reloaded)
        return imageArrayToStruct(img_arr_reloaded)
    def test_resize(self):
        """Check createResizeImageUDF: validation, output size, PIL parity, no-op, schema."""
        with self.assertRaises(ValueError):
            imageIO.createResizeImageUDF([1, 2, 3])

        shrink = imageIO.createResizeImageUDF([4, 5]).func
        original = imageIO.imageArrayToStruct(array)
        shrunk = shrink(original)
        self.assertEqual(shrunk.height, 4)
        self.assertEqual(shrunk.width, 5)

        # Reference result: the same resize done directly with PIL
        flipped = imageIO._reverseChannels(array)
        pil_small = PIL.Image.fromarray(obj=flipped).resize((5, 4))
        expected = imageIO._reverseChannels(np.asarray(pil_small))
        np.testing.assert_array_equal(expected, imageIO.imageStructToArray(shrunk))
        # Resizing to the current size must be a no-op
        current_size = (original.height, original.width)
        unchanged = imageIO.createResizeImageUDF(current_size).func(original)
        self.assertEqual(original, unchanged)
        # The result must expose every field of the image schema
        for field_name in ImageSchema.imageSchema['image'].dataType.names:
            shrunk[field_name]
    def test_resize(self):
        """Verify the resize UDF: bad size rejected, 4x5 output, PIL agreement, no-op, schema."""
        resize_udf = imageIO.createResizeImageUDF
        self.assertRaises(ValueError, resize_udf, [1, 2, 3])

        source_row = imageIO.imageArrayToStruct(array)
        small_row = resize_udf([4, 5]).func(source_row)
        self.assertEqual(small_row.height, 4)
        self.assertEqual(small_row.width, 5)

        # Build the expected pixels by resizing with PIL directly
        pil_img = PIL.Image.fromarray(obj=imageIO._reverseChannels(array))
        pil_resized = pil_img.resize((5, 4))
        expected_pixels = imageIO._reverseChannels(np.asarray(pil_resized))
        actual_pixels = imageIO.imageStructToArray(small_row)
        np.testing.assert_array_equal(expected_pixels, actual_pixels)
        # Asking for the size the image already has must return an equal row
        keep_size = resize_udf((source_row.height, source_row.width)).func
        self.assertEqual(source_row, keep_size(source_row))
        # All image-schema fields must be present on the resized row
        for name in ImageSchema.imageSchema['image'].dataType.names:
            small_row[name]
 def load_image_uri_impl(uri):
     """Load ``uri`` with ``loader`` and return it as an image struct, or ``None`` on failure.

     Loading is best-effort: any ordinary error yields ``None``.
     """
     try:
         return imageArrayToStruct(loader(uri))
     # Exception (rather than a bare except) lets KeyboardInterrupt and
     # SystemExit propagate while still swallowing load/convert failures.
     except Exception:  # pylint: disable=broad-except
         return None
示例#21
0
#arr = rdd.take(1)[0]
#
#Image.open(BytesIO(arr))

# COMMAND ----------

from PIL import Image
from io import BytesIO
from pyspark.sql.types import BinaryType, StructType, StructField
from functools import partial

# Flat-map `fin` through `msg_map`, taking the `.data` attribute of each record on
# the compressed center-camera topic.
# NOTE(review): `fin`, `msg_map` and `conn_d` are defined in other cells — their
# exact semantics are not visible here.
rdd = fin.flatMap(
    partial(msg_map, func=lambda r: r.data, conn=conn_d['/center_camera/image_color/compressed'])
)

# Wrap each element in a 1-tuple so it maps onto the single-column schema below.
rddTuple = rdd.map(lambda x: (bytearray(x),))
# One non-nullable binary column named 'rawdata'.
schema = StructType([StructField('rawdata', BinaryType(), False)])
df = rddTuple.toDF(schema)
df.cache()

# COMMAND ----------

from sparkdl.image.imageIO import PIL_decode, imageArrayToStruct
from pyspark.sql.functions import col
from pyspark.ml.image import ImageSchema

# UDF that decodes a binary value with PIL_decode and wraps the result as an image
# struct; its return type is the 'image' struct type from ImageSchema.
# NOTE(review): `udf` is not imported in the visible cells — confirm it is in scope.
imageUdf = udf(lambda b: imageArrayToStruct(PIL_decode(b)), ImageSchema.imageSchema['image'].dataType)


# Add the decoded image as a new 'image' column and show it.
img = df.withColumn('image', imageUdf(col('rawdata')))
display(img.select('image'))
 def do_nothing(imgRow):
     """Convert ``imgRow`` to an array and straight back into an image struct."""
     return imageIO.imageArrayToStruct(imageIO.imageStructToArray(imgRow))
 def keras_load_spimg(fpath):
     """Load ``fpath`` with ``keras_load_img`` and wrap the result as an image struct."""
     loaded = keras_load_img(fpath)
     return imageArrayToStruct(loaded)
 def pil_load_spimg(fpath):
     """Open ``fpath`` with PIL and convert its uint8 pixel array to an image struct."""
     import numpy as np
     from PIL import Image
     pixels = np.array(Image.open(fpath), dtype=np.uint8)
     return imageArrayToStruct(pixels)
 def do_nothing(imgRow):
     """Round-trip ``imgRow`` through ``imageStructToArray`` and ``imageArrayToStruct``."""
     pixels = imageIO.imageStructToArray(imgRow)
     rebuilt = imageIO.imageArrayToStruct(pixels)
     return rebuilt
 def load(uri):
     """Load ``uri`` with ``loader`` and return the result as an image struct."""
     return imageIO.imageArrayToStruct(loader(uri))
 def rowWithImage(img):
     """Build a one-row list of RGB image-struct field values from ``img`` cast to uint8."""
     as_uint8 = img.astype('uint8')
     struct = imageIO.imageArrayToStruct(as_uint8, imageIO.SparkMode.RGB)
     # List the values in schema order to work around a pyspark bug
     row_values = []
     for schema_field in imageIO.imageSchema:
         row_values.append(getattr(struct, schema_field.name))
     return [row_values]
示例#28
0
 def do_nothing(imgRow):
     """Rebuild ``imgRow`` from its own array, keeping the spark mode of its image type."""
     spark_mode = imageIO.imageType(imgRow).sparkMode
     pixels = imageIO.imageStructToArray(imgRow)
     return imageIO.imageArrayToStruct(pixels, spark_mode)
示例#29
0
 def load(uri):
     """Return the image struct for whatever ``loader`` produces for ``uri``."""
     loaded = loader(uri)
     struct = imageIO.imageArrayToStruct(loaded)
     return struct
 def load_image_uri_impl(uri):
     """Load ``uri``, reverse its channels and return an image struct, or ``None`` on failure.

     Loading is best-effort: any ordinary error yields ``None``.
     """
     try:
         return imageArrayToStruct(_reverseChannels(loader(uri)))
     # Exception (rather than BaseException) lets KeyboardInterrupt and
     # SystemExit propagate while still swallowing load/convert failures.
     except Exception:  # pylint: disable=broad-except
         return None
 def load_image_uri_impl(uri):
     """Return the channel-reversed image at ``uri`` as an image struct, ``None`` on error.

     Failures in loading or conversion are deliberately swallowed.
     """
     try:
         return imageArrayToStruct(_reverseChannels(loader(uri)))
     # Catch Exception only: BaseException would also trap KeyboardInterrupt
     # and SystemExit, which should not be silenced here.
     except Exception:  # pylint: disable=broad-except
         return None
 def pil_load_spimg(fpath):
     """Open ``fpath`` with PIL and return it as an image struct with reversed channels."""
     import numpy as np
     from PIL import Image
     pixels = np.array(Image.open(fpath), dtype=np.uint8)
     # PIL is RGB, image schema is BGR => need to flip the channels
     flipped = _reverseChannels(pixels)
     return imageArrayToStruct(flipped)
 def keras_load_spimg(fpath):
     """Load ``fpath`` with ``keras_load_img`` and return a channel-reversed image struct."""
     loaded = keras_load_img(fpath)
     # Keras loads image in RGB order, ImageSchema expects BGR => need to flip
     flipped = _reverseChannels(loaded)
     return imageArrayToStruct(flipped)
 def check_image_round_trip(img_arr):
     """Encode ``img_arr`` as an image struct, decode it by mode and assert nothing changed."""
     fields = imageArrayToStruct(img_arr).asDict()
     # The decoder is fed plain bytes rather than whatever buffer type the struct holds
     fields['data'] = bytes(fields['data'])
     decoded = exec_gfn_spimg_decode(fields, fields['mode'])
     self.assertTrue(np.all(decoded == img_arr))
示例#35
0
 def do_nothing(imgRow):
     """Decode ``imgRow`` to an array and re-encode it with the spark mode of its image type."""
     image_type = imageIO.imageType(imgRow)
     decoded = imageIO.imageStructToArray(imgRow)
     reencoded = imageIO.imageArrayToStruct(decoded, image_type.sparkMode)
     return reencoded