def _add_jpeg_decoding(module_spec):
    """Build the ops that decode one JPEG and resize it for the module.

    Args:
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        A pair ``(jpeg_data, resized_image)``: the string placeholder to feed
        JPEG data into, and the output tensor of the preprocessing steps.
    """
    target_height, target_width = hub.get_expected_image_size(module_spec)
    channel_count = hub.get_num_image_channels(module_spec)

    # Feed point for the encoded JPEG data of a single image.
    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')

    # Decode with the channel count the module expects, then convert from the
    # full uint8 range to float32 in [0, 1].
    image_float = tf.image.convert_image_dtype(
        tf.image.decode_jpeg(jpeg_data, channels=channel_count), tf.float32)

    # Add a leading batch dimension of size 1; resize_bilinear is fed this
    # batched tensor rather than the bare decoded image.
    image_batch = tf.expand_dims(image_float, 0)

    # The cast applies to the [height, width] size vector handed to the
    # resize op, not to the image data itself.
    target_size = tf.cast(tf.stack([target_height, target_width]),
                          dtype=tf.int32)

    resized_image = tf.image.resize_bilinear(image_batch, target_size)
    return jpeg_data, resized_image
def create_module_graph(module_spec):
    """Create a new graph with JPEG preprocessing feeding the Hub module.

    Args:
        module_spec: the hub.ModuleSpec for the image module being used.

    Returns:
        graph: the tf.Graph that was created.
        bottleneck_tensor: the bottleneck values output by the module.
        jpeg_data: the placeholder to feed JPEG data into.
    """
    height, width = hub.get_expected_image_size(module_spec)
    channel_count = hub.get_num_image_channels(module_spec)

    graph = tf.Graph()
    with graph.as_default():
        jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
        # Decode, then convert from full-range uint8 to float32 in [0, 1].
        image_float = tf.image.convert_image_dtype(
            tf.image.decode_jpeg(jpeg_data, channels=channel_count),
            tf.float32)
        # Leading batch dimension of size 1 for the resize op / module input.
        image_batch = tf.expand_dims(image_float, 0)
        target_size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
        resized_image = tf.image.resize_bilinear(image_batch, target_size)

        # Instantiate the module inside this graph and apply it to the image.
        bottleneck_tensor = hub.Module(module_spec)(resized_image)

    return graph, bottleneck_tensor, jpeg_data
def get_bottleneck_tensor(input_jpeg_str):
    # type: (tf.Tensor) -> tf.Tensor
    """Compute the bottleneck tensor for an input JPEG string tensor.

    The image is decoded and resized to the size expected by the module
    loaded from ``_FEATURE_VECTORS_MODULE_URL`` (described by the original
    author as the Inception V3 model), then run through that module.

    Args:
        input_jpeg_str: Tensor for input JPEG image.

    Returns:
        bottleneck_tensor: Tensor for output bottleneck Tensor.
    """
    spec = tensorflow_hub.load_module_spec(_FEATURE_VECTORS_MODULE_URL)
    target_height, target_width = tensorflow_hub.get_expected_image_size(spec)
    channel_count = tensorflow_hub.get_num_image_channels(spec)

    # Decode and convert to float32, then add a leading batch dimension.
    image_float = tf.image.convert_image_dtype(
        tf.image.decode_jpeg(input_jpeg_str, channels=channel_count),
        tf.float32)
    image_batch = tf.expand_dims(image_float, 0)

    # Resize to the [height, width] the module expects.
    target_size = tf.cast(tf.stack([target_height, target_width]),
                          dtype=tf.int32)
    resized_batch = tf.image.resize_bilinear(image_batch, target_size)

    module = tensorflow_hub.Module(spec)
    return module(resized_batch)
def download_image_model(mdl_url):
    # type: (str) -> tuple
    """Return the TensorFlow Hub model used to process images.

    Args:
        mdl_url: handle passed to ``tensorflow_hub.load_module_spec``.

    Returns:
        A 4-tuple ``(module, input_height, input_width, input_depth)``: the
        instantiated ``tensorflow_hub.Module`` plus the expected image size
        and channel count reported for its spec.
    """
    spec = tensorflow_hub.load_module_spec(mdl_url)
    height, width = tensorflow_hub.get_expected_image_size(spec)
    depth = tensorflow_hub.get_num_image_channels(spec)
    return (tensorflow_hub.Module(spec), height, width, depth)
def add_jpeg_decoding(module_spec):
    """Add ops that decode a JPEG and resize it to the module's input size.

    Args:
        module_spec: spec object passed to the ``hub`` size/channel helpers.

    Returns:
        ``(jpeg_data, resized_image)``: the string placeholder named
        'DecodeJPGInput' and the resized float image tensor.
    """
    height, width = hub.get_expected_image_size(module_spec)
    depth = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
    decoded = tf.image.decode_jpeg(jpeg_data, channels=depth)
    # Convert to float32, then add a leading batch dimension of size 1.
    batched = tf.expand_dims(
        tf.image.convert_image_dtype(decoded, tf.float32), 0)
    size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
    return jpeg_data, tf.image.resize_bilinear(batched, size)
def add_image_deocding(module):
    """Add ops that read a PNG file and resize it to the module's input size.

    Args:
        module: spec object passed to the ``hub`` size/channel helpers.

    Returns:
        ``(input_file, resized_image)``: the string placeholder named
        'InputFile' that receives the file name, and the resized float image
        tensor.
    """
    target_height, target_width = hub.get_expected_image_size(module)
    channel_count = hub.get_num_image_channels(module)

    input_file = tf.placeholder(tf.string, name='InputFile')
    # The placeholder carries a path; the file contents are read in-graph.
    png_bytes = tf.read_file(input_file)
    image_uint8 = tf.image.decode_png(png_bytes, channels=channel_count)

    # Convert from full range of uint8 to range [0,1] of float32, then add a
    # leading batch dimension of size 1.
    image_batch = tf.expand_dims(
        tf.image.convert_image_dtype(image_uint8, tf.float32), 0)

    target_size = tf.cast(tf.stack([target_height, target_width]),
                          dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(image_batch, target_size)
    return input_file, resized_image
def add_jpeg_decoding(module_spec):
    """Adds operations that perform JPEG decoding and resizing to the graph.

    Args:
        module_spec: spec object passed to the ``hub`` size/channel helpers.

    Returns:
        ``(jpeg_data, resized_image)``: the placeholder to feed and the
        resized float image tensor.
    """
    input_height, input_width = hub.get_expected_image_size(module_spec)

    # Never evaluate the placeholder directly, always feed it.
    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')

    input_depth = hub.get_num_image_channels(module_spec)
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    decoded_image_as_float = tf.image.convert_image_dtype(
        decoded_image, tf.float32)

    # Target [height, width] as an int32 tensor.
    stacked_size = tf.stack([input_height, input_width])
    resize_shape = tf.cast(stacked_size, dtype=tf.int32)

    # Add a leading batch dimension of size 1 before resizing.
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    resized_image = tf.image.resize_bilinear(decoded_image_4d, resize_shape)
    return jpeg_data, resized_image
def add_jpeg_decoding(module_spec):
    """Adds operations that perform JPEG decoding and resizing to the graph.

    Args:
        module_spec: spec object passed to the ``hub`` size/channel helpers.

    Returns:
        ``(jpeg_data, resized_image)``: the 'DecodeJPGInput' placeholder and
        the resized float image tensor.
    """
    out_height, out_width = hub.get_expected_image_size(module_spec)
    n_channels = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')

    # Decode, then go from the full uint8 range to float32 in [0, 1].
    as_float = tf.image.convert_image_dtype(
        tf.image.decode_jpeg(jpeg_data, channels=n_channels), tf.float32)

    # Leading batch dimension of size 1.
    as_batch = tf.expand_dims(as_float, 0)

    out_size = tf.cast(tf.stack([out_height, out_width]), dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(as_batch, out_size)
    return jpeg_data, resized_image
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Build the ops that randomly distort one JPEG for data augmentation.

    The image is decoded, scaled up by a random factor, randomly cropped back
    to the module's expected input size, optionally randomly mirrored, and
    multiplied by a random brightness factor.

    Args:
        flip_left_right: if truthy, add a random horizontal flip.
        random_crop: percentage; the image is enlarged by
            ``1 + random_crop / 100`` before cropping.
        random_scale: percentage; an extra enlargement factor is drawn
            uniformly from ``[1, 1 + random_scale / 100)``.
        random_brightness: percentage; pixel values are multiplied by a
            factor drawn uniformly from ``1 +/- random_brightness / 100``.
        module_spec: spec object passed to the ``hub`` size/channel helpers.

    Returns:
        ``(jpeg_data, distort_result)``: the 'DistortJPGInput' string
        placeholder and the distorted image tensor named 'DistortResult',
        which carries a leading batch dimension of size 1.
    """
    # Log message typo ("brigthness") fixed; everything else is unchanged.
    tf.logging.info(
        "Adding distortions. Flip: {}, crop {}%, scale {}%, brightness {}%".
        format(flip_left_right, random_crop, random_scale, random_brightness))

    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)

    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    decoded_image_as_float = tf.image.convert_image_dtype(
        decoded_image, tf.float32)
    # resize_bilinear is fed a batch, so add a leading dimension of size 1.
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)

    # Overall enlargement = fixed crop margin * random scale jitter.
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    resize_scale_value = tf.random_uniform(shape=[],
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)

    precrop_width = tf.multiply(scale_value, input_width)
    precrop_height = tf.multiply(scale_value, input_height)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)

    # Drop the batch dimension again so random_crop sees a single image.
    precropped_image_3d = tf.squeeze(precropped_image, axis=[0])
    cropped_image = tf.random_crop(precropped_image_3d,
                                   [input_height, input_width, input_depth])

    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image

    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform(shape=[],
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Perform the random transformations used for data augmentation.

    Args:
        flip_left_right: if truthy, add a random horizontal flip.
        random_crop: percentage; the image is enlarged by
            ``1 + random_crop / 100`` before cropping.
        random_scale: percentage; an extra enlargement factor is drawn
            uniformly from ``[1, 1 + random_scale / 100)``.
        random_brightness: percentage; pixel values are multiplied by a
            factor drawn uniformly from ``1 +/- random_brightness / 100``.
        module_spec: spec object passed to the ``hub`` size/channel helpers.

    Returns:
        ``(jpeg_data, distort_result)``: the placeholder to feed and the
        distorted tensor named 'DistortResult'.
    """
    # Image geometry required by the module.
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    image_float = tf.image.convert_image_dtype(decoded_image, tf.float32)

    # Enlargement factor = fixed crop margin * random scale jitter.
    margin_factor = tf.constant(1.0 + (random_crop / 100.0))
    jitter_factor = tf.random_uniform(shape=[],
                                      minval=1.0,
                                      maxval=1.0 + (random_scale / 100.0))
    scale_value = tf.multiply(margin_factor, jitter_factor)

    # Size to resize to before cropping (height first, then width).
    scaled_height = tf.multiply(scale_value, input_height)
    scaled_width = tf.multiply(scale_value, input_width)
    crop_shape = tf.cast(tf.stack([scaled_height, scaled_width]),
                         dtype=tf.int32)

    # Resize on a batch of one, then drop the batch dimension again.
    image_batch = tf.expand_dims(image_float, 0)
    enlarged_batch = tf.image.resize_bilinear(image_batch, crop_shape)
    precropped_image = tf.squeeze(enlarged_batch, axis=[0])
    cropped_image = tf.random_crop(precropped_image,
                                   [input_height, input_width, input_depth])

    # Optional random mirror.
    flipped_image = cropped_image
    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)

    # Random brightness multiplier.
    brightness_value = tf.random_uniform(
        shape=[],
        minval=1.0 - (random_brightness / 100.0),
        maxval=1.0 + (random_brightness / 100.0))
    brightened_image = tf.multiply(flipped_image, brightness_value)

    # Re-insert a leading dimension of 1 on the result.
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
def add_jpeg_decoding(module_spec):
    """Adds operations that perform JPEG decoding and resizing to the graph.

    Args:
        module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
        Tensors for the node to feed JPEG data into, and the output of the
        preprocessing steps.
    """
    height, width = hub.get_expected_image_size(module_spec)
    depth = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
    image = tf.image.decode_jpeg(jpeg_data, channels=depth)
    # Full-range uint8 -> float32 in [0, 1].
    image = tf.image.convert_image_dtype(image, tf.float32)
    # Leading batch dimension of size 1.
    image = tf.expand_dims(image, 0)

    size = tf.cast(tf.stack([height, width]), dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(image, size)
    return jpeg_data, resized_image
def decode_and_resize(hub_module):
    """Perform the image processing steps (decoding and resizing).

    Args:
        hub_module: handle passed to ``hub.load_module_spec`` (presumably a
            TensorFlow Hub module URL or path; confirm against callers).

    Returns:
        ``(data_placeholder, reshaped_image)``: the string placeholder for
        the image data and the resized tensor sized as the module expects.
    """
    spec = hub.load_module_spec(hub_module)
    target_height, target_width = hub.get_expected_image_size(spec)
    target_hw = tf.stack((target_height, target_width))
    channel_count = hub.get_num_image_channels(spec)

    data_placeholder = tf.placeholder(tf.string, name='data_placeholder')

    # Decode -> float32 -> add a leading batch dimension of size 1.
    image_uint8 = tf.image.decode_jpeg(data_placeholder,
                                       channels=channel_count)
    image_float = tf.image.convert_image_dtype(image_uint8, tf.float32)
    image_batch = tf.expand_dims(image_float, 0)

    target_size = tf.cast(target_hw, dtype=tf.int32)
    reshaped_image = tf.image.resize_bilinear(image_batch, target_size)
    return data_placeholder, reshaped_image
def __init__(self):
    """Load the module spec, record its input geometry and build the graph."""
    # URL of the pre-trained model and the spec loaded from it.
    self.HUB_MODULE = 'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1'
    self.Module_Spec = hub.load_module_spec(self.HUB_MODULE)

    # Image size and channel count required by this model.
    # NOTE: the attribute name ``Modelu_Depth`` (sic) is kept as-is because
    # other code may read it.
    expected_size = hub.get_expected_image_size(self.Module_Spec)
    self.Module_Height, self.Module_Width = expected_size
    self.Modelu_Depth = hub.get_num_image_channels(self.Module_Spec)

    # A module is understood as instrumented for quantization with TF-Lite
    # if it contains any of these ops.
    self.FAKE_QUANT_OPS = ('FakeQuantWithMinMaxVars',
                           'FakeQuantWithMinMaxVarsPerChannel')

    # Our input images use exactly the geometry the module expects.
    self.ImageHeight, self.ImageWidth, self.ImageChannels = (
        self.Module_Height, self.Module_Width, self.Modelu_Depth)

    # Set up the pre-trained graph; create_module_graph yields four values.
    module_graph = self.create_module_graph(self.Module_Spec)
    (self.graph, self.bottleneck_tensor, self.resized_input_tensor,
     self.wants_quantization) = module_graph
def add_jpeg_decoding(module_spec):
    """Add the operations that perform JPEG decoding and resizing.

    Args:
        module_spec: The hub.ModuleSpec of the image module being used.

    Returns:
        The tensor of the node to feed JPEG data into, and the output of the
        preprocessing steps.
    """
    target_height, target_width = hub.get_expected_image_size(module_spec)
    channel_count = hub.get_num_image_channels(module_spec)

    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
    image_uint8 = tf.image.decode_jpeg(jpeg_data, channels=channel_count)

    # Convert from the full range of uint8 to the range [0, 1] of float32,
    # then expand the shape with a leading dimension.
    image_batch = tf.expand_dims(
        tf.image.convert_image_dtype(image_uint8, tf.float32), 0)

    # Combine height and width into one size tensor.
    target_size = tf.cast(tf.stack([target_height, target_width]),
                          dtype=tf.int32)

    # Rescale the image to the target size.
    resized_image = tf.image.resize_bilinear(image_batch, target_size)
    return jpeg_data, resized_image
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Creates the operations to apply the specified distortions.

    During training it can help to improve the results if we run the images
    through simple distortions like crops, scales, and flips. These reflect
    the kind of variations we expect in the real world, and so can help train
    the model to cope with natural data more effectively. Here we take the
    supplied parameters and construct a network of operations to apply them
    to an image.

    Cropping
    ~~~~~~~~

    Cropping is done by placing a bounding box at a random position in the
    full image. The cropping parameter controls the size of that box relative
    to the input image. If it's zero, then the box is the same size as the
    input and no cropping is performed. If the value is 50%, then the crop box
    will be half the width and height of the input. In a diagram it looks like
    this:

    <       width         >
    +---------------------+
    |                     |
    |   width - crop%     |
    |    <      >         |
    |    +------+         |
    |    |      |         |
    |    |      |         |
    |    |      |         |
    |    +------+         |
    |                     |
    |                     |
    +---------------------+

    Scaling
    ~~~~~~~

    Scaling is a lot like cropping, except that the bounding box is always
    centered and its size varies randomly within the given range. For example
    if the scale percentage is zero, then the bounding box is the same size as
    the input and no scaling is applied. If it's 50%, then the bounding box
    will be in a random range between half the width and height and full
    size.

    Args:
      flip_left_right: Boolean whether to randomly mirror images
        horizontally.
      random_crop: Integer percentage setting the total margin used around
        the crop box.
      random_scale: Integer percentage of how much to vary the scale by.
      random_brightness: Integer range to randomly multiply the pixel values
        by.
      module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
      The jpeg input layer and the distorted result tensor.
    """
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    # Convert from full range of uint8 to range [0,1] of float32.
    decoded_image_as_float = tf.image.convert_image_dtype(
        decoded_image, tf.float32)
    # resize_bilinear is fed a batch, so add a leading dimension of size 1.
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)

    # Overall enlargement = fixed crop margin * random scale jitter.
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    resize_scale_value = tf.random_uniform(shape=[],
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    precrop_width = tf.multiply(scale_value, input_width)
    precrop_height = tf.multiply(scale_value, input_height)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)

    # Drop the batch dimension so random_crop sees a single image. ``axis``
    # replaces the deprecated ``squeeze_dims`` keyword.
    precropped_image_3d = tf.squeeze(precropped_image, axis=[0])
    cropped_image = tf.random_crop(precropped_image_3d,
                                   [input_height, input_width, input_depth])

    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image

    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform(shape=[],
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Creates the operations that apply the specified distortions.

    During training we distort the images (cropping, scaling, flipping or
    adjusting brightness) so that a limited number of images can simulate
    more real-world situations, which in turn improves the model.

    Cropping
    ~~~~~~~~

    Cropping is done by placing a bounding box at a random position in the
    full image. The cropping parameter controls the size of that box relative
    to the input image. If it is zero, the box is the same size as the input
    and no cropping is performed. If the value is 50%, the crop box will be
    half the width and height of the input. In a diagram it looks like this:

    <       width         >
    +---------------------+
    |                     |
    |   width - crop%     |
    |    <      >         |
    |    +------+         |
    |    |      |         |
    |    |      |         |
    |    |      |         |
    |    +------+         |
    |                     |
    |                     |
    +---------------------+

    Scaling
    ~~~~~~~

    Scaling is a lot like cropping, except that the bounding box is always
    centered and its size varies randomly within the given range. For example,
    if the scale percentage is zero, the bounding box is the same size as the
    input and no scaling is applied. If it is 50%, the bounding box will be in
    a random range between half the width and height and full size.

    Args:
      flip_left_right: Boolean whether to randomly mirror images
        horizontally.
      random_crop: Integer percentage setting the total margin used around
        the crop box.
      random_scale: Integer percentage of how much to vary the scale by.
      random_brightness: Integer range to randomly multiply the pixel values
        by.
      module_spec: The hub.ModuleSpec of the image module being used.

    Returns:
      The jpeg input layer and the distorted result tensor.
    """
    # Width/height and channel depth required by the existing model.
    target_height, target_width = hub.get_expected_image_size(module_spec)
    channel_count = hub.get_num_image_channels(module_spec)

    # Entry point fed through feed_dict.
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    image_uint8 = tf.image.decode_jpeg(jpeg_data, channels=channel_count)
    # Convert from the full range of uint8 to the range [0, 1] of float32,
    # then add a leading dimension.
    image_batch = tf.expand_dims(
        tf.image.convert_image_dtype(image_uint8, tf.float32), 0)

    # Crop and scale the image data. The original author notes both
    # percentage parameters range over 0~100.
    margin_factor = tf.constant(1.0 + (random_crop / 100.0))
    jitter_factor = tf.random_uniform(shape=[],
                                      minval=1.0,
                                      maxval=1.0 + (random_scale / 100.0))
    total_scale = tf.multiply(margin_factor, jitter_factor)
    enlarged_width = tf.multiply(total_scale, target_width)
    enlarged_height = tf.multiply(total_scale, target_height)
    enlarged_size = tf.cast(tf.stack([enlarged_height, enlarged_width]),
                            dtype=tf.int32)
    enlarged_batch = tf.image.resize_bilinear(image_batch, enlarged_size)
    enlarged_image = tf.squeeze(enlarged_batch, axis=[0])
    cropped_image = tf.random_crop(
        enlarged_image, [target_height, target_width, channel_count])

    # Flip the image.
    flipped_image = cropped_image
    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)

    # Adjust the image brightness (random_brightness is noted by the
    # original author as ranging over 0~100).
    brightness_delta = random_brightness / 100.0
    brightness_value = tf.random_uniform(shape=[],
                                         minval=1.0 - brightness_delta,
                                         maxval=1.0 + brightness_delta)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
    """Creates the operations to apply the specified distortions.

    During training it can help to improve the results if we run the images
    through simple distortions like crops, scales, and flips. These reflect
    the kind of variations we expect in the real world, and so can help train
    the model to cope with natural data more effectively. Here we take the
    supplied parameters and construct a network of operations to apply them
    to an image.

    Cropping
    ~~~~~~~~

    Cropping is done by placing a bounding box at a random position in the
    full image. The cropping parameter controls the size of that box relative
    to the input image. If it's zero, then the box is the same size as the
    input and no cropping is performed. If the value is 50%, then the crop box
    will be half the width and height of the input. In a diagram it looks like
    this:

    <       width         >
    +---------------------+
    |                     |
    |   width - crop%     |
    |    <      >         |
    |    +------+         |
    |    |      |         |
    |    |      |         |
    |    |      |         |
    |    +------+         |
    |                     |
    |                     |
    +---------------------+

    Scaling
    ~~~~~~~

    Scaling is a lot like cropping, except that the bounding box is always
    centered and its size varies randomly within the given range. For example
    if the scale percentage is zero, then the bounding box is the same size as
    the input and no scaling is applied. If it's 50%, then the bounding box
    will be in a random range between half the width and height and full
    size.

    Args:
      flip_left_right: Boolean whether to randomly mirror images
        horizontally.
      random_crop: Integer percentage setting the total margin used around
        the crop box.
      random_scale: Integer percentage of how much to vary the scale by.
      random_brightness: Integer range to randomly multiply the pixel values
        by.
      module_spec: The hub.ModuleSpec for the image module being used.

    Returns:
      The jpeg input layer and the distorted result tensor.
    """
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    # Convert from full range of uint8 to range [0,1] of float32.
    decoded_image_as_float = tf.image.convert_image_dtype(decoded_image,
                                                          tf.float32)
    # resize_bilinear is fed a batch, so add a leading dimension of size 1.
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)

    # Overall enlargement = fixed crop margin * random scale jitter.
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    resize_scale_value = tf.random_uniform(shape=[],
                                           minval=1.0,
                                           maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    precrop_width = tf.multiply(scale_value, input_width)
    precrop_height = tf.multiply(scale_value, input_height)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)

    # Drop the batch dimension so random_crop sees a single image. ``axis``
    # replaces the deprecated ``squeeze_dims`` keyword.
    precropped_image_3d = tf.squeeze(precropped_image, axis=[0])
    cropped_image = tf.random_crop(precropped_image_3d,
                                   [input_height, input_width, input_depth])

    if flip_left_right:
        flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
        flipped_image = cropped_image

    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform(shape=[],
                                         minval=brightness_min,
                                         maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result
def prepare_file_system(): # Set up the directory we'll write summaries to for TensorBoard if tf.gfile.Exists(FLAGS.summaries_dir): tf.gfile.DeleteRecursively(FLAGS.summaries_dir) tf.gfile.MakeDirs(FLAGS.summaries_dir) if FLAGS.intermediate_store_frequency > 0: ensure_dir_exists(FLAGS.intermediate_output_graphs_dir) return def add_jpeg_decoding(module_spec): input_height, input_width = hub.get_expected_image_size(module_spec) input_depth = hub.get_num_image_channels(module_spec) jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput') decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth) # Convert from full range of uint8 to range [0,1] of float32. decoded_image_as_float = tf.image.convert_image_dtype(decoded_image, tf.float32) decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0) resize_shape = tf.stack([input_height, input_width]) resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32) resized_image = tf.image.resize_bilinear(decoded_image_4d, resize_shape_as_int) return jpeg_data, resized_image def export_model(module_spec, class_count, saved_model_dir):
def add_input_distortions(flip_left_right, random_crop, random_scale, input_height, input_width = hub.get_expected_image_size(module_spec) input_depth = hub.get_num_image_channels(module_spec)
def __init__(self, flags, n_classes):
    """Copy the experiment settings from ``flags`` and build the estimator.

    Args:
        flags: object exposing the experiment settings as attributes (every
            name read below must exist on it).
        n_classes: stored unchanged as ``self.n_classes``.
    """
    self.n_classes = n_classes

    # Experiment-structure settings followed by the bottleneck directories,
    # copied verbatim from ``flags``.
    for name in ("experiment_name", "model_name",
                 "logs_and_checkpoints_dir", "export_model_dir",
                 "results_dir", "remove_prev_ckpts_and_logs", "random_seed",
                 "train_bottlenecks_dir", "validation_bottlenecks_dir",
                 "test_bottlenecks_dir"):
        setattr(self, name, getattr(flags, name))

    # Dataset image directories: made absolute, and joined with "" so that
    # they always end with a trailing slash.
    for name in ("train_images_dir", "validation_images_dir",
                 "test_images_dir"):
        absolute_dir = os.path.abspath(getattr(flags, name))
        setattr(self, name, os.path.join(absolute_dir, ""))

    # Random-distortion settings followed by the training settings.
    for name in ("flip_left_right", "random_crop", "random_scale",
                 "random_brightness", "train_batch_size",
                 "validation_batch_size", "test_batch_size", "num_epochs",
                 "learning_rate", "tensors_to_log_train",
                 "tensors_to_log_val", "save_checkpoints_steps",
                 "eval_frequency", "fine_tuning"):
        setattr(self, name, getattr(flags, name))

    # Other important values. Bottlenecks are cached only when neither
    # fine-tuning nor image distortion is active; the distortion check is
    # skipped entirely when fine-tuning is on.
    self.cache_bottlenecks = not (
        self.fine_tuning or tf_data_utils.should_distort_images(
            self.flip_left_right, self.random_crop, self.random_scale,
            self.random_brightness))

    # Load the module_spec that corresponds to the chosen model.
    module_url = get_module_url(self.model_name)
    self.module_spec = hub.load_module_spec(module_url)
    self.module_image_shape = hub.get_expected_image_size(self.module_spec)
    self.module_image_depth = hub.get_num_image_channels(self.module_spec)

    self.__init_log_and_random_seeds()
    self.__prepare_filesystem()
    self.__save_config_file(flags)
    self.estimator = self.__build_estimator(mode="train")
# Unpack the command-line arguments used by the rest of the script.
epochs = args.epochs
batch_size = args.batch_size
learning_rate = args.learning_rate
csv_out = args.csv_output
pred_out = args.prediction_output
dropout = args.dropout
save_models = args.save
import_features = args.import_features
tfhub = args.tfhub_module

##### LOAD IMAGES ######
# Take the input geometry from the TF-Hub module when one was given;
# otherwise fall back to 224x224x3. (Identity comparison with None per PEP 8.)
if tfhub is not None:
    module_spec = hub.load_module_spec(tfhub)
    height, width = hub.get_expected_image_size(module_spec)
    channels = hub.get_num_image_channels(module_spec)
else:
    height, width, channels = 224, 224, 3

### training images
# read paths and labels for each image
listimgs, listlabels = parse_input(train_paths)
# load images
loaded_imgs = [
    load_image(img, size=height).reshape((height, width, channels))
    for img in listimgs
]
print('[TRAINING] Loaded', len(loaded_imgs), 'images and', len(listlabels), 'labels')
# map string labels to unique integers: ``u`` holds the distinct labels and
# ``indices`` the integer code of each entry in ``listlabels``
u, indices = np.unique(np.array(listlabels), return_inverse=True)
print('[TRAINING] Categories: ', u)
num_categories = len(u)

### validation images
from __future__ import division from __future__ import print_function import tensorflow as tf import tensorflow_hub as hub import numpy as np #################### Global Variables. #################### # the URL of the pre-trained model. HUB_MODULE = 'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/1' # the model spec. Module_Spec = hub.load_module_spec(HUB_MODULE) # the image size that is required by this model. Module_Height, Module_Width = hub.get_expected_image_size(Module_Spec) Modelu_Depth = hub.get_num_image_channels(Module_Spec) # A module is understood as instrumented for quantization with TF-Lite # if it contains any of these ops. FAKE_QUANT_OPS = ('FakeQuantWithMinMaxVars', 'FakeQuantWithMinMaxVarsPerChannel') # the size of our input images. ImageHeight = Module_Height ImageWidth = Module_Width ImageChannels = Modelu_Depth #################### Tensorflow Settings. #################### # Output the logging info. tf.logging.set_verbosity(tf.logging.INFO)