def ___conv4_block(input, k=1, dropout=0.0): init = input channel_axis = 1 if K.image_dim_ordering() == 'th' else -1 # Check if input number of filters is same as 64 * k, else # create convolution2d for this input if K.image_dim_ordering() == 'th': if init._keras_shape[1] != 64 * k: init = Conv2D(64 * k, (1, 1), activation='linear', padding='same')(init) else: if init._keras_shape[-1] != 64 * k: init = Conv2D(64 * k, (1, 1), activation='linear', padding='same')(init) x = Conv2D(64 * k, (3, 3), padding='same')(input) x = BatchNormalization(axis=channel_axis)(x) x = Activation('relu')(x) if dropout > 0.0: x = Dropout(dropout)(x) x = Conv2D(64 * k, (3, 3), padding='same')(x) x = BatchNormalization(axis=channel_axis)(x) x = Activation('relu')(x) m = add([init, x]) return m
def nn_base(input_tensor=None, trainable=False): # Determine proper input shape if K.image_dim_ordering() == 'th': input_shape = (3, None, None) else: input_shape = (None, None, 3) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor if K.image_dim_ordering() == 'tf': bn_axis = 3 else: bn_axis = 1 # Block 1 x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input) x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) # Block 2 x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) # Block 3 x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) # Block 4 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) # Block 5 x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) # x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) return x
def preprocess_image_for_generating(image_path, size_multiple=4): img = imread(image_path, mode="RGB") # Prevents crashes due to PNG images (ARGB) org_w = img.shape[0] org_h = img.shape[1] aspect_ratio = org_h / org_w size = org_w if org_w > org_h else org_h pad_w = (size - org_w) // 2 pad_h = (size - org_h) // 2 tf_session = K.get_session() kvar = K.variable(value=img) paddings = [[pad_w, pad_w], [pad_h, pad_h], [0, 0]] squared_img = tf.pad(kvar, paddings, mode='REFLECT', name=None) img = K.eval(squared_img) img_width = (squared_img.shape[1] // size_multiple) * size_multiple # Make sure width is a multiple of 4 img_height = (squared_img.shape[0] // size_multiple) * size_multiple # Make sure width is a multiple of 4 img = imresize(img, (img_width, img_height), interp='nearest') if K.image_dim_ordering() == "Th": img = img.transpose((2, 0, 1)).astype(np.float32) else: img = img.astype(np.float32) img = np.expand_dims(img, axis=0) return ((org_w, org_h), (img_width.value, img_height.value), img)
def __init__(self, img_size, min_size, max_size=None, aspect_ratios=None, flip=True, variances=[0.1], clip=True, **kwargs): if K.image_dim_ordering() == 'tf': self.waxis = 2 self.haxis = 1 else: self.waxis = 3 self.haxis = 2 self.img_size = img_size if min_size <= 0: raise Exception('min_size must be positive.') self.min_size = min_size self.max_size = max_size self.aspect_ratios = [1.0] if max_size: if max_size < min_size: raise Exception('max_size must be greater than min_size.') self.aspect_ratios.append(1.0) if aspect_ratios: for ar in aspect_ratios: if ar in self.aspect_ratios: continue self.aspect_ratios.append(ar) if flip: self.aspect_ratios.append(1.0 / ar) self.variances = np.array(variances) self.clip = True super(PriorBox, self).__init__(**kwargs)
def __init__(self, X, y, image_data_generator, batch_size=32, shuffle=False, seed=None, dim_ordering='default', save_to_dir=None, save_prefix='', save_format='jpeg'): if y is not None and len(X) != len(y): raise Exception('X (images tensor) and y (labels) ' 'should have the same length. ' 'Found: X.shape = %s, y.shape = %s' % (np.asarray(X).shape, np.asarray(y).shape)) if dim_ordering == 'default': dim_ordering = K.image_dim_ordering() self.X = X self.y = y self.image_data_generator = image_data_generator self.dim_ordering = dim_ordering self.save_to_dir = save_to_dir self.save_prefix = save_prefix self.save_format = save_format super(NumpyArrayIterator, self).__init__(X.shape[0], batch_size, shuffle, seed)
def __init__(self, scale, **kwargs): if K.image_dim_ordering() == 'tf': self.axis = 3 else: self.axis = 1 self.scale = scale super(Normalize, self).__init__(**kwargs)
def __init__(self, gamma_init=20, **kwargs): if K.image_dim_ordering() == 'tf': self.axis = 3 else: self.axis = 1 self.gamma_init = gamma_init super(L2Normalization, self).__init__(**kwargs)
def conv_block_td(input_tensor, kernel_size, filters, stage, block, input_shape, strides=(2, 2), trainable=True): # conv block time distributed nb_filter1, nb_filter2, nb_filter3 = filters if K.image_dim_ordering() == 'tf': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = TimeDistributed(Conv2D(nb_filter1, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), input_shape=input_shape, name=conv_name_base + '2a')(input_tensor) x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = TimeDistributed(Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2b')(x) x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = TimeDistributed(Conv2D(nb_filter3, (1, 1), kernel_initializer='normal'), name=conv_name_base + '2c', trainable=trainable)(x) x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x) shortcut = TimeDistributed(Conv2D(nb_filter3, (1, 1), strides=strides, trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '1')(input_tensor) shortcut = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '1')(shortcut) x = Add()([x, shortcut]) x = Activation('relu')(x) return x
def preprocess_input(x, dim_ordering='default', mean=None, std=None): if dim_ordering == 'default': dim_ordering = K.image_dim_ordering() assert dim_ordering in {'tf', 'th'} if mean is not None: x = x - np.array(mean, dtype='float32') if std is not None: if 0.0 in std: raise ValueError('0 is not allowed as a custom std.') x = x / np.array(std, dtype='float32') if mean is None and std is None: if dim_ordering == 'th': x[:, 0, :, :] -= 103.939 x[:, 1, :, :] -= 116.779 x[:, 2, :, :] -= 123.68 # 'RGB'->'BGR' x = x[:, ::-1, :, :] else: x[:, :, :, 0] -= 103.939 x[:, :, :, 1] -= 116.779 x[:, :, :, 2] -= 123.68 # 'RGB'->'BGR' x = x[:, :, :, ::-1] return x
def compute_output_shape(self, input_shape): if K.image_dim_ordering() == 'tf': batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future batch_size, feature_map_channels, feature_map_height, feature_map_width = input_shape return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)
def build(input_shape, num_outputs, block_fn, repetitions): """Builds a custom ResNet like architecture. Args: input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols) num_outputs: The number of outputs at final softmax layer block_fn: The block function to use. This is either `basic_block` or `bottleneck`. The original paper used basic_block for layers < 50 repetitions: Number of repetitions of various block units. At each block unit, the number of filters are doubled and the input size is halved Returns: The keras `Model`. """ _handle_dim_ordering() if len(input_shape) != 3: raise Exception( "Input shape should be a tuple (nb_channels, nb_rows, nb_cols)" ) # Permute dimension order if necessary if K.image_dim_ordering() == 'tf': input_shape = (input_shape[1], input_shape[2], input_shape[0]) # Load function from str if needed. block_fn = _get_block(block_fn) input = Input(shape=input_shape) conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(input) pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1) block = pool1 filters = 64 for i, r in enumerate(repetitions): block = _residual_block(block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block) filters *= 2 # Last activation block = _bn_relu(block) # Classifier block block_shape = K.int_shape(block) pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]), strides=(1, 1))(block) flatten1 = Flatten()(pool2) dense = Dense(units=num_outputs, kernel_initializer="he_normal", activation="softmax")(flatten1) model = Model(inputs=input, outputs=dense) return model
def __init__(self, pool_size, num_rois, **kwargs): self.dim_ordering = K.image_dim_ordering() assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}' self.pool_size = pool_size self.num_rois = num_rois super(RoiPoolingConv, self).__init__(**kwargs)
def _handle_dim_ordering(): global ROW_AXIS global COL_AXIS global CHANNEL_AXIS if K.image_dim_ordering() == 'tf': ROW_AXIS = 1 COL_AXIS = 2 CHANNEL_AXIS = 3 else: CHANNEL_AXIS = 1 ROW_AXIS = 2 COL_AXIS = 3
def deprocess_image(x): if K.image_dim_ordering() == 'th': x = x.reshape((3, img_nrows, img_ncols)) x = x.transpose((1, 2, 0)) else: x = x.reshape((img_nrows, img_ncols, 3)) x[:, :, 0] += 103.939 x[:, :, 1] += 116.779 x[:, :, 2] += 123.68 # 'BGR'->'RGB' x = x[:, :, ::-1] x = np.clip(x, 0, 255).astype('uint8') return x
def __init__(self, featurewise_center=False, samplewise_center=False, featurewise_std_normalization=False, samplewise_std_normalization=False, zca_whitening=False, rotation_range=0., width_shift_range=0., height_shift_range=0., shear_range=0., zoom_range=0., channel_shift_range=0., fill_mode='nearest', cval=0., horizontal_flip=False, vertical_flip=False, rescale=None, dim_ordering='default'): if dim_ordering == 'default': dim_ordering = K.image_dim_ordering() self.__dict__.update(locals()) self.mean = None self.std = None self.principal_components = None self.rescale = rescale if dim_ordering not in {'tf', 'th'}: raise Exception( 'dim_ordering should be "tf" (channel after row and ' 'column) or "th" (channel before row and column). ' 'Received arg: ', dim_ordering) self.dim_ordering = dim_ordering if dim_ordering == 'th': self.channel_index = 1 self.row_index = 2 self.col_index = 3 if dim_ordering == 'tf': self.channel_index = 3 self.row_index = 1 self.col_index = 2 if np.isscalar(zoom_range): self.zoom_range = [1 - zoom_range, 1 + zoom_range] elif len(zoom_range) == 2: self.zoom_range = [zoom_range[0], zoom_range[1]] else: raise Exception( 'zoom_range should be a float or ' 'a tuple or list of two floats. ' 'Received arg: ', zoom_range)
def deprocess_image(x, img_width=256, img_height=256): if K.image_dim_ordering() == 'th': x = x.reshape((3, img_nrows, img_ncols)) x = x.transpose((1, 2, 0)) else: x = x.reshape((img_width, img_height, 3)) # Remove zero-center by mean pixel x[:, :, 0] += 103.939 x[:, :, 1] += 116.779 x[:, :, 2] += 123.68 # 'BGR'->'RGB' x = x[:, :, ::-1] x = np.clip(x, 0, 255).astype('uint8') return x
def gram_matrix(x): assert K.ndim(x) == 3 if K.image_dim_ordering() == 'th': features = K.batch_flatten(x) else: features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1))) shape = K.shape(x) C, W, H = (shape[0], shape[1], shape[2]) cf = K.reshape(features, (C, -1)) gram = K.dot(cf, K.transpose(cf)) / K.cast(C * W * H, dtype='float32') return gram
def identity_block_td(input_tensor, kernel_size, filters, stage, block, trainable=True): # identity block time distributed nb_filter1, nb_filter2, nb_filter3 = filters if K.image_dim_ordering() == 'tf': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' x = TimeDistributed(Convolution2D(nb_filter1, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2a')(input_tensor) x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2a')(x) x = Activation('relu')(x) x = TimeDistributed(Convolution2D(nb_filter2, (kernel_size, kernel_size), trainable=trainable, kernel_initializer='normal', padding='same'), name=conv_name_base + '2b')(x) x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2b')(x) x = Activation('relu')(x) x = TimeDistributed(Convolution2D(nb_filter3, (1, 1), trainable=trainable, kernel_initializer='normal'), name=conv_name_base + '2c')(x) x = TimeDistributed(BatchNormalization(axis=bn_axis), name=bn_name_base + '2c')(x) x = Add()([x, input_tensor]) x = Activation('relu')(x) return x
def array_to_img(x, dim_ordering='default', scale=True): if dim_ordering == 'default': dim_ordering = K.image_dim_ordering() if dim_ordering == 'th': x = x.transpose(1, 2, 0) if scale: x += max(-np.min(x), 0) x /= np.max(x) x *= 255 if x.shape[2] == 3: # RGB return Image.fromarray(x.astype('uint8'), 'RGB') elif x.shape[2] == 1: # grayscale return Image.fromarray(x[:, :, 0].astype('uint8'), 'L') else: raise Exception('Unsupported channel number: ', x.shape[2])
def preprocess_input(x, dim_ordering='default'): if dim_ordering == 'default': dim_ordering = K.image_dim_ordering() assert dim_ordering in {'tf', 'th'} if dim_ordering == 'th': x[:, 0, :, :] -= 103.939 x[:, 1, :, :] -= 116.779 x[:, 2, :, :] -= 123.68 # 'RGB'->'BGR' x = x[:, ::-1, :, :] else: x[:, :, :, 0] -= 103.939 x[:, :, :, 1] -= 116.779 x[:, :, :, 2] -= 123.68 # 'RGB'->'BGR' x = x[:, :, :, ::-1] return x
def img_to_array(img, dim_ordering='default'): if dim_ordering == 'default': dim_ordering = K.image_dim_ordering() if dim_ordering not in ['th', 'tf']: raise Exception('Unknown dim_ordering: ', dim_ordering) # image has dim_ordering (height, width, channel) x = np.asarray(img, dtype='float32') if len(x.shape) == 3: if dim_ordering == 'th': x = x.transpose(2, 0, 1) elif len(x.shape) == 2: if dim_ordering == 'th': x = x.reshape((1, x.shape[0], x.shape[1])) else: x = x.reshape((x.shape[0], x.shape[1], 1)) else: raise Exception('Unsupported image shape: ', x.shape) return x
def __call__(self, x): assert K.ndim(x.output) == 4 x_out = x.output shape = K.shape(x_out) img_width, img_height, channel = (shape[1], shape[2], shape[3]) size = img_width * img_height * channel if K.image_dim_ordering() == 'th': a = K.square(x_out[:, :, :img_width - 1, :img_height - 1] - x_out[:, :, 1:, :img_height - 1]) b = K.square(x_out[:, :, :img_width - 1, :img_height - 1] - x_out[:, :, :img_width - 1, 1:]) else: a = K.square(x_out[:, :img_width - 1, :img_height - 1, :] - x_out[:, 1:, :img_height - 1, :]) b = K.square(x_out[:, :img_width - 1, :img_height - 1, :] - x_out[:, :img_width - 1, 1:, :]) loss = self.weight * K.sum(K.pow(a + b, 1.25)) return loss
def __init__(self, padding=(1, 1), dim_ordering='default', **kwargs): super(ReflectionPadding2D, self).__init__(**kwargs) if dim_ordering == 'default': dim_ordering = K.image_dim_ordering() self.padding = padding if isinstance(padding, dict): if set(padding.keys()) <= { 'top_pad', 'bottom_pad', 'left_pad', 'right_pad' }: self.top_pad = padding.get('top_pad', 0) self.bottom_pad = padding.get('bottom_pad', 0) self.left_pad = padding.get('left_pad', 0) self.right_pad = padding.get('right_pad', 0) else: raise ValueError( 'Unexpected key found in `padding` dictionary. ' 'Keys have to be in {"top_pad", "bottom_pad", ' '"left_pad", "right_pad"}.' 'Found: ' + str(padding.keys())) else: padding = tuple(padding) if len(padding) == 2: self.top_pad = padding[0] self.bottom_pad = padding[0] self.left_pad = padding[1] self.right_pad = padding[1] elif len(padding) == 4: self.top_pad = padding[0] self.bottom_pad = padding[1] self.left_pad = padding[2] self.right_pad = padding[3] else: raise TypeError('`padding` should be tuple of int ' 'of length 2 or 4, or dict. ' 'Found: ' + str(padding)) if dim_ordering not in {'tf'}: raise ValueError('dim_ordering must be in {tf}.') self.dim_ordering = dim_ordering self.input_spec = [InputSpec(ndim=4)]
def conv3_block(self, input, k=1, dropout=0.0): init = input channel_axis = 1 if K.image_dim_ordering() == "th" else -1 x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(input) x = Activation('relu')(x) x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal', W_regularizer=l2(weight_decay), use_bias=False)(x) if dropout > 0.0: x = Dropout(dropout)(x) x = BatchNormalization(axis=channel_axis, momentum=0.1, epsilon=1e-5, gamma_initializer='uniform')(x) x = Activation('relu')(x) x = Convolution2D(64 * k, (3, 3), padding='same', kernel_initializer='he_normal', W_regularizer=l2(weight_decay), use_bias=False)(x) m = Add()([init, x]) return m
def deprocess_image(x): """ Same normalization as in: https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py """ if np.ndim(x) > 3: x = np.squeeze(x) # normalize tensor: center on 0., ensure std is 0.1 x = x - x.mean() x = x / (x.std() + 1e-5) x = x * 0.1 # clip to [0, 1] x = x + 0.5 x = np.clip(x, 0, 1) # convert to RGB array x = x * 255 if K.image_dim_ordering() == 'th': x = x.transpose((1, 2, 0)) x = np.clip(x, 0, 255).astype('uint8') return x
def preprocess_image(image_path, img_width=256, img_height=256, load_dims=False, resize=True, size_multiple=4): ''' Preprocess the image so that it can be used by Keras. Args: image_path: path to the image img_width: image width after resizing. Optional: defaults to 256 img_height: image height after resizing. Optional: defaults to 256 load_dims: decides if original dimensions of image should be saved, Optional: defaults to False vgg_normalize: decides if vgg normalization should be applied to image. Optional: defaults to False resize: whether the image should be resided to new size. Optional: defaults to True size_multiple: Deconvolution network needs precise input size so as to divide by 4 ("shallow" model) or 8 ("deep" model). Returns: an image of shape (3, img_width, img_height) for dim_ordering = "th", else an image of shape (img_width, img_height, 3) for dim ordering = "tf" ''' img = imread(image_path, mode="RGB") # Prevents crashes due to PNG images (ARGB) if load_dims: global img_WIDTH, img_HEIGHT, aspect_ratio img_WIDTH = img.shape[0] img_HEIGHT = img.shape[1] aspect_ratio = img_HEIGHT / img_WIDTH if resize: if img_width < 0 or img_height < 0: # We have already loaded image dims img_width = (img_WIDTH // size_multiple) * size_multiple # Make sure width is a multiple of 4 img_height = (img_HEIGHT // size_multiple) * size_multiple # Make sure width is a multiple of 4 img = imresize(img, (img_width, img_height), interp='nearest') if K.image_dim_ordering() == "th": img = img.transpose((2, 0, 1)).astype(np.float32) else: img = img.astype(np.float32) img = np.expand_dims(img, axis=0) return img
def call(self, x, mask=None): ''' Return an anchor box tensor based on the shape of the input tensor. The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`. Note that this tensor does not participate in any graph computations at runtime. It is being created as a constant once during graph creation and is just being output along with the rest of the model output during runtime. Because of this, all logic is implemented as Numpy array operations and it is sufficient to convert the resulting Numpy array into a Keras tensor at the very end before outputting it. Arguments: x (tensor): 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'` or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. The input for this layer must be the output of the localization predictor layer. ''' # Compute box width and height for each aspect ratio # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`. size = min(self.img_height, self.img_width) # Compute the box widths and and heights for all aspect ratios wh_list = [] for ar in self.aspect_ratios: if (ar == 1): # Compute the regular anchor box for aspect ratio 1. box_height = box_width = self.this_scale * size wh_list.append((box_width, box_height)) if self.two_boxes_for_ar1: # Compute one slightly larger version using the geometric mean of this scale value and the next. box_height = box_width = np.sqrt( self.this_scale * self.next_scale) * size wh_list.append((box_width, box_height)) else: box_height = self.this_scale * size / np.sqrt(ar) box_width = self.this_scale * size * np.sqrt(ar) wh_list.append((box_width, box_height)) wh_list = np.array(wh_list) # We need the shape of the input tensor if K.image_dim_ordering() == 'tf': batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape else: # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape # Compute the grid of box center points. They are identical for all aspect ratios. # Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally. if (self.this_steps is None): step_height = self.img_height / feature_map_height step_width = self.img_width / feature_map_width else: if isinstance(self.this_steps, (list, tuple)) and (len(self.this_steps) == 2): step_height = self.this_steps[0] step_width = self.this_steps[1] elif isinstance(self.this_steps, (int, float)): step_height = self.this_steps step_width = self.this_steps # Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image. if (self.this_offsets is None): offset_height = 0.5 offset_width = 0.5 else: if isinstance(self.this_offsets, (list, tuple)) and (len(self.this_offsets) == 2): offset_height = self.this_offsets[0] offset_width = self.this_offsets[1] elif isinstance(self.this_offsets, (int, float)): offset_height = self.this_offsets offset_width = self.this_offsets # Now that we have the offsets and step sizes, compute the grid of anchor box center points. cy = np.linspace(offset_height * step_height, (offset_height + feature_map_height - 1) * step_height, feature_map_height) cx = np.linspace(offset_width * step_width, (offset_width + feature_map_width - 1) * step_width, feature_map_width) cx_grid, cy_grid = np.meshgrid(cx, cy) cx_grid = np.expand_dims( cx_grid, -1 ) # This is necessary for np.tile() to do what we want further down cy_grid = np.expand_dims( cy_grid, -1 ) # This is necessary for np.tile() to do what we want further down # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)` # where the last dimension will contain `(cx, cy, w, h)` boxes_tensor = np.zeros( (feature_map_height, feature_map_width, self.n_boxes, 4)) boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes)) # Set cx boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes)) # Set cy boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h # Convert `(cx, cy, w, h)` to `(xmin, xmax, ymin, ymax)` boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners') # If `clip_boxes` is enabled, clip the coordinates to lie within the image boundaries if self.clip_boxes: x_coords = boxes_tensor[:, :, :, [0, 2]] x_coords[x_coords >= self.img_width] = self.img_width - 1 x_coords[x_coords < 0] = 0 boxes_tensor[:, :, :, [0, 2]] = x_coords y_coords = boxes_tensor[:, :, :, [1, 3]] y_coords[y_coords >= self.img_height] = self.img_height - 1 y_coords[y_coords < 0] = 0 boxes_tensor[:, :, :, [1, 3]] = y_coords # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1] if self.normalize_coords: boxes_tensor[:, :, :, [0, 2]] /= self.img_width boxes_tensor[:, :, :, [1, 3]] /= self.img_height # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth. if self.coords == 'centroids': # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`. boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids', border_pixels='half') elif self.coords == 'minmax': # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax). boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax', border_pixels='half') # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis. variances_tensor = np.zeros_like( boxes_tensor ) # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)` variances_tensor += self.variances # Long live broadcasting # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)` boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1) # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)` boxes_tensor = np.expand_dims(boxes_tensor, axis=0) boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1)) return boxes_tensor
def __init__(self, directory, image_data_generator, target_size=(256, 256), color_mode='rgb', dim_ordering='default', classes=None, class_mode='categorical', batch_size=32, shuffle=True, seed=None, save_to_dir=None, save_prefix='', save_format='jpeg'): if dim_ordering == 'default': dim_ordering = K.image_dim_ordering() self.directory = directory self.image_data_generator = image_data_generator self.target_size = tuple(target_size) if color_mode not in {'rgb', 'grayscale'}: raise ValueError('Invalid color mode:', color_mode, '; expected "rgb" or "grayscale".') self.color_mode = color_mode self.dim_ordering = dim_ordering if self.color_mode == 'rgb': if self.dim_ordering == 'tf': self.image_shape = self.target_size + (3, ) else: self.image_shape = (3, ) + self.target_size else: if self.dim_ordering == 'tf': self.image_shape = self.target_size + (1, ) else: self.image_shape = (1, ) + self.target_size self.classes = classes if class_mode not in {'categorical', 'binary', 'sparse', None}: raise ValueError( 'Invalid class_mode:', class_mode, '; expected one of "categorical", ' '"binary", "sparse", or None.') self.class_mode = class_mode self.save_to_dir = save_to_dir self.save_prefix = save_prefix self.save_format = save_format white_list_formats = {'png', 'jpg', 'jpeg', 'bmp'} # first, count the number of samples and classes self.nb_sample = 0 if not classes: classes = [] for subdir in sorted(os.listdir(directory)): if os.path.isdir(os.path.join(directory, subdir)): classes.append(subdir) self.nb_class = len(classes) self.class_indices = dict(zip(classes, range(len(classes)))) for subdir in classes: subpath = os.path.join(directory, subdir) for fname in sorted(os.listdir(subpath)): is_valid = False for extension in white_list_formats: if fname.lower().endswith('.' + extension): is_valid = True break if is_valid: self.nb_sample += 1 print('Found %d images belonging to %d classes.' % (self.nb_sample, self.nb_class)) # second, build an index of the images in the different class subfolders self.filenames = [] self.classes = np.zeros((self.nb_sample, ), dtype='int32') i = 0 for subdir in classes: subpath = os.path.join(directory, subdir) for fname in sorted(os.listdir(subpath)): is_valid = False for extension in white_list_formats: if fname.lower().endswith('.' + extension): is_valid = True break if is_valid: self.classes[i] = self.class_indices[subdir] self.filenames.append(os.path.join(subdir, fname)) i += 1 super(DirectoryIterator, self).__init__(self.nb_sample, batch_size, shuffle, seed)
def get_weight_path(): if K.image_dim_ordering() == 'th': print('pretrained weights not available for VGG with theano backend') return else: return 'vgg16_weights_tf_dim_ordering_tf_kernels.h5'
if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose: mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor))/len(rpn_accuracy_rpn_monitor) rpn_accuracy_rpn_monitor = [] print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(mean_overlapping_bboxes, epoch_length)) if mean_overlapping_bboxes == 0: print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.') # data generator에서 X, Y, image 가져오기 X, Y, img_data = next(data_gen_train) loss_rpn = model_rpn.train_on_batch(X, Y) write_log(callback, ['rpn_cls_loss', 'rpn_reg_loss'], loss_rpn, train_step) P_rpn = model_rpn.predict_on_batch(X) R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(), use_regr=True, overlap_thresh=0.7, max_boxes=300) # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping) if X2 is None: rpn_accuracy_rpn_monitor.append(0) rpn_accuracy_for_epoch.append(0) continue # sampling positive/negative samples neg_samples = np.where(Y1[0, :, -1] == 1) pos_samples = np.where(Y1[0, :, -1] == 0) if len(neg_samples) > 0: neg_samples = neg_samples[0] else: