def getPointMaskerRefinedHead(backbone, out_side_len, in_channels):
    method = tf.image.ResizeMethod.BILINEAR
    x = layerUtils.depthwiseConvBlock(backbone, in_channels, 64, dilation_rate=[2, 2])
    x = layerUtils.depthwiseConvBlock(x, 64, 16)
    x = layerUtils.Resize(14, method)(x)
    x = layerUtils.depthwiseConvBlock(x, 16, 1, final_activation='linear')
    x = layerUtils.Resize(out_side_len, method)(x)
    return x
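
# Usage sketch (an illustration, not part of the original pipeline): attach the
# refined head to the 112x112 backbone tensor returned alongside getPointMasker's
# model. The sizes and the helper name are assumptions.
def _exampleBuildRefined():
    base_model, backbone = getPointMasker(224, 56, compile=False)
    refined = getPointMaskerRefinedHead(backbone, out_side_len=56, in_channels=64)
    return Model(inputs=base_model.inputs, outputs=[refined])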
def getPointMasker(im_side_len, mask_side_len, compile=True):
    # lip only for now; mask_side_len is kept in the signature for callers,
    # labels are supplied at fit time
    im_shape = (im_side_len, im_side_len, 3)
    num_coords = 13
    img_input = Input(shape=im_shape)

    # 224x224
    x = Convolution2D(32, (3, 3), strides=(2, 2), padding='same', use_bias=False)(img_input)
    # 112x112
    x = layerUtils.depthwiseConvBlock(x, 32, 64, down_sample=True)
    x = layerUtils.depthwiseConvBlock(x, 64, 64)
    backbone = layerUtils.depthwiseConvBlock(x, 64, 64)
    # 56x56
    x = layerUtils.depthwiseConvBlock(x, 64, 128, down_sample=True)
    # 28x28
    x = layerUtils.depthwiseConvBlock(x, 128, 256, down_sample=True)
    # 14x14
    # dilated convolutions enlarge the receptive field without further
    # downsampling, which helps prevent misclassification
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[2, 2])
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[4, 4])
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[8, 8])

    method = tf.image.ResizeMethod.BILINEAR
    x = layerUtils.depthwiseConvBlock(x, 256, 128)
    x = layerUtils.Resize(28, method)(x)
    x = layerUtils.depthwiseConvBlock(x, 128, num_coords, final_activation='linear')
    pred = x

    model = Model(inputs=[img_input], outputs=[pred])
    #optimizer = optimizers.adam(lr=6E-2)
    if compile:
        optimizer = optimizers.SGD(lr=5E-5, momentum=0.9, nesterov=True)
        model.compile(loss=[pointMaskSigmoidLoss], optimizer=optimizer)
    return model, backbone
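
# Minimal training sketch for getPointMasker, assuming a generator that yields
# (image_batch, point_mask_batch) pairs at the model's 28x28 output resolution;
# the generator and step count are placeholders, not existing code.
def _exampleTrainPointMasker(train_gen, steps_per_epoch):
    model, _ = getPointMasker(im_side_len=224, mask_side_len=56)
    model.fit_generator(train_gen, steps_per_epoch=steps_per_epoch, epochs=1)
    return model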
def getPointMaskerSmall(self, in_side_len, out_side_len, in_channels, out_channels):
    # This one does not compile on its own.
    im_shape = (in_side_len, in_side_len, in_channels)
    img_input = Input(im_shape)
    method = tf.image.ResizeMethod.BILINEAR

    x = Convolution2D(16, (3, 3), strides=(2, 2), padding='same', use_bias=False)(img_input)
    x = layerUtils.depthwiseConvBlock(x, 16, 64, down_sample=True)
    x = layerUtils.depthwiseConvBlock(x, 64, 64, dilation_rate=[2, 2])
    x = layerUtils.depthwiseConvBlock(x, 64, 64, dilation_rate=[4, 4])
    x = layerUtils.depthwiseConvBlock(x, 64, 16)
    x = layerUtils.Resize(14, method)(x)
    x = layerUtils.depthwiseConvBlock(x, 16, out_channels, final_activation='linear')
    x = layerUtils.Resize(out_side_len, method)(x)

    model = Model(inputs=img_input, outputs=x)
    return model
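
# Hypothetical usage, assuming `nets` is an instance of the enclosing class:
# refine a batch of small crops (e.g. 56x56 RGB patches around coarse landmark
# estimates) into single-channel heatmaps. Crop extraction is out of scope here.
def _exampleRefineCrops(nets, crop_batch):
    refiner = nets.getPointMaskerSmall(56, 56, in_channels=3, out_channels=1)
    return refiner.predict(crop_batch)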
def cascadedPointMaskSigmoidLoss(self, y_true, y_pred):
    num_coords = 13
    method = tf.image.ResizeMethod.BILINEAR

    # Split the preds up into their component parts (dammit keras!): the first
    # num_coords channels are the base predictions, the rest are the refined ones.
    base_preds = y_pred[:, :, :, :num_coords]
    base_preds = tf.stop_gradient(base_preds)
    refined_preds = y_pred[:, :, :, num_coords:]

    # Get crops; this is actually repetitive code from the model architecture,
    # blame keras for inflexible loss function arguments.
    base_preds_normalized = Activation('sigmoid')(base_preds)
    mask_means = layerUtils.MaskMean()(base_preds_normalized)
    true_means = layerUtils.MaskMean()(y_true)
    #boxes = layerUtils.BoxesFromCenters(28.0 / self.im_height)(true_means)
    #boxes = layerUtils.PerturbBboxes([0.8, 1.2], [-0.25, 0.25])(boxes)
    boxes = layerUtils.BoxesFromCenters(28.0 / self.im_height)(mask_means)

    # Avoid penalizing the refined mask when the initial estimate is not even
    # close to the truth.
    sqrd_diffs = tf.squared_difference(mask_means, true_means)
    dists = tf.sqrt(tf.reduce_sum(sqrd_diffs, axis=-1))
    thresh = 0.30 * 28.0 / self.im_height
    loss_mask = tf.where(dists < thresh, tf.ones(tf.shape(dists)), tf.zeros(tf.shape(dists)))
    loss_mask = tf.expand_dims(loss_mask, 1)
    loss_mask = tf.expand_dims(loss_mask, 1)

    label_crops = []
    for i in range(num_coords):
        # bind i at definition time via a default argument; a bare closure would
        # capture the loop variable by reference
        box = Lambda(lambda x, i=i: x[:, i, :])(boxes)
        label_mask = Lambda(lambda x, i=i: x[:, :, :, i])(y_true)
        label_mask = tf.expand_dims(label_mask, axis=-1)
        label_crop = layerUtils.CropAndResize(28)([label_mask, box])
        label_crops.append(label_crop)

    labels = Concatenate()(label_crops)
    labels *= loss_mask
    refined_preds = layerUtils.Resize(28, method)(refined_preds)
    refined_preds *= loss_mask
    return self.pointMaskDistanceLoss(labels, refined_preds)
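
# layerUtils.MaskMean is not shown in this file; the loss above assumes it
# returns the spatial expectation (soft-argmax) of each heatmap in normalized
# [0, 1] coordinates. A minimal sketch of that assumed computation:
def _maskMeanSketch(masks):
    # masks: (batch, h, w, num_coords), non-negative heatmaps
    h = tf.shape(masks)[1]
    w = tf.shape(masks)[2]
    total = tf.reduce_sum(masks, axis=[1, 2], keepdims=True) + 1e-8
    probs = masks / total  # normalize each heatmap to a distribution
    ys = tf.cast(tf.range(h), tf.float32) / tf.cast(h - 1, tf.float32)
    xs = tf.cast(tf.range(w), tf.float32) / tf.cast(w - 1, tf.float32)
    mean_y = tf.reduce_sum(probs * tf.reshape(ys, [1, -1, 1, 1]), axis=[1, 2])
    mean_x = tf.reduce_sum(probs * tf.reshape(xs, [1, 1, -1, 1]), axis=[1, 2])
    return tf.stack([mean_y, mean_x], axis=-1)  # (batch, num_coords, 2)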
def pointMaskDistanceLossPresetDims(self, labels, preds):
    method = tf.image.ResizeMethod.BILINEAR
    labels = layerUtils.Resize(28, method)(labels)
    preds = layerUtils.Resize(28, method)(preds)
    return self.pointMaskDistanceLoss(labels, preds)
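
# pointMaskDistanceLoss itself is defined elsewhere in the class; a plausible
# stand-in, assuming it penalizes the Euclidean distance between the soft-argmax
# means of label and predicted heatmaps (reusing _maskMeanSketch above). This is
# an illustration of the idea, not the original implementation.
def _pointMaskDistanceSketch(labels, preds):
    label_means = _maskMeanSketch(labels)
    pred_means = _maskMeanSketch(tf.sigmoid(preds))
    sqrd = tf.reduce_sum(tf.square(label_means - pred_means), axis=-1)
    return tf.reduce_mean(tf.sqrt(sqrd + 1e-12))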
def getLipMaskerZoomed(self, alpha=1):
    # alpha_1: width multiplier for the bbox regressor (branch currently
    # commented out); alpha_2: width multiplier for the mask cnn
    alpha_1 = alpha
    alpha_2 = 1.0
    input_tensor = None
    shallow = False
    input_shape = (self.im_height, self.im_width, 3)

    # https://github.com/rcmalli/keras-mobilenet/blob/master/keras_mobilenet/mobilenet.py
    input_shape = _obtain_input_shape(input_shape, default_size=224, min_size=96,
                                      data_format=K.image_data_format(), require_flatten=True)
    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # labels to be set as inputs as well
    mask_gts = Input(shape=(self.im_height, self.im_width, 1))

    """
    # Mask head:
    # https://arxiv.org/pdf/1703.06870.pdf
    #a = layerUtils.CropAndResize(7)([x, b])
    #a = Convolution2D(int(512 * alpha), (3, 3), strides=(2, 2), padding='same', use_bias=False)(x)
    #a = layerUtils.depthwiseConvBlock(x, 512 * alpha, 512 * alpha)
    """

    # note to self: alternative to sharing features -- just use a new
    # fully-convolutional architecture
    a = layerUtils.Resize(112, tf.image.ResizeMethod.BILINEAR)(img_input)
    a = Convolution2D(int(32 * alpha_2), (3, 3), strides=(2, 2), padding='same', use_bias=False)(a)
    a = BatchNormalization()(a)
    a = Activation('relu')(a)
    a = layerUtils.depthwiseConvBlock(a, 32 * alpha_2, 64 * alpha_2)
    a = layerUtils.depthwiseConvBlock(a, 64 * alpha_2, 128 * alpha_2, down_sample=True)
    a = layerUtils.depthwiseConvBlock(a, 128 * alpha_2, 128 * alpha_2)
    a = layerUtils.depthwiseConvBlock(a, 128 * alpha_2, 256 * alpha_2, down_sample=True)
    a = layerUtils.depthwiseConvBlock(a, 256 * alpha_2, 256 * alpha_2)
    a = layerUtils.depthwiseConvBlock(a, 256 * alpha_2, 512 * alpha_2, down_sample=True)
    if not shallow:
        for _ in range(5):
            a = layerUtils.depthwiseConvBlock(a, 512 * alpha_2, 512 * alpha_2)

    # 7x7
    conv_transpose_depth = 128
    a = Conv2DTranspose(int(conv_transpose_depth * alpha_2), kernel_size=(3, 3), strides=(2, 2),
                        activation='relu', padding='same', data_format='channels_last')(a)
    for i in range(3):
        a = layerUtils.depthwiseConvBlock(a, conv_transpose_depth * alpha_2, conv_transpose_depth * alpha_2)

    # 14x14
    a = Conv2DTranspose(int(conv_transpose_depth * alpha_2), kernel_size=(3, 3), strides=(2, 2),
                        activation='relu', padding='same', data_format='channels_last')(a)
    for i in range(3):
        a = layerUtils.depthwiseConvBlock(a, conv_transpose_depth * alpha_2, conv_transpose_depth * alpha_2)

    # 28x28
    a = Conv2DTranspose(int(conv_transpose_depth * alpha_2), kernel_size=(3, 3), strides=(2, 2),
                        activation='relu', padding='same', data_format='channels_last')(a)
    a = layerUtils.depthwiseConvBlock(a, conv_transpose_depth * alpha_2, 1)
    #a = Lambda(lambda a: K.squeeze(a, axis=-1))(a)

    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    # a holds the unnormalized mask logits
    masks = Activation('sigmoid', name='masks')(a)
    #bboxes = Lambda(lambda b: b, name='bboxes')(b)
    # try to generate ground truth masks (which were obtained from ground truth crops)
    #mask_loss = layerUtils.MaskSigmoidLossLayer(self.mask_side_len, name='mask_obj')([mask_gts, a, bboxes])
    mask_loss = layerUtils.MaskSigmoidLossLayerNoCrop(
        self.mask_side_len, name='mask_obj')([mask_gts, a])
    #mask_gts_cropped = layerUtils.CropAndResize(self.mask_side_len)([mask_gts, bbox_gts])
    #mask_gts_cropped = Lambda(lambda a: K.squeeze(a, axis=-1))(mask_gts_cropped)
    #bbox_loss = layerUtils.SquaredDistanceLossLayer(name='bbox_obj')([bbox_gts, bboxes])
    #total_loss = Lambda(lambda l: l[0] + l[1])([mask_loss, bbox_loss])
    #model = Model(inputs=[inputs, bbox_gts, mask_gts], outputs=[mask_loss, bbox_loss, bboxes, mask_gts_cropped])

    model = Model(inputs=[inputs, mask_gts], outputs=[mask_loss, masks])
    optimizer = optimizers.adam(lr=4E-4)
    model.compile(loss=[self.identityLoss, None], optimizer=optimizer)
    #model = Model(inputs=[inputs, bbox_gts, mask_gts], outputs=[mask_loss])
    #model.compile(loss=[self.identityLoss], optimizer='adam')
    #model.summary()
    return model
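
# Training sketch for getLipMaskerZoomed, assuming an instance `nets` and a
# generator yielding ([image_batch, mask_gt_batch], dummy_target) pairs: the
# model emits its own loss tensor, so the 'mask_obj' target is a dummy array
# consumed by identityLoss, and the 'masks' output has no loss attached.
def _exampleTrainLipMasker(nets, train_gen, steps_per_epoch):
    model = nets.getLipMaskerZoomed(alpha=1)
    model.fit_generator(train_gen, steps_per_epoch=steps_per_epoch, epochs=1)
    return model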
def getPointMasker(self):
    im_shape = (self.im_width, self.im_height, 3)
    masks_shape = (self.mask_side_len, self.mask_side_len, self.num_coords)
    summed_masks_shape = (self.mask_side_len, self.mask_side_len, 1)

    img_input = Input(shape=im_shape)
    label_masks = Input(shape=masks_shape)
    label_summed_masks = Input(shape=summed_masks_shape)

    x = Convolution2D(32, (3, 3), strides=(1, 1), padding='same', use_bias=False)(img_input)
    num_features = [64, 128, 256, 512, 512]
    z_layers = [None] * 4
    x, z_layers[0] = layerUtils.rcfBlock(x, 32, num_features[0], 2, z_out_layers=2)
    x, z_layers[1] = layerUtils.rcfBlock(x, num_features[0], num_features[1], 2, z_out_layers=4)
    x, z_layers[2] = layerUtils.rcfBlock(x, num_features[1], num_features[2], 3, z_out_layers=1)
    x, z_layers[3] = layerUtils.rcfBlock(x, num_features[2], num_features[3], 3, z_out_layers=1)
    #x, z_layers[4] = layerUtils.rcfBlock(x, num_features[3], num_features[4], 3, z_out_layers=1)

    # want 112x112 feature maps
    z_layers[0] = layerUtils.depthwiseConvBlock(z_layers[0], 2, 4, down_sample=True)

    # upsample the deeper side outputs back to the common resolution
    z_layers[2] = Conv2DTranspose(1, kernel_size=(3, 3), strides=(2, 2),
                                  activation='linear', padding='same')(z_layers[2])
    z_layers[2] = Convolution2D(1, (1, 1))(z_layers[2])
    z_layers[3] = Conv2DTranspose(1, kernel_size=(3, 3), strides=(4, 4),
                                  activation='linear', padding='same')(z_layers[3])
    z_layers[3] = Convolution2D(1, (1, 1))(z_layers[3])
    """
    # long strides xD
    z_layers[4] = Conv2DTranspose(
        1, kernel_size=(3, 3), strides=(8, 8), activation='linear', padding='same')(z_layers[4])
    """

    final = Concatenate()(z_layers)
    final = layerUtils.depthwiseConvBlock(final, 10, 32, down_sample=True)
    final = layerUtils.depthwiseConvBlock(final, 32, self.num_coords)

    # losses; the final prediction is at half the mask resolution (56x56)
    losses = 3 * [None]
    label_masks_downsampled = layerUtils.Resize(
        self.mask_side_len // 2, tf.image.ResizeMethod.AREA)(label_masks)
    losses[0] = layerUtils.MaskSigmoidLossLayerNoCrop(
        self.mask_side_len)([label_summed_masks, z_layers[2]])
    losses[1] = layerUtils.MaskSigmoidLossLayerNoCrop(
        self.mask_side_len)([label_summed_masks, z_layers[3]])
    losses[2] = layerUtils.PointMaskSoftmaxLossLayer(
        self.mask_side_len // 2)([label_masks_downsampled, final])

    # name the loss outputs
    losses[0] = Lambda(lambda x: x, name='z2')(losses[0])
    losses[1] = Lambda(lambda x: x, name='z3')(losses[1])
    losses[2] = Lambda(lambda x: x, name='final')(losses[2])

    model = Model(inputs=[img_input, label_masks, label_summed_masks],
                  outputs=[losses[0], losses[1], losses[2], z_layers[2], z_layers[3], final])
    optimizer = optimizers.adam(lr=3E-3)
    model.compile(loss=[self.identityLoss, self.identityLoss, self.identityLoss, None, None, None],
                  optimizer=optimizer)
    return model
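
# layerUtils.rcfBlock is defined elsewhere; judging by its use above, it follows
# the Richer Convolutional Features pattern (thin side outputs tapped from a
# stack of conv layers, plus a downsampled main path). A minimal sketch under
# that assumption; the real block may differ in details:
def _rcfBlockSketch(x, in_channels, out_channels, num_convs, z_out_layers):
    from keras.layers import Add, MaxPooling2D
    side_outputs = []
    for _ in range(num_convs):
        x = layerUtils.depthwiseConvBlock(x, in_channels, out_channels)
        in_channels = out_channels
        # a 1x1 projection of each stage feeds the side output
        side_outputs.append(Convolution2D(z_out_layers, (1, 1))(x))
    z = Add()(side_outputs) if len(side_outputs) > 1 else side_outputs[0]
    x = MaxPooling2D((2, 2))(x)  # halve resolution for the next block
    return x, z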
def getPointMaskerConcat(self, compile=True):
    im_shape = (self.im_width, self.im_height, 3)
    # lip only for now
    l = self.mask_side_len
    num_coords = 13
    img_input = Input(shape=im_shape)
    label_masks = Input(shape=(l, l, num_coords))
    z = []

    # 224x224
    x = Convolution2D(32, (3, 3), strides=(2, 2), padding='same', use_bias=False)(img_input)
    # 112x112
    x = layerUtils.depthwiseConvBlock(x, 32, 64, down_sample=True)
    x = layerUtils.depthwiseConvBlock(x, 64, 64)
    backbone = layerUtils.depthwiseConvBlock(x, 64, 64)
    # 56x56
    x = layerUtils.depthwiseConvBlock(x, 64, 128, down_sample=True)
    # 28x28
    #z.append(layerUtils.depthwiseConvBlock(b, 128, 128))
    #z.append(layerUtils.depthwiseConvBlock(x, 128, 128))
    x = layerUtils.depthwiseConvBlock(x, 128, 256, down_sample=True)
    # 14x14
    # dilated convolutions enlarge the receptive field without further
    # downsampling, which helps prevent misclassification
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[2, 2])
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[4, 4])
    x = layerUtils.depthwiseConvBlock(x, 256, 256, dilation_rate=[8, 8])
    #z.append(x)

    method = tf.image.ResizeMethod.BILINEAR
    x = layerUtils.depthwiseConvBlock(x, 256, 128)
    x = layerUtils.Resize(28, method)(x)
    #z[0] = layerUtils.depthwiseConvBlock(z[0], 128, 64)
    #x = Concatenate()([x, z[0]])
    #x = layerUtils.depthwiseConvBlock(x, 192, 128)
    x = layerUtils.depthwiseConvBlock(x, 128, num_coords, final_activation='linear')
    #x = layerUtils.depthwiseConvBlock(x, 192, num_coords, final_activation='linear')
    #x = layerUtils.depthwiseConvBlock(x, 32, num_coords, final_activation='linear')
    #loss = layerUtils.PointMaskSoftmaxLossLayer(l)([label_masks, x])
    #loss = layerUtils.MaskSigmoidLossLayerNoCrop(l)([label_masks, x])
    #x = Activation('sigmoid')(x)
    pred = x
    #loss = Lambda(lambda x: x, name='f0')(loss)

    model = Model(inputs=[img_input], outputs=[pred])
    #optimizer = optimizers.adam(lr=6E-2)
    if compile:
        optimizer = optimizers.SGD(lr=5E-5, momentum=0.9, nesterov=True)
        model.compile(loss=[self.pointMaskSigmoidLoss],
                      metrics=[self.pointMaskDistance],
                      optimizer=optimizer)
    return model, backbone
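
# Inference sketch, assuming an instance `nets`: run the concat masker on a
# batch of 224x224 images and decode each 28x28 heatmap to (y, x) pixel
# coordinates via argmax. Variable names here are illustrative only.
def _exampleDecodeHeatmaps(nets, images):
    import numpy as np
    model, _ = nets.getPointMaskerConcat(compile=False)
    heatmaps = model.predict(images)  # (batch, 28, 28, 13)
    flat = heatmaps.reshape(heatmaps.shape[0], -1, heatmaps.shape[-1])
    idx = flat.argmax(axis=1)  # flattened argmax per landmark
    ys, xs = np.unravel_index(idx, heatmaps.shape[1:3])
    scale = images.shape[1] / float(heatmaps.shape[1])
    return np.stack([ys, xs], axis=-1) * scale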