def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final layer features. Axes 1 and 2 are read as the grid height and
        width; the tensor is reshaped to
        ``[-1, height, width, num_anchors, num_classes + 5]``.
    anchors : sequence
        Anchor sizes, two values per anchor (reshaped to
        ``[1, 1, 1, num_anchors, 2]``).
    num_classes : int
        Number of classes encoded after the first five box parameters.
    input_shape : tensor
        Network input size; it is reversed before dividing the box sizes
        (presumably given as (height, width) -- confirm with the caller).
    calc_loss : bool, default=False
        When truthy, return the intermediate tensors needed by the loss.

    Returns
    -------
    tuple
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))

    # Truthiness test instead of ``== True``; return before building the
    # confidence / class activations, which the loss path never uses.
    if calc_loss:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, inputs, **kwargs):
    """Apply the additive path and, unless ``nac_only``, the gated mix.

    The effective weight matrix is ``tanh(W_hat) * sigmoid(M_hat)``. The
    additive result is ``inputs . W``. When ``self.nac_only`` is false, a
    second result is computed in log space (``exp(log(|inputs| + eps) . W)``)
    and the two are blended with a sigmoid gate driven by ``inputs . G``.
    """
    weights = K.tanh(self.W_hat) * K.sigmoid(self.M_hat)
    additive = K.dot(inputs, weights)
    if self.nac_only:
        return additive

    # epsilon keeps the logarithm finite for zero-valued inputs
    log_magnitude = K.log(K.abs(inputs) + self.epsilon)
    multiplicative = K.exp(K.dot(log_magnitude, weights))
    gate = K.sigmoid(K.dot(inputs, self.G))
    return gate * additive + (1. - gate) * multiplicative
def call(self, input_feature):
    """Return a sigmoid map built from channel-wise mean and max of the input.

    The input is reduced along axis 3 twice (mean and max, dims kept), the
    two reductions are concatenated on axis 3, passed through a single-filter
    3x3 'same' convolution without bias, and squashed with a sigmoid.

    NOTE(review): the Lambda, Concatenate and Conv2D layers are instantiated
    inside this method, so every invocation creates new layer objects --
    confirm this is intended.
    """
    kernel_size = 3

    def _reduce_channels(reducer, tensor):
        # Collapse axis 3 with the given backend reducer, keeping the axis.
        return Lambda(lambda x: reducer(x, axis=3, keepdims=True))(tensor)

    print(input_feature.shape)
    mean_map = _reduce_channels(K.mean, input_feature)
    peak_map = _reduce_channels(K.max, input_feature)
    stacked = Concatenate(axis=3)([mean_map, peak_map])

    spatial_conv = Conv2D(filters=1,
                          kernel_size=kernel_size,
                          strides=1,
                          padding='same',
                          kernel_initializer='he_normal',
                          use_bias=False)
    attention_logits = spatial_conv(stacked)
    print(attention_logits.shape)
    return K.sigmoid(attention_logits)
def line_loss(y_true, y_pred):
    """Return the mean negative log-sigmoid of ``y_true * y_pred``.

    Mathematically this is ``-mean(log(sigmoid(y_true * y_pred)))``. It is
    evaluated as ``mean(softplus(-y_true * y_pred))``, which is the same
    quantity but does not produce ``log(0)`` when the sigmoid underflows for
    strongly negative products.
    """
    # -log(sigmoid(x)) == softplus(-x)
    return K.mean(K.softplus(-(y_true * y_pred)))
def line_loss(y_true, y_pred):
    """Return ``-mean(log(sigmoid(y_true * y_pred)))`` with a clipped log.

    TensorFlow is imported lazily. If it is missing, an installation hint is
    printed and the original import error is re-raised.

    Raises
    ------
    ImportError
        If ``tensorflow`` cannot be imported.
    """
    try:
        import tensorflow as tf
    except ImportError:
        # A failed import raises ImportError (ModuleNotFoundError), never
        # ImportWarning; print the hint and let the real error propagate.
        print("tensorflow not found, please install")
        raise
    from tensorflow.python.keras import backend as K

    y = K.sigmoid(y_true * y_pred)
    # Avoid Nan in the result of 'K.log'
    return -K.mean(K.log(tf.clip_by_value(y, 1e-8, tf.reduce_max(y))))
def call(self, inputs, **kwargs):
    """Accumulate one step of halting-weighted output.

    Computes a per-position sigmoid halting value from ``inputs``, derives
    this step's halting probability from the remaining budget, and adds
    ``halting_prob * inputs`` to ``self.weighted_output``.

    The method mutates ``self.active_steps``, ``self.ponder_cost``,
    ``self.remainder``, ``self.halt_budget``, ``self.zeros_like_input`` and
    ``self.weighted_output``. The statement order matters: the halting
    probability and the ponder cost read ``self.remainder`` before it is
    updated for this step.

    Returns a two-element list: the unmodified ``inputs`` and the
    accumulated weighted output.
    """
    input_shape = K.int_shape(inputs)
    # the last two static dimensions are taken as sequence length and width
    sequence_length, d_model = input_shape[-2:]
    # output of the "sigmoid halting unit" (not the probability yet)
    halting = K.sigmoid(
        K.reshape(
            K.bias_add(K.dot(K.reshape(inputs, [-1, d_model]),
                             self.act_weights['halting_kernel']),
                       self.act_weights['halting_biases'],
                       data_format='channels_last'),
            [-1, sequence_length]))
    # control tensors are created lazily on the first call
    if self.zeros_like_halting is None:
        self.initialize_control_tensors(halting)
    # useful flags
    step_is_active = K.greater(self.halt_budget, 0)
    no_further_steps = K.less_equal(self.halt_budget - halting, 0)
    # halting probability is equal to
    # a. halting output if this isn't the last step (we have some budget)
    # b. to remainder if it is,
    # c. and zero for the steps that shouldn't be executed at all
    #    (out of budget for them)
    halting_prob = K.switch(
        step_is_active,
        K.switch(no_further_steps, self.remainder, halting),
        self.zeros_like_halting)
    self.active_steps += K.switch(step_is_active,
                                  self.ones_like_halting,
                                  self.zeros_like_halting)
    # We don't know which step is the last, so we keep updating
    # expression for the loss with each call of the layer
    self.ponder_cost = (self.act_weights['time_penalty_t'] *
                        K.mean(self.remainder + self.active_steps))
    # Updating "the remaining probability" and the halt budget
    self.remainder = K.switch(no_further_steps,
                              self.remainder,
                              self.remainder - halting)
    self.halt_budget -= halting  # OK to become negative
    # If none of the inputs are active at this step, then instead
    # of zeroing them out by multiplying to all-zeroes halting_prob,
    # we can simply use a constant tensor of zeroes, which means that
    # we won't even calculate the output of those steps, saving
    # some real computational time.
    if self.zeros_like_input is None:
        self.zeros_like_input = K.zeros_like(inputs, name='zeros_like_input')
    # just because K.any(step_is_active) doesn't work in PlaidML
    any_step_is_active = K.greater(K.sum(K.cast(step_is_active, 'int32')), 0)
    step_weighted_output = K.switch(
        any_step_is_active,
        K.expand_dims(halting_prob, -1) * inputs,
        self.zeros_like_input)
    # the first call seeds the accumulator, later calls add to it
    if self.weighted_output is None:
        self.weighted_output = step_weighted_output
    else:
        self.weighted_output += step_weighted_output
    return [inputs, self.weighted_output]
def line_loss(y_true, y_pred):
    """Return the weighted mean negative log-sigmoid loss.

    ``y_true`` carries two columns (built as
    ``np.vstack([k_weight, odw]).T``): column 0 is the per-sample weight and
    column 1 multiplies the prediction before the log-sigmoid.

    The log-sigmoid is evaluated as ``-softplus(-x)``, which equals
    ``log(sigmoid(x))`` but does not hit ``log(0)`` when the sigmoid
    underflows.
    """
    signed_score = layers.multiply([y_true[:, 1], y_pred])
    # log(sigmoid(x)) == -softplus(-x)
    log_sigmoid = -K.softplus(-signed_score)
    weighted = layers.multiply([y_true[:, 0], log_sigmoid])
    return -K.mean(weighted)
def line_loss(y_true, y_pred):
    """Return ``lamb * mean(-log(sigmoid(sign * y_pred)))``.

    ``sign`` is read from ``y_true[0][0]`` and ``lamb`` from
    ``y_true[0][1]``, i.e. both come from the first entry of ``y_true`` and
    are applied to the whole of ``y_pred``.
    NOTE(review): presumably every sample in a batch shares the same sign
    (-1 or +1, marking pos/neg samples) and lambda -- confirm with the data
    generator.

    The negative log-sigmoid is evaluated as ``softplus(-x)``, which is the
    same quantity but does not hit ``log(0)`` when the sigmoid underflows.
    """
    sign = y_true[0][0]
    lamb = y_true[0][1]
    # -log(sigmoid(x)) == softplus(-x)
    return lamb * K.mean(K.softplus(-(sign * y_pred)))
def call(self, x):
    """Sample binary units from ``x`` by thresholding uniform noise.

    The activation of ``x . rbm_weight + hidden_bias`` is a sigmoid in
    ``MODE_VISIBLE_BERNOULLI`` and a ReLU in ``MODE_VISIBLE_GAUSSIAN``. A
    unit is set to 1 where uniform noise falls below its activation.

    Fixes relative to the previous version:
    * ``K.cast`` is now given its required dtype (``K.floatx()``).
    * the noise tensor is as wide as the activation (second axis of
      ``rbm_weight``) rather than as wide as the input, so the comparison no
      longer depends on input and output widths being equal.

    Returns ``None`` for any other mode, as before.
    """
    if self.mode == MODE_VISIBLE_BERNOULLI:
        activation = K.sigmoid
    elif self.mode == MODE_VISIBLE_GAUSSIAN:
        activation = K.relu  # ? (doubt carried over from the original author)
    else:
        # NOTE(review): unknown modes were silently ignored; kept as is.
        return None

    hidden = activation(K.dot(x, self.rbm_weight) + self.hidden_bias)
    num_hidden = K.int_shape(self.rbm_weight)[1]
    noise = K.random_uniform(shape=(self.hps['batch_size'], num_hidden))
    return K.cast(K.less(noise, hidden), K.floatx())
def call(self, inputs, reverse=False, ddi=False, **kwargs):
    """Apply (or invert) an affine transform on one half of the channels.

    ``inputs`` is split into two parts with ``split_channels``. The first
    part is fed to ``self.nn`` to produce a scale and a shift, and is passed
    through unchanged. The second part is transformed as
    ``(z2 + shift) * scale`` in the forward direction and
    ``z2 / scale - shift`` when ``reverse`` is true. In the forward
    direction ``-sum(log(scale)) * bit_per_sub_pixel_factor`` is registered
    through ``self.add_loss``.

    Returns the two parts concatenated on axis 3.
    """
    passthrough, transformed = split_channels(inputs)
    raw_scale, shift = split_channels_by_even_and_odd(
        self.nn(passthrough, ddi=ddi))

    # exp(scale) was noted as unstable to train; the shifted sigmoid follows
    # the reference implementation.
    scale = K.sigmoid(raw_scale + 2)

    if reverse:
        transformed = transformed / scale - shift
    else:
        transformed = (transformed + shift) * scale
        log_scale_sum = K.sum(K.log(scale), axis=[1, 2, 3])
        self.add_loss(-log_scale_sum * self.bit_per_sub_pixel_factor)

    return K.concatenate([passthrough, transformed], axis=3)
def call(self, inputs, states, training=None):
    """Run one ConvLSTM step followed by the self-attention memory update.

    ``states`` is read as ``[h, c, m]``: previous hidden state, previous
    cell state and previous attention memory.

    Fixes relative to the previous version:
    * the new states are returned in the same ``[h, c, m]`` order in which
      they are read. Previously they were returned as ``[c, h, m]``, which
      swapped the hidden and cell state on the next step.
    * the self-attention gate biases are only added when they exist.
      Previously ``use_bias=False`` led to ``tensor + None``.

    Returns
    -------
    tuple
        ``(h_hat_t, [h_hat_t, c_t, m_t])``.
    """
    h_tm1 = states[0]
    c_tm1 = states[1]
    m_t_minus_one = states[2]  # h, w, filters

    # dropout matrices for input units
    dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
    # dropout matrices for recurrent units
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(
        h_tm1, training, count=4)

    if 0 < self.dropout < 1.:
        inputs_i = inputs * dp_mask[0]
        inputs_f = inputs * dp_mask[1]
        inputs_c = inputs * dp_mask[2]
        inputs_o = inputs * dp_mask[3]
    else:
        inputs_i = inputs
        inputs_f = inputs
        inputs_c = inputs
        inputs_o = inputs

    if 0 < self.recurrent_dropout < 1.:
        h_tm1_i = h_tm1 * rec_dp_mask[0]
        h_tm1_f = h_tm1 * rec_dp_mask[1]
        h_tm1_c = h_tm1 * rec_dp_mask[2]
        h_tm1_o = h_tm1 * rec_dp_mask[3]
    else:
        h_tm1_i = h_tm1
        h_tm1_f = h_tm1
        h_tm1_c = h_tm1
        h_tm1_o = h_tm1

    # One kernel slice per gate, split along the last (filter) axis.
    (kernel_i, kernel_f,
     kernel_c, kernel_o) = array_ops.split(self.kernel, 4, axis=3)
    (recurrent_kernel_i, recurrent_kernel_f,
     recurrent_kernel_c, recurrent_kernel_o) = array_ops.split(
         self.recurrent_kernel, 4, axis=3)

    if self.use_bias:
        bias_i, bias_f, bias_c, bias_o = array_ops.split(self.bias, 4)
    else:
        bias_i, bias_f, bias_c, bias_o = None, None, None, None

    x_i = self.input_conv(inputs_i, kernel_i, bias_i, padding=self.padding)
    x_f = self.input_conv(inputs_f, kernel_f, bias_f, padding=self.padding)
    x_c = self.input_conv(inputs_c, kernel_c, bias_c, padding=self.padding)
    x_o = self.input_conv(inputs_o, kernel_o, bias_o, padding=self.padding)
    h_i = self.recurrent_conv(h_tm1_i, recurrent_kernel_i)
    h_f = self.recurrent_conv(h_tm1_f, recurrent_kernel_f)
    h_c = self.recurrent_conv(h_tm1_c, recurrent_kernel_c)
    h_o = self.recurrent_conv(h_tm1_o, recurrent_kernel_o)

    i = self.recurrent_activation(x_i + h_i)
    f = self.recurrent_activation(x_f + h_f)
    c = f * c_tm1 + i * self.activation(x_c + h_c)
    o = self.recurrent_activation(x_o + h_o)
    h = o * self.activation(c)

    # sa computation
    h_t, c_t = h, c
    (kernel_hv, kernel_hk, kernel_hq,
     kernel_mk, kernel_mv) = array_ops.split(self.sa_kernel, 5, axis=3)

    if self.use_bias:
        sa_bias_i, sa_bias_g, sa_bias_o = array_ops.split(self.sa_bias, 3)
    else:
        sa_bias_i, sa_bias_g, sa_bias_o = None, None, None

    v_h = self.sa_conv(h_t, kernel_hv)
    k_h = self.sa_conv(h_t, kernel_hk)
    q_h = self.sa_conv(h_t, kernel_hq)
    k_m = self.sa_conv(m_t_minus_one, kernel_mk)
    v_m = self.sa_conv(m_t_minus_one, kernel_mv)

    # Drop axis 3 so query/key/value can be combined with matmul.
    q_h = K.squeeze(q_h, 3)
    k_m = K.squeeze(k_m, 3)
    k_h = K.squeeze(k_h, 3)

    e_m = tf.matmul(q_h, k_m)
    alpha_m = K.softmax(e_m)
    e_h = tf.matmul(q_h, k_h)
    alpha_h = K.softmax(e_h)

    v_m = K.squeeze(v_m, 3)
    v_h = K.squeeze(v_h, 3)
    z_m = tf.matmul(alpha_m, v_m)
    z_h = tf.matmul(alpha_h, v_h)
    z_m = K.expand_dims(z_m, 3)
    z_h = K.expand_dims(z_h, 3)

    zi = self.sa_conv(K.concatenate((z_h, z_m), 3), self.kernel_z)

    (kernel_m_zi, kernel_m_hi, kernel_m_zg,
     kernel_m_hg, kernel_m_zo, kernel_m_ho) = array_ops.split(
         self.depth_wise_kernel, 6, axis=3)

    def _memory_gate(activation, z_kernel, h_kernel, bias):
        # Depthwise conv of the attention output plus depthwise conv of the
        # hidden state, with the bias added only when the layer has one.
        pre_activation = (
            K.depthwise_conv2d(zi, z_kernel, padding='same') +
            K.depthwise_conv2d(h_t, h_kernel, padding='same'))
        if bias is not None:
            pre_activation = pre_activation + bias
        return activation(pre_activation)

    i = _memory_gate(K.sigmoid, kernel_m_zi, kernel_m_hi, sa_bias_i)
    g = _memory_gate(K.tanh, kernel_m_zg, kernel_m_hg, sa_bias_g)
    o = _memory_gate(K.sigmoid, kernel_m_zo, kernel_m_ho, sa_bias_o)

    m_t = (1 - i) * m_t_minus_one + i * g
    h_hat_t = m_t * o
    # sa computation end

    # Same [h, c, m] order as the states are read at the top.
    return h_hat_t, [h_hat_t, c_t, m_t]
def line_loss(y_true, y_pred):
    """Return ``-mean(log(sigmoid(y_true * y_pred)))`` with a clipped log.

    The sigmoid output is clipped from below at ``1e-8`` (and from above at
    its own maximum) before the logarithm is taken.
    """
    probabilities = K.sigmoid(y_true * y_pred)
    upper_bound = tf.reduce_max(probabilities)
    # Keep the argument of the log away from zero to avoid NaN.
    clipped = tf.clip_by_value(probabilities, 1e-8, upper_bound)
    log_likelihood = K.log(clipped)
    return -K.mean(log_likelihood)
def build(self, input_shape):
    """Create the RBM parameters and the symbolic sampling functions.

    Creates ``rbm_weight``, ``hidden_bias`` and ``visible_bias``. For the
    two supported modes it also creates the placeholders and backend
    functions that sample hidden units from visible ones
    (``transform_func``) and visible units from hidden ones
    (``inv_transform_func``). Finally it builds the free-energy function.

    Fixes relative to the previous version:
    * ``K.cast`` is given its required dtype (``K.floatx()``).
    * the noise used to sample hidden units has ``output_dim`` columns
      instead of ``input_shape[1]``, matching the tensor it is compared to.
    * the placeholders are created before the mode branches, so the
      free-energy graph can be built even when the mode is not one of the
      two handled here.

    Parameters
    ----------
    input_shape : tuple
        Shape of the layer input; ``input_shape[1]`` is the number of
        visible units.
    """
    num_visible = input_shape[1]
    num_hidden = self.output_dim
    batch_size = self.hps['batch_size']

    self.rbm_weight = self.add_weight(
        name='rbm_weight',
        shape=(num_visible, num_hidden),
        initializer='uniform',  # Which initializer is optimal?
        trainable=True)
    self.hidden_bias = self.add_weight(name='rbm_hidden_bias',
                                       shape=(num_hidden, ),
                                       initializer='uniform',
                                       trainable=True)
    self.visible_bias = K.variable(
        initializers.get('uniform')((num_visible, )),
        dtype=K.floatx(),
        name='rbm_visible_bias')

    # Make symbolic computation objects.
    self.input_visible = K.placeholder(shape=(None, num_visible),
                                       name='input_visible')
    self.input_hidden = K.placeholder(shape=(None, num_hidden),
                                      name='input_hidden')
    hidden_pre = K.dot(self.input_visible, self.rbm_weight) + self.hidden_bias
    visible_pre = (K.dot(self.input_hidden, K.transpose(self.rbm_weight)) +
                   self.visible_bias)

    if self.mode == MODE_VISIBLE_BERNOULLI:
        # Transform visible units.
        self.transform = K.cast(
            K.less(K.random_uniform(shape=(batch_size, num_hidden)),
                   K.sigmoid(hidden_pre)),
            K.floatx())
        self.transform_func = K.function([self.input_visible],
                                         [self.transform])

        # Transform hidden units.
        self.inv_transform = K.cast(
            K.less(K.random_uniform(shape=(batch_size, num_visible)),
                   K.sigmoid(visible_pre)),
            K.floatx())
        self.inv_transform_func = K.function([self.input_hidden],
                                             [self.inv_transform])
    elif self.mode == MODE_VISIBLE_GAUSSIAN:
        # Transform visible units.
        self.transform = K.cast(
            K.less(K.random_uniform(shape=(batch_size, num_hidden)),
                   K.relu(hidden_pre)),  # ?
            K.floatx())
        self.transform_func = K.function([self.input_visible],
                                         [self.transform])

        # Transform hidden units.
        self.inv_transform = Ke.multivariate_normal_diag(
            loc=visible_pre,
            scale_diag=np.ones(shape=(batch_size, num_visible))).sample()
        self.inv_transform_func = K.function([self.input_hidden],
                                             [self.inv_transform])
    else:
        # TODO: other modes are not implemented; no sampling functions are
        # created for them.
        pass

    # Calculate free energy. #?
    self.free_energy = -1 * (
        K.squeeze(
            K.dot(self.input_visible,
                  K.expand_dims(self.visible_bias, axis=-1)), -1) +
        K.sum(K.log(1 + K.exp(hidden_pre)), axis=-1))
    self.free_energy_func = K.function([self.input_visible],
                                       [self.free_energy])

    super(RBM, self).build(input_shape)
def _build_cd_functions(self, num_rows, num_visible):
    """Build the one-step contrastive-divergence functions for one batch size.

    Returns a 4-tuple of backend functions (weight update, hidden-bias
    update, visible-bias update, first visible sample), or ``None`` when
    ``self.mode`` is not one of the two supported modes.
    """
    if self.mode not in (MODE_VISIBLE_BERNOULLI, MODE_VISIBLE_GAUSSIAN):
        return None

    # Contrastive divergence.
    v_pos = self.input_visible
    # NOTE(review): self.transform is sampled with a noise tensor created
    # elsewhere; verify that it accepts a smaller final batch.
    h_pos = self.transform
    visible_pre = (K.dot(h_pos, K.transpose(self.rbm_weight)) +
                   self.visible_bias)

    if self.mode == MODE_VISIBLE_BERNOULLI:
        v_neg = K.cast(
            K.less(K.random_uniform(shape=(num_rows, num_visible)),
                   K.sigmoid(visible_pre)),
            dtype=np.float32)
    else:
        v_neg = Ke.multivariate_normal_diag(
            loc=visible_pre,
            scale_diag=np.ones(shape=(num_rows, num_visible))).sample()

    h_neg = K.sigmoid(K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
    update = K.transpose(
        K.transpose(K.dot(K.transpose(v_pos), h_pos)) -
        K.dot(K.transpose(h_neg), v_neg))

    lr = self.hps['lr']
    feed = [self.input_visible]
    weight_update_func = K.function(
        feed, [K.update_add(self.rbm_weight, lr * update)])
    hidden_bias_update_func = K.function(
        feed,
        [K.update_add(self.hidden_bias,
                      lr * (K.sum(h_pos, axis=0) - K.sum(h_neg, axis=0)))])
    visible_bias_update_func = K.function(
        feed,
        [K.update_add(self.visible_bias,
                      lr * (K.sum(v_pos, axis=0) - K.sum(v_neg, axis=0)))])
    # Create the first visible nodes sampling object.
    sample_first_visible = K.function(feed, [v_neg])
    return (weight_update_func, hidden_bias_update_func,
            visible_bias_update_func, sample_first_visible)


def fit(self, V, verbose=1):
    """Train RBM with the data V.

    Fixes relative to the previous version:
    * the noise shape for the final batch was written as
      ``V.shape[0] - int(i * batch_size, V.shape[1])``, which calls ``int``
      with a base argument and raises ``TypeError``. The shape is now
      ``(V.shape[0] - i * batch_size, V.shape[1])``.
    * the four copies of the graph-building code live in one helper, and
      the resulting functions are built once per distinct batch size
      instead of once per epoch.

    Parameters
    ----------
    V : 2d numpy array
        Visible data (batch size x input_dim).
    verbose : integer
        Verbose mode (default, 1).
    """
    batch_size = self.hps['batch_size']
    num_samples, num_visible = V.shape[0], V.shape[1]
    # Number of batches per epoch, counting a final partial batch.
    num_step = (num_samples + batch_size - 1) // batch_size

    built = {}

    def install(num_rows):
        # Bind the update functions for this batch size, building on demand.
        if num_rows not in built:
            built[num_rows] = self._build_cd_functions(num_rows, num_visible)
        funcs = built[num_rows]
        if funcs is not None:
            (self.rbm_weight_update_func,
             self.hidden_bias_update_func,
             self.visible_bias_update_func,
             self.sample_first_visible) = funcs

    for k in range(self.hps['epochs']):
        if verbose == 1:
            print(k + 1, '/', self.hps['epochs'], ' epochs', end='\r')

        install(batch_size)

        for i in range(num_step):
            start = int(i * batch_size)
            if i == (num_step - 1):
                # The final batch may hold fewer than batch_size rows.
                install(num_samples - start)
                V_batch = [V[start:num_samples]]
            else:
                V_batch = [V[start:int((i + 1) * batch_size)]]

            # Train.
            self.rbm_weight_update_func(V_batch)
            self.hidden_bias_update_func(V_batch)
            self.visible_bias_update_func(V_batch)

            # Calculate a training score by each step.
            # Free energy of the input visible nodes.
            fe = self.cal_free_energy(V_batch)

            # Free energy of the first sampled visible nodes.
            V_p_batch = self.sample_first_visible(V_batch)
            fe_p = self.cal_free_energy(V_p_batch)

            score = np.mean(np.abs(fe[0] - fe_p[0]))  # Scale?
            print('\n{0:d}/{1:d}, score: {2:f}'.format(
                i + 1, num_step, score))
def yolo_loss(yolo_output,
              true_boxes,
              detectors_mask,
              matching_true_boxes,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tf.Tensor
        Final convolutional layer features.
    true_boxes : tf.Tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.
    detectors_mask : np.ndarray
        0/1 mask for detector positions where there is a matching ground
        truth.
    matching_true_boxes : np.ndarray
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.
    anchors : np.ndarray
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    print_loss : bool, default=False
        If True then print the loss components.

    Returns
    -------
    total_loss : tensor
        Half the sum of the confidence, classification and coordinate
        losses.

    Notes
    -----
    The result of ``yolo_head`` is unpacked as
    ``(xy, wh, confidence, class_prob)``, in that order.
    """
    # Loss weights and the IOU threshold for counting a detection.
    OBJECT_SCALE = 5
    NO_OBJECT_SCALE = 1
    CLASS_SCALE = 1
    COORDINATES_SCALE = 1
    IOU_THRESHOLD = 0.6

    def _corners(centers, sizes):
        # Turn centre/size boxes into (min corner, max corner).
        half_sizes = sizes / 2.
        return centers - half_sizes, centers + half_sizes

    num_anchors = len(anchors)
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    out_shape = K.shape(yolo_output)
    raw = K.reshape(
        yolo_output,
        [-1, out_shape[1], out_shape[2], num_anchors, num_classes + 5])
    pred_boxes = K.concatenate(
        (K.sigmoid(raw[..., 0:2]), raw[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Insert an axis so predictions broadcast against every true box:
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)
    pred_mins, pred_maxes = _corners(pred_xy, pred_wh)

    gt_shape = K.shape(true_boxes)
    true_boxes = K.reshape(
        true_boxes, [gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2]])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]
    true_mins, true_maxes = _corners(true_xy, true_wh)

    # IOU of each predicted box with each ground truth box.
    overlap_wh = K.maximum(
        K.minimum(pred_maxes, true_maxes) - K.maximum(pred_mins, true_mins),
        0.)
    intersect_areas = overlap_wh[..., 0] * overlap_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    iou_scores = intersect_areas / (pred_areas + true_areas - intersect_areas)

    # Best IOU per location, with a trailing axis restored.
    best_ious = K.expand_dims(K.max(iou_scores, axis=4))

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > IOU_THRESHOLD, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # TODO: training steps to encourage predictions to match anchor priors.

    # Confidence loss. NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (NO_OBJECT_SCALE * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    confidence_target = best_ious if rescore_confidence else 1
    objects_loss = (OBJECT_SCALE * detectors_mask *
                    K.square(confidence_target - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    class_ids = K.cast(matching_true_boxes[..., 4], 'int32')
    class_targets = K.one_hot(class_ids, num_classes)
    classification_loss = (CLASS_SCALE * detectors_mask *
                           K.square(class_targets - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    box_targets = matching_true_boxes[..., 0:4]
    coordinates_loss = (COORDINATES_SCALE * detectors_mask *
                        K.square(box_targets - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)

    if print_loss:
        # TODO: printing Tensor values. Maybe use eval function or session?
        print(
            'yolo_loss: {}, conf_loss: {}, class_loss: {}, box_coord_loss: {}'.
            format(total_loss, confidence_loss_sum, classification_loss_sum,
                   coordinates_loss_sum))
    return total_loss
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tf.Tensor
        Final convolutional layer features.
    anchors : np.array, list
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tf.Tensor
        Sigmoid of the confidence logit of each box.
    box_xy : tf.Tensor
        (x, y) box predictions offset by the cell index and divided by the
        conv dimensions.
    box_wh : tf.Tensor
        (w, h) box predictions scaled by the anchors and divided by the conv
        dimensions.
    box_class_probs : tf.Tensor
        Softmax over the class logits of each box.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])
    feat_dtype = K.dtype(feats)

    # Dynamic conv dims so the model can stay fully convolutional.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    grid_h, grid_w = conv_dims[0], conv_dims[1]

    # Per-cell index pairs. In YOLO the height index is the inner most
    # iteration.
    height_ids = K.tile(K.arange(0, stop=grid_h), [grid_w])
    width_ids = K.tile(K.expand_dims(K.arange(0, stop=grid_w), 0),
                       [grid_h, 1])
    width_ids = K.flatten(K.transpose(width_ids))
    conv_index = K.transpose(K.stack([height_ids, width_ids]))
    conv_index = K.cast(
        K.reshape(conv_index, [1, grid_h, grid_w, 1, 2]), feat_dtype)

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), feat_dtype)

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (K.sigmoid(feats[..., :2]) + conv_index) / conv_dims
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
def line_loss(y_true, y_pred):
    """Return the mean negative log-sigmoid of ``y_true * y_pred``.

    Evaluated as ``mean(softplus(-y_true * y_pred))``, which equals
    ``-mean(log(sigmoid(y_true * y_pred)))`` but does not produce ``log(0)``
    when the sigmoid underflows for strongly negative products.
    """
    # -log(sigmoid(x)) == softplus(-x)
    return kb.mean(kb.softplus(-(y_true * y_pred)))
def line_loss(y_true, y_pred):
    """Return the mean negative log-sigmoid of ``y_true * y_pred``.

    Evaluated as ``mean(softplus(-y_true * y_pred))``, which equals
    ``-mean(log(sigmoid(y_true * y_pred)))`` but does not produce ``log(0)``
    when the sigmoid underflows for strongly negative products.
    """
    # -log(sigmoid(x)) == softplus(-x)
    return K.mean(K.softplus(-(y_true * y_pred)))
# Script section: parse the command-line options, build the embedding model
# and its loss, then wire up a small convolutional stack on paired inputs.
args = parser.parse_args()
dataloader = Dataloader(args)
p = PointEmbedding(args)
p_model = p.build()
loss_func = p.custom_loss()

# First pair: two 28x28 inputs, each given a trailing channel axis and then
# joined along axis 2.
x1 = Input((28, 28))
x2 = Input((28, 28))
x1_r = K.reshape(x1, (-1, 28, 28, 1))
x2_r = K.reshape(x2, (-1, 28, 28, 1))
pair1 = K.concatenate([x1_r, x2_r], axis=2)

# Second pair, built the same way.
# NOTE(review): pair2 is not consumed by the layers below in this span.
x3 = Input((28, 28))
x4 = Input((28, 28))
x3_r = K.reshape(x3, (-1, 28, 28, 1))
x4_r = K.reshape(x4, (-1, 28, 28, 1))
pair2 = K.concatenate([x3_r, x4_r], axis=2)

# Two conv + max-pool stages on the first pair, flattened into a 2-unit
# dense output.
output = Conv2D(1, (5, 5), (2, 2))(pair1)
output = MaxPooling2D((2, 2), (1, 1))(output)
output = Conv2D(1, (5, 5), (2, 2))(output)
output = MaxPooling2D((2, 2), (1, 1))(output)
output = Flatten()(output)
output = Dense(2)(output)

# Debug output: the squared output, its sum over the last axis, and the
# sigmoid of that sum (these print the symbolic tensors).
print(K.square(output))
print(K.sum(K.square(output), axis=-1))
print(K.sigmoid(K.sum(K.square(output), axis=-1)))