def get_split_averages(input_tensor, input_mask, indices):
    """Split a sequence around a per-sample index and average each side.

    Args:
        input_tensor: (batch_size, input_length, input_dim) values.
        input_mask: (batch_size, input_length) mask, or None.
        indices: (batch_size, 1) split position for every sample.

    Returns:
        A tuple ``(pre_average, values_at_indices, post_average)``, each of
        shape (batch_size, input_dim). The two averages are cast to float32.
    """
    batch_size = K.shape(input_tensor)[0]
    input_length = K.shape(input_tensor)[1]

    # (1, input_length) positions, repeated for every sample.
    positions = K.expand_dims(K.arange(input_length), dim=0)
    positions = K.repeat_elements(positions, batch_size, 0)
    # (batch_size, 1) indices, repeated for every position.
    targets = K.repeat_elements(indices, input_length, 1)

    # All three masks are (batch_size, input_length).
    after_mask = K.greater(positions, targets)
    before_mask = K.lesser(positions, targets)
    at_mask = K.equal(positions, targets)

    # Padded positions must not contribute to either average.
    if input_mask is not None:
        after_mask = switch(input_mask, after_mask, K.zeros_like(after_mask))
        before_mask = switch(input_mask, before_mask, K.zeros_like(before_mask))

    def _masked_sum(position_mask):
        # Zero out unselected timesteps, then sum over time -> (batch_size, input_dim).
        selected = switch(K.expand_dims(position_mask), input_tensor,
                          K.zeros_like(input_tensor))
        return K.sum(selected, axis=1)

    post_sum = _masked_sum(after_mask)
    pre_sum = _masked_sum(before_mask)
    values_at_indices = _masked_sum(at_mask)

    # Epsilon keeps the division defined when one side of the split is empty.
    post_normalizer = K.expand_dims(K.sum(after_mask, axis=1) + K.epsilon(), dim=1)
    pre_normalizer = K.expand_dims(K.sum(before_mask, axis=1) + K.epsilon(), dim=1)

    pre_average = K.cast(pre_sum / pre_normalizer, 'float32')
    post_average = K.cast(post_sum / post_normalizer, 'float32')
    return pre_average, values_at_indices, post_average
def call(self, x, mask=None):
    """Compose head, preposition and child encodings into label probabilities.

    Args:
        x: list or tuple of tensors.
            x[0]: (batch_size, input_length, input_dim) encoded sentence.
            x[1]: (batch_size, 1) indices of prepositions.
            x[2] (only read when ``self.with_attachment_probs`` is set):
            (batch_size, input_length - 2) head attachment probabilities.
        mask: unused by this implementation.

    Returns:
        (batch_size, num_classes) softmax over the class scores.
    """
    assert isinstance(x, (list, tuple))
    encoded_sentence = x[0]
    prep_indices = K.squeeze(x[1], axis=-1)  # (batch_size,)
    batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)

    if self.with_attachment_probs:
        # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
        head_probs = x[2]
        head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
        # (batch_size, input_length)
        padded_head_probs = K.concatenate([head_probs, head_probs_padding])
        # (batch_size, 1)
        max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
        # (batch_size, input_length, 1)
        max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
        # (batch_size, input_length, input_dim)
        masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence,
                                        K.zeros_like(encoded_sentence))
        # (batch_size, input_dim); positions tied for the max are summed together.
        head_encoding = K.sum(masked_head_encoding, axis=1)
    else:
        # Without attachment probabilities, the head is the token just before the preposition.
        head_indices = prep_indices - 1  # (batch_size,)
        head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)

    prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
    child_encoding = encoded_sentence[batch_indices, prep_indices+1, :]  # (batch_size, input_dim)

    # NOTE: an earlier, disabled variant derived the three encodings from
    # self.get_split_averages(encoded_sentence, sentence_mask, prep_indices)
    # after collapsing a >2-dim Bidirectional mask with K.any. It used to live
    # here as a dead string literal and has been removed.

    head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, proj_dim)
    prep_projection = K.dot(prep_encoding, self.proj_prep)  # (batch_size, proj_dim)
    child_projection = K.dot(child_encoding, self.proj_child)  # (batch_size, proj_dim)

    # (batch_size, proj_dim)
    if self.composition_type == 'HPCT':
        composed_projection = K.tanh(head_projection + prep_projection + child_projection)
    elif self.composition_type == 'HPC':
        prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
        composed_projection = K.tanh(head_projection + prep_child_projection)
    else:
        # Composition type in HC
        composed_projection = K.tanh(head_projection + child_projection)

    for hidden_layer in self.hidden_layers:
        composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)

    # (batch_size, num_classes)
    class_scores = K.dot(composed_projection, self.scorer)
    label_probabilities = K.softmax(class_scores)
    return label_probabilities
def step(self, x, states):
    """Run one timestep of an LSTM that attends over a tape of past states.

    Reads ``states[0]`` (previous attended hidden summary) and the dropout
    masks ``states[1]`` / ``states[2]``. Appends the new cell and hidden
    values to ``self.C_tape`` / ``self.H_tape`` as a side effect, so calls
    are order-dependent.

    Returns:
        ``(h_t, [h_tilde_t])``: the new hidden output and the attended
        hidden summary that becomes the next state.
    """
    h_tild_tm1 = states[0]
    B_U = states[1]
    B_W = states[2]
    if self.consume_less == 'cpu':
        # Input was pre-projected; slice out the four gate pre-activations and
        # treat whatever follows them as the raw input used for attention.
        x_i = x[:, :self.output_dim]
        x_f = x[:, self.output_dim: 2 * self.output_dim]
        x_c = x[:, 2 * self.output_dim: 3 * self.output_dim]
        x_o = x[:, 3 * self.output_dim: 4 * self.output_dim]
        x_new = x[:, 4 * self.output_dim:]
    else:
        x_i = K.dot(x * B_W[0], self.W_i) + self.b_i
        x_f = K.dot(x * B_W[1], self.W_f) + self.b_f
        x_c = K.dot(x * B_W[2], self.W_c) + self.b_c
        x_o = K.dot(x * B_W[3], self.W_o) + self.b_o
        x_new = x
    # self.C_tape -> BT, t-1, k
    # self.H_tape -> BT, t-1, k
    # x -> BT, k
    # h_tild_tm1 -> BT, k
    if self.H_tape is None:
        # First call: seed both tapes with a single all-zero entry.
        self.H_tape = K.zeros_like(h_tild_tm1).dimshuffle((0,'x',1))
        self.C_tape = K.zeros_like(h_tild_tm1).dimshuffle((0,'x',1))
    # s_t -> BT, t-1, 1
    t = K.shape(self.C_tape)[1]
    # Attention scores over every tape entry, conditioned on the tape itself,
    # the current input and the previous attended summary.
    sum1 = K.dot(self.H_tape, self.W_h)
    sum2 = K.dot(K.repeat_elements(x_new.dimshuffle((0,'x',1)),t, axis=1), self.W_x)
    sum3 = K.dot(K.repeat_elements(h_tild_tm1.dimshuffle((0,'x',1)),t, axis=1), self.W_h_tilde)
    tanhed_sum = K.tanh(sum1 + sum2 + sum3)
    a_t = K.dot(tanhed_sum, self.v)[:,:,0]
    s_t = K.softmax(a_t)
    # Attention-weighted summaries of the hidden and cell tapes.
    h_tilde_t = T.batched_dot(self.H_tape.dimshuffle((0,2,1)), s_t.dimshuffle((0,1,'x')))[:,:,0]
    c_tilde_t = T.batched_dot(self.C_tape.dimshuffle((0,2,1)), s_t.dimshuffle((0,1,'x')))[:,:,0]
    # Standard LSTM gating, with the attended summaries standing in for h_{t-1} / c_{t-1}.
    i = self.inner_activation(x_i + K.dot(h_tilde_t * B_U[0], self.U_i))
    f = self.inner_activation(x_f + K.dot(h_tilde_t * B_U[1], self.U_f))
    c_t = f * c_tilde_t + i * self.activation(x_c + K.dot(h_tilde_t * B_U[2], self.U_c))
    o = self.inner_activation(x_o + K.dot(h_tilde_t * B_U[3], self.U_o))
    h_t = o * self.activation(c_t)
    # Add to Tape
    self.C_tape = K.concatenate([self.C_tape, c_t.dimshuffle((0,'x',1))], axis=1)
    self.H_tape = K.concatenate([self.H_tape, h_t.dimshuffle((0,'x',1))], axis=1)
    return h_t, [h_tilde_t]
def get_initial_states(self, x):
    """Build the all-zero initial states ``[M, h, w]`` for a batch.

    Shapes: M is (nb_samples, nb_slots, memory_size), h is
    (nb_samples, memory_size) and w is (nb_samples, nb_slots).
    """
    def _stacked_zeros(*sizes):
        # Start from a (nb_samples,) zero vector and stack it once per size,
        # so the sample axis ends up last.
        block = K.zeros_like(x[:, 0, 0])
        for size in sizes:
            block = K.pack([block] * size)
        return block

    # (memory_size, nb_slots, nb_samples) -> (nb_samples, nb_slots, memory_size)
    M = K.permute_dimensions(_stacked_zeros(self.nb_slots, self.memory_size), (2, 1, 0))
    # (memory_size, nb_samples) -> (nb_samples, memory_size)
    h = K.permute_dimensions(_stacked_zeros(self.memory_size), (1, 0))
    # (nb_slots, nb_samples) -> (nb_samples, nb_slots)
    w = K.permute_dimensions(_stacked_zeros(self.nb_slots), (1, 0))
    return [M, h, w]
def _get_initial_state(self, X):
    """Return an all-zero state of shape (nb_samples, output_dim).

    X is expected as (input_length, nb_sample, input_dim). The zeros are
    derived from X so the batch size stays symbolic.
    """
    # (input_length, nb_sample, input_dim) -> (nb_samples, input_dim)
    zeros_per_sample = K.sum(K.zeros_like(X), axis=0)
    # Multiplying by a zero matrix only changes the last axis to output_dim.
    projection = K.zeros((self.input_dim, self.output_dim))
    return K.dot(zeros_per_sample, projection)
def compile(self, optimizer, metrics=None):
    """Build and compile the trainable model wrapping ``self.model``.

    Args:
        optimizer: optimizer passed to the trainable model.
        metrics: optional iterable of extra metrics; ``mean_q`` is always
            appended. The caller's list is never modified.

    Side effects:
        Sets ``self.trainable_model`` and ``self.compiled``.
    """
    # Copy instead of `+=`: the old mutable default grew on every call and
    # the caller's list was modified in place.
    metrics = list(metrics or []) + [mean_q]  # register default metrics

    def clipped_masked_error(args):
        y_true, y_pred, mask = args
        loss = huber_loss(y_true, y_pred, self.delta_clip)
        loss *= mask  # apply element-wise mask
        return K.sum(loss, axis=-1)

    # Create trainable model. The problem is that we need to mask the output since we only
    # ever want to update the Q values for a certain action. The way we achieve this is by
    # using a custom Lambda layer that computes the loss. This gives us the necessary flexibility
    # to mask out certain parameters by passing in multiple inputs to the Lambda layer.
    y_pred = self.model.output
    y_true = Input(name='y_true', shape=(self.nb_actions,))
    mask = Input(name='mask', shape=(self.nb_actions,))
    # Input order must match the unpacking in clipped_masked_error.
    loss_out = Lambda(clipped_masked_error, output_shape=(1,), name='loss')([y_true, y_pred, mask])
    ins = self.model.input if isinstance(self.model.input, list) else [self.model.input]
    trainable_model = Model(inputs=ins + [y_true, mask], outputs=[loss_out, y_pred])
    assert len(trainable_model.output_names) == 2
    combined_metrics = {trainable_model.output_names[1]: metrics}
    losses = [
        lambda y_true, y_pred: y_pred,  # loss is computed in Lambda layer
        lambda y_true, y_pred: K.zeros_like(y_pred),  # we only include this for the metrics
    ]
    trainable_model.compile(optimizer=optimizer, loss=losses, metrics=combined_metrics)
    self.trainable_model = trainable_model
    self.compiled = True
def accumulate(attend_function, inputs, input_length, mask=None, return_probabilities=False):
    '''Compute a running attention over a sequence.

    At step ``i`` only items ``0..i`` of the second axis are visible (the
    rest are zeroed) and ``attend_function`` is applied to that prefix, so
    output ``i`` is the attended summary of the first ``i + 1`` items.

    Args:
        attend_function: callable taking ``(inputs_i)`` or
            ``(inputs_i, mask_i)`` and returning a 2-d tensor with the time
            axis marginalised out. When ``return_probabilities`` is true it
            must return ``(output, p_vectors)``.
        inputs: tensor with at least 3 dims; axes 0 and 1 are swapped
            internally so that time comes first.
        input_length: static number of timesteps to iterate over.
        mask: optional tensor with ``ndim`` or ``ndim - 1`` dims.
        return_probabilities: also return the stacked attention vectors.

    Returns:
        The stacked outputs with the first two axes swapped back, or
        ``[outputs, out_pvecs]`` when ``return_probabilities`` is true.

    author: bcm
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'
    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    successive_outputs = []
    successive_pvecs = []
    uncover_mask = K.zeros_like(inputs)
    # Time indices shaped (input_length, 1, ..., 1) so they broadcast against inputs.
    uncover_indices = K.arange(input_length)
    for _ in range(ndim - 1):
        uncover_indices = K.expand_dims(uncover_indices)

    def make_subset(i, X):
        # Keep timesteps <= i, zero out everything not yet uncovered.
        return K.switch(uncover_indices <= i, X, uncover_mask)

    for i in range(input_length):
        inputs_i = make_subset(i, inputs)
        if mask is not None:
            # Only subset the mask when there is one; this used to run
            # unconditionally and broke the mask-free path.
            output = attend_function(inputs_i, make_subset(i, mask))
        else:
            output = attend_function(inputs_i)
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)

    outputs = K.pack(successive_outputs)
    outputs = outputs.dimshuffle([1, 0] + list(range(2, outputs.ndim)))
    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        # Use the pvecs' own rank; it need not match that of `outputs`.
        out_pvecs = out_pvecs.dimshuffle([1, 0] + list(range(2, out_pvecs.ndim)))
        outputs = [outputs, out_pvecs]
    return outputs
def call(self, inputs, **kwargs):
    """Following the routing algorithm from Hinton's paper,
    but replace b = b + <u,v> with b = <u,v>.

    This change can improve the feature representation of the capsule.

    However, you can replace
        b = K.batch_dot(outputs, hat_inputs, [2, 3])
    with
        b += K.batch_dot(outputs, hat_inputs, [2, 3])
    to get standard routing.
    """
    if self.share_weights:
        hat_inputs = K.conv1d(inputs, self.kernel)
    else:
        hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

    batch_size = K.shape(inputs)[0]
    input_num_capsule = K.shape(inputs)[1]
    hat_inputs = K.reshape(hat_inputs,
                           (batch_size, input_num_capsule,
                            self.num_capsule, self.dim_capsule))
    # -> (batch, num_capsule, input_num_capsule, dim_capsule)
    hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

    # Routing logits, one per (output capsule, input capsule) pair.
    b = K.zeros_like(hat_inputs[:, :, :, 0])
    # (A leftover debug print of self.routings was removed from here.)
    for i in range(self.routings):
        c = K.softmax(b, 1)
        o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
        if i < self.routings - 1:
            b = K.batch_dot(o, hat_inputs, [2, 3])
            if K.backend() == 'theano':
                o = K.sum(o, axis=1)
    return o
def call(self, inputs, mask=None):
    """Return the mask with a trailing axis added.

    When no mask is supplied, an all-ones mask is synthesised from
    ``inputs`` by reducing its last axis.
    """
    if mask is not None:
        return K.expand_dims(mask)
    # zeros summed over the last axis are still zeros; adding 1 gives ones.
    all_ones = 1 + K.sum(K.zeros_like(inputs), axis=-1)
    return K.expand_dims(all_ones)
def compile(optimizer, metrics=None):
    """Compile the online/target models and build the masked trainable model.

    Relies on ``model``, ``n_actions``, ``mean_q``, ``clone_model`` and
    ``clipped_masked_error`` from the enclosing scope.

    Args:
        optimizer: optimizer for the trainable model.
        metrics: optional iterable of extra metrics; ``mean_q`` is always
            appended. The caller's list is never modified.

    Returns:
        ``(trainable_model, target_model)``.
    """
    # Copy instead of `+=`: the old mutable default accumulated mean_q across calls.
    metrics = list(metrics or []) + [mean_q]  # register default metrics

    # We never train the target model, hence we can set the optimizer and loss arbitrarily.
    target_model = clone_model(model)
    target_model.compile(optimizer='sgd', loss='mse')
    model.compile(optimizer='sgd', loss='mse')

    # Create trainable model. The problem is that we need to mask the output since we only
    # ever want to update the Q values for a certain action. The way we achieve this is by
    # using a custom Lambda layer that computes the loss. This gives us the necessary flexibility
    # to mask out certain parameters by passing in multiple inputs to the Lambda layer.
    y_pred = model.output
    y_true = Input(name='y_true', shape=(n_actions,))
    mask = Input(name='mask', shape=(n_actions,))
    loss_out = Lambda(clipped_masked_error, output_shape=(1,), name='loss')([y_true, y_pred, mask])
    ins = [model.input]
    trainable_model = Model(inputs=ins + [y_true, mask], outputs=[loss_out, y_pred])
    assert len(trainable_model.output_names) == 2
    # NOTE(review): hard-codes the name of the wrapped model's output layer.
    assert trainable_model.output_names[1] == 'dense2'
    combined_metrics = {trainable_model.output_names[1]: metrics}
    losses = [
        lambda y_true, y_pred: y_pred,  # loss is computed in Lambda layer
        lambda y_true, y_pred: K.zeros_like(y_pred),  # we only include this for the metrics
    ]
    trainable_model.compile(optimizer=optimizer, loss=losses, metrics=combined_metrics)
    return trainable_model, target_model
def step(x):
    """Element-wise unit step: 1 where ``x > 0``, otherwise 0.

    On the TensorFlow backend the result has the dtype of ``x``; on other
    backends it is whatever ``K.switch`` yields for the scalar branches.
    """
    if _BACKEND == 'tensorflow':
        import tensorflow as tf
        # `tf.select` was removed in TensorFlow 1.0 in favour of the
        # three-argument `tf.where`; prefer `select` where it still exists
        # because very old releases only had a one-argument `where`.
        select = tf.select if hasattr(tf, 'select') else tf.where
        # Public `tf.greater` replaces the private `tf.python.math_ops` path.
        return select(tf.greater(x, 0), K.ones_like(x), K.zeros_like(x))
    return K.switch(x > 0, 1, 0)
def get_initial_states(self, x):
    """Return one all-zero (samples, nb_filter, h, w) state per entry in ``self.states``.

    ``x`` is expected as (samples, num_steps, input_channel, h, w).
    """
    # Collapse the step and channel axes: (samples, h, w).
    collapsed = K.sum(K.zeros_like(x), [1, 2])
    # Re-introduce a channel axis and widen it to the number of filters.
    with_filter_axis = K.expand_dims(collapsed, 1)
    zero_state = K.repeat_elements(with_filter_axis, self.nb_filter, 1)
    return [zero_state] * len(self.states)
def get_initial_states(self, X):
    """Return one all-zero (samples, hidden_dim) state per entry in ``self.states``.

    ``X`` is expected as (samples, input_dim).
    """
    # Multiplying zeros by a zero matrix just resizes the last axis to hidden_dim.
    zeros_in = K.zeros_like(X)
    projection = K.zeros((self.input_dim, self.hidden_dim))
    zero_state = K.dot(zeros_in, projection)
    return [zero_state] * len(self.states)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return the YOLO loss tensor.

    Parameters
    ----------
    args: sequence whose first three items are the yolo_body outputs and
        whose remaining items are the y_true arrays from preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, summed over the three scales and divided by the batch size
    '''
    yolo_outputs, y_true = args[:3], args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    label_dtype = K.dtype(y_true[0])
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, label_dtype)
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], label_dtype) for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        truth = y_true[l]
        object_mask = truth[..., 4:5]
        true_class_probs = truth[..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (truth[..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(truth[..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - truth[..., 2:3] * truth[..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(label_dtype, size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Predictions whose best IoU with any true box of sample b stays
            # under the threshold are the ones penalised as background.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            best_iou = K.max(box_iou(pred_box[b], true_box), axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = K.expand_dims(ignore_mask.stack(), -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        object_conf_loss = object_mask * K.square(1 - pred_confidence)
        background_conf_loss = (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        confidence_loss = object_conf_loss + background_conf_loss
        class_loss = object_mask * K.square(true_class_probs - pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)

    return loss / K.cast(m, K.dtype(loss))
def netD_loss(netD_predict):
    """Discriminator loss: mean of the real-vs-ones and fake-vs-zeros criteria.

    Args:
        netD_predict: pair ``(prediction_on_real, prediction_on_fake)``.

    Returns:
        ``0.5 * (loss_real + loss_fake)`` as computed by ``criterion_GAN``.
    """
    netD_predict_real, netD_predict_fake = netD_predict
    netD_loss_real = criterion_GAN(netD_predict_real, K.ones_like(netD_predict_real))
    netD_loss_fake = criterion_GAN(netD_predict_fake, K.zeros_like(netD_predict_fake))
    # 0.5 rather than (1 / 2): under Python 2 integer division the latter is 0
    # and silently zeroes the whole loss.
    loss_netD = 0.5 * (netD_loss_real + netD_loss_fake)
    return loss_netD
def yoloxyloss(y_true, y_pred, t):
    """Coordinate loss restricted to the cells selected by ``t``.

    Squared error plus a small (0.05) pull of the prediction towards 0.5,
    summed over the selected cells and normalised by ``sum(y_true)``.
    """
    squared_error = K.square(y_true - y_pred) + 0.05 * K.square(0.5 - y_pred)
    # Cells outside `t` contribute nothing.
    selected = tf.where(t, squared_error, K.zeros_like(y_true))
    normalizer = K.sum(y_true)
    # The small constant guards against division by zero.
    return K.sum(selected) / (normalizer + 0.0000001)
def get_initial_states(self, x):
    """Return one all-zero state per entry in ``self.states``.

    The state is derived from the slice ``x[:, :, 0, :]`` with axis 1 summed
    out and the last axis resized to ``output_dim``.
    """
    zeros = K.zeros_like(x[:,:,0,:])
    zeros = K.sum(zeros, axis=1)
    # A zero projection matrix only changes the last axis to output_dim.
    projection = K.zeros((self.input_dim, self.output_dim))
    zero_state = K.dot(zeros, projection)
    return [zero_state] * len(self.states)
def get_initial_states(self, x):
    """Return one all-zero (timesteps, samples, output_dim) state per entry in ``self.states``.

    ``x`` is expected as (samples, timesteps, input_dim).
    """
    initial_state = K.zeros_like(x)  # (samples, timesteps, input_dim)
    # Permute the zeros, not `x`: the zeros_like result used to be discarded,
    # so any NaN/Inf in the input leaked into the "zero" state through the dot.
    initial_state = K.permute_dimensions(initial_state, [1, 0, 2])  # (timesteps, samples, input_dim)
    reducer = K.zeros((self.input_dim, self.output_dim))
    initial_state = K.dot(initial_state, reducer)  # (timesteps, samples, output_dim)
    initial_states = [initial_state for _ in range(len(self.states))]
    return initial_states
def iou(x_true, y_true, w_true, h_true, x_pred, y_pred, w_pred, h_pred, t, pred_confid_tf):
    """Compute IoU statistics between predicted and true boxes on an 8x5 grid.

    Widths are scaled by 256 and heights by 160. ``t`` selects the cells
    that contain an object; ``pred_confid_tf`` holds predicted confidences.

    Returns:
        ``(ave_iou, recall, precision, obj_count, intersection, union,
        ow, oh, x, y, w, h)``.
    """
    x_true = K.expand_dims(x_true, 2)
    y_true = K.expand_dims(y_true, 2)
    w_true = K.expand_dims(w_true, 2)
    h_true = K.expand_dims(h_true, 2)
    x_pred = K.expand_dims(x_pred, 2)
    y_pred = K.expand_dims(y_pred, 2)
    w_pred = K.expand_dims(w_pred, 2)
    h_pred = K.expand_dims(h_pred, 2)

    # Column / row index of each of the 40 row-major grid cells; these equal
    # the previously hand-typed tables 0..7 repeated 5 times and 0..4 each
    # repeated 8 times.
    grid_cols, grid_rows = 8, 5
    xoffset = K.expand_dims(tf.convert_to_tensor(
        np.tile(np.arange(grid_cols, dtype=np.float32), grid_rows)), 1)
    yoffset = K.expand_dims(tf.convert_to_tensor(
        np.repeat(np.arange(grid_rows, dtype=np.float32), grid_cols)), 1)

    # Only predictions in object cells take part.
    x = tf.where(t, x_pred, K.zeros_like(x_pred))
    y = tf.where(t, y_pred, K.zeros_like(y_pred))
    w = tf.where(t, w_pred, K.zeros_like(w_pred))
    h = tf.where(t, h_pred, K.zeros_like(h_pred))

    ow = overlap(x + xoffset, w * 256., x_true + xoffset, w_true * 256.)
    # The true height uses the same 160 scale as the predicted height and the
    # union below; it was previously scaled by 256.
    oh = overlap(y + yoffset, h * 160., y_true + yoffset, h_true * 160.)
    ow = tf.where(K.greater(ow, 0), ow, K.zeros_like(ow))
    oh = tf.where(K.greater(oh, 0), oh, K.zeros_like(oh))
    intersection = ow * oh
    union = (w * 256. * h * 160. + w_true * 256. * h_true * 160.
             - intersection + K.epsilon())  # prevent div 0

    # find best iou among bboxs
    # iouall shape=(-1, bnum*gridcells)
    iouall = intersection / union
    obj_count = K.sum(tf.where(t, K.ones_like(x_true), K.zeros_like(x_true)))
    ave_iou = K.sum(iouall) / (obj_count + 0.0000001)

    recall_t = K.greater(iouall, 0.5)
    fid_t = K.greater(pred_confid_tf, 0.3)
    recall_count_all = K.sum(tf.where(fid_t, K.ones_like(iouall), K.zeros_like(iouall)))
    # Confident predictions that also overlap a true box by more than 0.5.
    obj_fid_t = tf.logical_and(fid_t, recall_t)
    effevtive_iou_count = K.sum(tf.where(obj_fid_t, K.ones_like(iouall), K.zeros_like(iouall)))
    recall = effevtive_iou_count / (obj_count + 0.00000001)
    precision = effevtive_iou_count / (recall_count_all + 0.0000001)
    return ave_iou, recall, precision, obj_count, intersection, union, ow, oh, x, y, w, h
def w_categorical_crossentropyold(y_true, y_pred, weights):
    """Categorical cross-entropy weighted by a (true class, predicted class) cost matrix.

    Args:
        y_true: (batch, nb_classes) one-hot targets.
        y_pred: (batch, nb_classes) predicted probabilities.
        weights: (nb_classes, nb_classes) array indexed as
            ``weights[true_class, predicted_class]``.

    Returns:
        Per-sample cross-entropy scaled by the weight of each sample's
        (true class, arg-max predicted class) pair.
    """
    nb_cl = len(weights)
    final_mask = K.zeros_like(y_pred[:, 0])
    # Max over the class axis (axis=1) gives each sample's top score. The
    # former axis=0 reduced over the batch, which compared every sample
    # against batch-wide maxima.
    y_pred_max = K.max(y_pred, axis=1, keepdims=True)  # (batch, 1)
    # One-hot (or multi-hot on ties) marker of each sample's predicted class.
    y_pred_max_mat = K.cast(K.equal(y_pred, y_pred_max), K.floatx())
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (weights[c_t, c_p] * y_pred_max_mat[:, c_p] * y_true[:, c_t])
    # NOTE(review): argument order of K.categorical_crossentropy differs
    # between Keras 1 (output, target) and Keras 2 (target, output) - confirm
    # against the installed version.
    return K.categorical_crossentropy(y_pred, y_true) * final_mask
def get_initial_states(self, x):
    """Return the all-zero initial states ``[traversal, per_timestep_state]``.

    ``x`` is expected as (samples, timesteps, input_dim). The traversal is
    (samples, output_dim); the second state is
    (timesteps, samples, output_dim).
    """
    initial_state = K.zeros_like(x)  # (samples, timesteps, input_dim)
    # Permute the zeros, not `x`: the zeros_like result used to be discarded,
    # so any NaN/Inf in the input leaked into the "zero" states through the dot.
    initial_state = K.permute_dimensions(initial_state, [1, 0, 2])  # (timesteps, samples, input_dim)
    reducer = K.zeros((self.input_dim, self.output_dim))
    initial_state = K.dot(initial_state, reducer)  # (timesteps, samples, output_dim)
    initial_traversal = K.sum(initial_state, axis=0)  # traversal is (samples, output_dim)
    # this order matches assumptions in rttn scan function
    initial_states = [initial_traversal, initial_state]
    return initial_states
def get_initial_states(self, X):
    """Extend the parent's initial states with a zero readout state when enabled.

    When ``self.readout`` is truthy, an all-zero (samples, self.readout)
    tensor is appended to the list returned by the parent class.
    """
    states = super(DeepLSTM, self).get_initial_states(X)
    if not self.readout:
        return states

    # (samples, timesteps, input_dim) -> (samples, input_dim)
    zeros_per_sample = K.sum(K.zeros_like(X), axis=1)
    # Zero projection resizes the last axis to the readout width.
    projection = K.zeros((self.input_dim, self.readout))
    readout_state = K.dot(zeros_per_sample, projection)
    states += [readout_state]
    return states
def get_initial_state(self, x):
    """Return two all-zero states of widths ``n_units * sigsize`` and ``n_units``.

    ``x`` has shape (samples, timesteps, input_dim); each returned state is
    (samples, width).
    """
    # (samples, timesteps, input_dim) -> (samples,) -> (samples, 1)
    zeros_column = K.expand_dims(K.sum(K.zeros_like(x), axis=(1, 2)))
    widths = (self.n_units*self.sigsize, self.n_units)
    return [K.tile(zeros_column, [1, width]) for width in widths]
def yolo_conf_loss(y_true, y_pred, t):
    """Weighted squared error on the objectness score.

    Cells selected by ``t`` are weighted 5.0 and compared against
    ``y_true``; all other cells are weighted 0.05 and compared against 0.
    ``y_pred`` holds logits and is passed through a sigmoid first.
    """
    target = tf.where(t, y_true, K.zeros_like(y_true))
    squared_error = K.square(target - K.sigmoid(y_pred))
    weighted = tf.where(t, 5.0 * (squared_error), 0.05 * (squared_error))
    return K.mean(weighted)
def get_initial_states(self, x):
    """Return, per entry in ``self.states``, a stacked pair of (samples, units) states.

    ``x`` is expected as (samples, timesteps, input_dim). The values come
    from multiplying an all-zero tensor by a random reducer.
    """
    # (samples, timesteps, input_dim) -> (samples, input_dim)
    zeros_per_sample = K.sum(K.zeros_like(x), axis=1)
    projection = K.random_uniform((self.input_dim, self.units))
    projection = projection / K.exp(projection)
    base_state = K.dot(zeros_per_sample, projection)  # (samples, units)
    return [K.stack([base_state, base_state]) for _ in range(len(self.states))]
def get_initial_states(self, X):
    """Return two references to one zero state shaped like a ``conv_step`` output.

    ``X`` is expected as (samples, timesteps, row, col, filter).
    """
    # Drop the time axis: (samples, row, col, filter).
    frame_zeros = K.sum(K.zeros_like(X), axis=1)
    # Convolving zeros with a zero kernel yields zeros in the output shape.
    zero_state = self.conv_step(frame_zeros, K.zeros(self.W_shape),
                                border_mode=self.border_mode)
    return [zero_state, zero_state]
def _get_initial_state(x, inp):
    """Return an all-zero (samples, width) state, with width taken from ``inp``.

    Only the first element of ``x`` is used, to obtain a symbolic batch
    size; the dtype and the width (``inp``'s static second dimension) come
    from ``inp``.
    """
    # TODO: check that all x have the same number of samples / timesteps
    # TODO: test that x has 3 dimensions and inp has two dimensions
    first_input = x[0]
    state_width = int(inp.get_shape()[1])

    # copied from keras. Recurrent.get_initial_state
    zeros = K.zeros_like(first_input, dtype=inp.dtype)  # (samples, timesteps, input_dim)
    zeros = K.sum(zeros, axis=(1, 2))  # (samples,)
    zeros = K.expand_dims(zeros)  # (samples, 1)
    return K.tile(zeros, [1, state_width])
def call(self, x, mask=None):
    """Sum ``x`` over the time axis, scaled by 1/sqrt(number of valid steps).

    Args:
        x: (batch, timesteps, features) tensor.
        mask: optional (batch, timesteps) mask; masked steps are excluded
            from both the sum and the step count.

    Returns:
        (batch, features) tensor. Rows whose mask is entirely zero yield 0
        rather than NaN.
    """
    if mask is None:
        # The symbolic timestep count replaces `len(x)`, which is not
        # defined for backend tensors.
        timesteps = K.cast(K.shape(x)[1], K.floatx())
        return K.sum(x, axis=1) / K.sqrt(timesteps)

    mask = K.cast(mask, K.floatx())
    mask = K.expand_dims(mask, axis=-1)
    s = K.sum(mask, axis=1)
    # The old `K.equal(...) is None` guard could never be true, so fully
    # masked rows fell through to 0/0. Flooring the count at epsilon leaves
    # rows with s >= 1 unchanged and maps empty rows to 0.
    safe_count = K.maximum(s, K.epsilon())
    return K.cast(K.sum(x * mask, axis=1) / K.sqrt(safe_count), K.floatx())
def define_loss(self, netD, real, fake_argb, fake_sz64, distorted, vggface_feat=None):
    """Build the discriminator and generator losses.

    Args:
        netD: discriminator, called on channel-wise concatenated image pairs.
        real: target images.
        fake_argb: generator output; channel 0 is the alpha mask, the rest RGB.
        fake_sz64: generator output compared against ``real`` resized to 64x64.
        distorted: generator input images.
        vggface_feat: optional feature extractor returning three feature
            maps; enables the perceptual loss terms.

    Returns:
        ``(loss_D, loss_G)``.
    """
    blend_mask = Lambda(lambda x: x[:,:,:, :1])(fake_argb)
    fake_rgb = Lambda(lambda x: x[:,:,:, 1:])(fake_argb)
    # Alpha-blend the generated RGB over the distorted input.
    fake = blend_mask * fake_rgb + (1-blend_mask) * distorted

    if self.use_mixup:
        lam = Beta(self.mixup_alpha, self.mixup_alpha).sample()
        real_pair = concatenate([real, distorted])
        fake_pair = concatenate([fake, distorted])
        output_mixup = netD(lam * real_pair + (1 - lam) * fake_pair)
        loss_D = self.loss_fn(output_mixup, lam * K.ones_like(output_mixup))
        loss_G = 1 * self.loss_fn(output_mixup, (1 - lam) * K.ones_like(output_mixup))
    else:
        output_real = netD(concatenate([real, distorted]))  # positive sample
        output_fake = netD(concatenate([fake, distorted]))  # negative sample
        loss_D_real = self.loss_fn(output_real, K.ones_like(output_real))
        loss_D_fake = self.loss_fn(output_fake, K.zeros_like(output_fake))
        loss_D = loss_D_real + loss_D_fake
        loss_G = 1 * self.loss_fn(output_fake, K.ones_like(output_fake))

    # With mask refinement the blended image is supervised, otherwise the raw RGB.
    generated = fake if self.use_mask_refinement else fake_rgb

    # Reconstruction (L1) terms.
    loss_G += K.mean(K.abs(generated - real))
    loss_G += K.mean(K.abs(fake_sz64 - tf.image.resize_images(real, [64, 64])))

    # Perceptual Loss
    if vggface_feat is not None:
        def preprocess_vggface(x):
            x = (x + 1)/2 * 255  # channel order: BGR
            x -= [93.5940, 104.7624, 129.]
            return x

        pl_params = (0.02, 0.3, 0.5)
        real_sz224 = tf.image.resize_images(real, [224, 224])
        real_sz224 = Lambda(preprocess_vggface)(real_sz224)
        fake_sz224 = tf.image.resize_images(generated, [224, 224])
        fake_sz224 = Lambda(preprocess_vggface)(fake_sz224)

        real_feat55, real_feat28, real_feat7 = vggface_feat(real_sz224)
        fake_feat55, fake_feat28, fake_feat7 = vggface_feat(fake_sz224)
        feature_pairs = (
            (fake_feat7, real_feat7),
            (fake_feat28, real_feat28),
            (fake_feat55, real_feat55),
        )
        for weight, (fake_feat, real_feat) in zip(pl_params, feature_pairs):
            loss_G += weight * K.mean(K.abs(fake_feat - real_feat))

    return loss_D, loss_G
def call(self, x, mask=None):
    """Pool ``x`` over the time axis.

    Args:
        x: (batch, timesteps, features) tensor.
        mask: optional (batch, timesteps) mask.

    Returns:
        Without a mask, the mean over time. With a mask, the masked sum
        divided by ``sqrt(valid_steps) + 1e-10``. Both are
        (batch, features).
    """
    if mask is None:
        # (A leftover debug print of `x` was removed from this branch.)
        return K.mean(x, axis=1)

    mask = K.cast(mask, K.floatx())
    mask = K.expand_dims(mask, axis=-1)
    s = K.sum(mask, axis=1)
    # The former `K.equal(...) is None` test was always False (a tensor is
    # never None), so only this expression ever ran; the dead branch is gone.
    denominator = K.sqrt(s) + K.constant(1e-10, dtype=K.floatx())
    return K.cast(K.sum(x * mask, axis=1) / denominator, K.floatx())
def get_initial_state(self, x):
    """Build the list of all-zero initial states, printing debug traces.

    For each state kind ('r', 'c', 'e', plus 'ahat' when
    ``self.extrap_start_time`` is set) and each layer, a zero tensor is
    created whose spatial size is the input size divided by ``2 ** layer``.
    When extrapolating, an integer timestep counter is appended as the last
    state.

    Returns:
        list of initial state tensors.
    """
    print("\n------------------------------------------------------------------")
    # Below: changes color to red. Works in terminal but not spyder IPython console
    print("\033[91mprednet_RBP_28June2019.py: 'get_initial_state()' called \033[00m")
    input_shape = self.input_spec[0].shape
    init_nb_row = input_shape[self.row_axis]
    init_nb_col = input_shape[self.column_axis]
    print(" x: ", x)
    print(" x.shape: ", x.shape)
    print(" input_shape: ", input_shape)
    print(" init_nb_row: ", init_nb_row)
    print(" init_nb_col: ", init_nb_col)
    base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
    print(" Initial base_initial_state.shape: ", base_initial_state.shape)
    # Sum away the two spatial axes (always the same axis index, since each
    # sum removes one), then the time axis.
    non_channel_axis = -1 if self.data_format == 'channels_first' else -2
    for _ in range(2):
        base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
    base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)
    print(" Final base_initial_state: ", base_initial_state)
    print(" Final base_initial_state: ", base_initial_state.shape)
    initial_states = []
    states_to_pass = ['r', 'c', 'e']
    nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
    # Above: returns "{'r': 2, 'c': 2, 'e': 2}" if two layers
    if self.extrap_start_time is not None:
        states_to_pass.append('ahat')  # pass prediction in states so can use as actual for t+1 when extrapolating
        nlayers_to_pass['ahat'] = 1
    print(" Calculate stack and output sizes")
    for u in states_to_pass:  # iterate over ['r', 'c', 'e'] (and 'ahat' when extrapolating)
        for l in range(nlayers_to_pass[u]):  # nb of layers for 'r'/'c'/'e'; 1 for 'ahat'
            print("\n layer:" ,l)
            # Each layer halves the spatial resolution of the previous one.
            ds_factor = 2 ** l
            nb_row = init_nb_row // ds_factor
            nb_col = init_nb_col // ds_factor
            if u in ['r', 'c']:
                stack_size = self.R_stack_sizes[l]
            elif u == 'e':
                stack_size = 2 * self.stack_sizes[l]
            elif u == 'ahat':
                stack_size = self.stack_sizes[l]
            print(" state component:", u)
            print(" stack_size:", stack_size)
            output_size = stack_size * nb_row * nb_col  # flattened size
            print(" output_size:", output_size)
            # Zero projection: resizes the channel axis to the flattened state size.
            reducer = K.zeros((input_shape[self.channel_axis], output_size))  # (nb_channels, output_size)
            initial_state = K.dot(base_initial_state, reducer)  # (samples, output_size)
            if self.data_format == 'channels_first':
                output_shp = (-1, stack_size, nb_row, nb_col)
            else:
                output_shp = (-1, nb_row, nb_col, stack_size)
            initial_state = K.reshape(initial_state, output_shp)
            print(" initial_state: ", initial_state, " for l=", l)
            initial_states += [initial_state]
    # if K._BACKEND == 'theano':
    #     from theano import tensor as T
    #     # There is a known issue in the Theano scan op when dealing with inputs whose shape is 1 along a dimension.
    #     # In our case, this is a problem when training on grayscale images, and the below line fixes it.
    #     initial_states = [T.unbroadcast(init_state, 0, 1) for init_state in initial_states]
    if self.extrap_start_time is not None:
        initial_states += [K.variable(0, int if K.backend() != 'tensorflow' else 'int32')]  # the last state will correspond to the current timestep
    print("\nRETURNING from get_initial_state()")
    for i in range(len(initial_states)):
        print(" ", initial_states[i])
    print("States length:", len(initial_states))
    return initial_states  # return type is list
def compute_loss(yolo_outputs, y_true, anchors, num_classes, ignore_thresh=0.5, print_loss=False):
    """
    Computes the custom written YOLO loss for provided output.

    Input:
        yolo_outputs: list of tensor, output of YOLO for provided input image
        y_true: list of tensor, y_true label corresponding to the output produced from GT
        anchors: array, anchors for YOLO
        num_classes: int, number of classes in the dataset
        ignore_thresh: float, best-IoU threshold below which a prediction in a
            non-object cell is penalised by the confidence loss
        print_loss: boolean, whether to print the loss terms on each evaluation,
            useful for debugging

    Output:
        loss: sum over all output layers of the xy, wh, confidence and class
            losses, each divided by the batch size
    """
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    num_layers = len(yolo_outputs)
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # For sample b: 1 where a prediction's best IoU with any true box
            # is below the threshold, 0 otherwise.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_IoU(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        with tf.name_scope('xy_loss'):
            xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
                raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        with tf.name_scope('wh_loss'):
            wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
                raw_true_wh - raw_pred[..., 2:4])
        with tf.name_scope('conf_loss'):
            # Object cells always count; non-object cells only where ignore_mask is 1.
            confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        with tf.name_scope('class_loss'):
            class_loss = object_mask * K.binary_crossentropy(
                true_class_probs, raw_pred[..., 5:], from_logits=True)

        with tf.name_scope('total_loss'):
            xy_loss = K.sum(xy_loss) / mf
            wh_loss = K.sum(wh_loss) / mf
            confidence_loss = K.sum(confidence_loss) / mf
            class_loss = K.sum(class_loss) / mf
            loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ], message='loss: ')
    return loss
def recursion(self, input_energy, mask=None, go_backwards=False,
              return_sequences=True, return_logZ=True, input_length=None):
    r"""Forward (alpha) or backward (beta) recursion

    If `return_logZ = True`, compute the logZ, the normalization constant:

    \[ Z = \sum_{y1, y2, y3} exp(-E) # energy
         = \sum_{y1, y2, y3} exp(-(u1' y1 + y1' W y2 + u2' y2 + y2' W y3 + u3' y3))
         = sum_{y2, y3} (exp(-(u2' y2 + y2' W y3 + u3' y3))
                         sum_{y1} exp(-(u1' y1' + y1' W y2))) \]

    Denote:
        \[ S(y2) := sum_{y1} exp(-(u1' y1 + y1' W y2)), \]
        \[ Z = sum_{y2, y3} exp(log S(y2) - (u2' y2 + y2' W y3 + u3' y3)) \]
        \[ logS(y2) = log S(y2) = log_sum_exp(-(u1' y1' + y1' W y2)) \]
    Note that:
        yi's are one-hot vectors
        u1, u3: boundary energies have been merged

    If `return_logZ = False`, compute the Viterbi's best path lookup table.

    Args:
        input_energy: energy tensor indexed as ``[:, t, :]``
            (presumably (B, T, F); confirm against the caller).
        mask: optional mask tensor aligned with axis 1 of `input_energy`.
        go_backwards: if True, `input_energy` (and `mask`) are reversed along
            axis 1 before the recursion, and a returned sequence is reversed
            back.
        return_sequences: if True return the per-step values, otherwise only
            the last one.
        return_logZ: forwarded unchanged to `self.step`.
        input_length: forwarded to `K.rnn`.

    Returns:
        The sequence of target values when `return_sequences` is True, else
        the last target value produced by `K.rnn`.
    """
    chain_energy = self.chain_kernel
    # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t
    chain_energy = K.expand_dims(chain_energy, 0)
    # shape=(B, F)
    prev_target_val = K.zeros_like(input_energy[:, 0, :])

    if go_backwards:
        input_energy = K.reverse(input_energy, 1)
        if mask is not None:
            mask = K.reverse(mask, 1)

    initial_states = [prev_target_val, K.zeros_like(prev_target_val[:, :1])]
    constants = [chain_energy]

    if mask is not None:
        # Append one all-zero time step to the mask and cast it to floatx.
        mask2 = K.cast(
            K.concatenate([mask, K.zeros_like(mask[:, :1])], axis=1),
            K.floatx())
        constants.append(mask2)

    def _step(input_energy_i, states):
        return self.step(input_energy_i, states, return_logZ)

    target_val_last, target_val_seq, _ = K.rnn(
        _step, input_energy, initial_states, constants=constants,
        input_length=input_length, unroll=self.unroll)

    if return_sequences:
        if go_backwards:
            target_val_seq = K.reverse(target_val_seq, 1)
        return target_val_seq
    else:
        return target_val_last
def m_crossentropy(y_true, y_pred):
    """Binary cross-entropy averaged over the last axis.

    The result is replaced by zeros unless the sum of `y_true` is greater
    than 0.
    """
    mean_bce = K.mean(K.binary_crossentropy(y_true, y_pred), -1)
    has_positive = K.greater(K.sum(y_true), 0)
    fallback = K.zeros_like(mean_bce)
    return K.switch(has_positive, mean_bce, fallback)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list; the first three entries are the yolo_outputs (list of
        tensor, the output of yolo_body), the remaining entries are y_true
        (list of array, the output of preprocess_true_boxes)
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object
        confidence loss

    Returns
    -------
    loss: scalar tensor, the summed box, confidence and class losses of the
        three layers divided by the batch size

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(3)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterating over each image of the batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of image b, selected by the object mask.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # Use the public tf.while_loop; K.control_flow_ops is not part of the
        # Keras backend API and is absent from newer Keras releases.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        confidence_loss = object_mask * K.square(1 - pred_confidence) + \
            (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs - pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
def shift_right(x, offset=1):
    """Shift `x` right by `offset` positions along axis 1.

    The first `offset` positions are filled with zeros and the last `offset`
    positions of `x` are dropped.
    """
    assert offset > 0
    zero_head = K.zeros_like(x[:, :offset])
    kept = x[:, :-offset]
    return K.concatenate([zero_head, kept], axis=1)
def zero_loss(y_true, y_pred):
    """Loss that ignores `y_true` and is zero everywhere, shaped like `y_pred`."""
    zeros = K.zeros_like(y_pred)
    return zeros
def call(self, inputs, **kwargs):
    """Return zeros shaped like `inputs`; any keyword arguments are ignored."""
    zeroed = K.zeros_like(inputs)
    return zeroed
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    """Return the YOLO loss tensor.

    Args:
        args: list ``[*model_body.output, *y_true]``; the first ``num_layers``
            entries are the raw network outputs, the remaining ones the
            encoded ground truth for the same feature layers.
        anchors: array of anchors; ``len(anchors) // 3`` is the number of
            feature layers.
        num_classes: int, number of classes.
        ignore_thresh: float, IoU threshold; a predicted box whose best IoU
            with the ground-truth boxes of its image is below this value is
            treated as a negative sample in the confidence loss.
        print_loss: bool, print the loss components when True.

    Returns:
        Scalar loss tensor; every component is divided by the batch size.
    """
    # Number of feature layers.
    num_layers = len(anchors) // 3
    # Split predictions from ground truth. Per the original notes, for a
    # 416x416 input both lists hold tensors shaped like (m,13,13,3,85),
    # (m,26,26,3,85) and (m,52,52,3,85).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]
    # Anchor indices used by each feature layer.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # Network input size (h, w): first feature map size times 32.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # Grid size (h, w) of every feature layer.
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    # m is the batch size (tensor); mf is the same value as a float.
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Cells/anchors that contain an object, channel 4 kept as a size-1 axis.
        object_mask = y_true[l][..., 4:5]
        # One-hot class targets.
        true_class_probs = y_true[l][..., 5:]

        # Decode the raw output: grid coordinates, raw predictions and the
        # decoded box centres / sizes.
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
            calc_loss=True)
        # Decoded predicted boxes (xy + wh on the last axis).
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Build the negative-sample mask, one entry per image.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # All ground-truth boxes of image b: shape (n, 4).
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            # IoU of every predicted box with every ground-truth box.
            iou = box_iou(pred_box[b], true_box)
            # Best IoU per predicted box.
            best_iou = K.max(iou, axis=-1)
            # 1 where the prediction matches no ground-truth box well enough,
            # i.e. where it is a negative sample for this image.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # Iterate over all images. Use the public tf.while_loop;
        # K.control_flow_ops is not part of the Keras backend API.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])

        # Stack the per-image masks and add a trailing size-1 axis so the
        # mask broadcasts against the confidence channel.
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Encode the ground truth in the same raw format as the predictions.
        # grid_shapes holds (h, w) while the xy targets are (x, y), so the
        # grid shape must be reversed; this only matters for non-square grids.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])

        # Keep the wh target only where an object exists (avoids log(0) = -inf).
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        # Positive cells: cross-entropy between 1 and the confidence.
        # Empty cells: cross-entropy between 0 and the confidence, counted
        # only where best_iou < ignore_thresh, which limits the negatives.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(
                loss,
                [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                message='loss: ')
    return loss
def leftShift(self, x):
    """Shift `x` one position left along axis 1, zero-filling the last position."""
    tail = x[:, 1:]
    zero_pad = K.zeros_like(x[:, :1])
    return K.concatenate([tail, zero_pad], axis=1)
def get_updates(self, loss, params):
    """Build the update ops for one optimisation step.

    The global learning rate follows a Gaussian-shaped schedule over the
    iteration count. Parameters whose name matches a key of `self.lr` /
    `self.momentum` / `self.clips` (as decided by `set_pattern_find`) get
    their own learning rate, momentum or value clipping. Every update is
    clipped element-wise to `mean(|p|) * self.UPCLIP` in magnitude.

    Args:
        loss: loss tensor passed to `self.get_gradients`.
        params: list of variables to update.

    Returns:
        The list of update ops, also stored in `self.updates`.
    """
    grads = self.get_gradients(loss, params)
    # first update the number of iterations
    self.updates = [K.update_add(self.iterations, 1)]

    # Cycling Gaussian LR:
    #   lr(i) = min_lr + max_lr * exp(-(i - center)**2 / (center * lrsigma)**2)
    def gauss_lr(min_lr, max_lr, center, lrsigma, i):
        return (min_lr + max_lr * K.exp(-(i - center)**2 / (center * lrsigma)**2))

    ite_casted = K.cast(self.iterations, K.dtype(self.peaklriter))
    all_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'],
                      self.peaklriter, self.lrsigma, ite_casted)
    self.updates.append(K.update(self.lr['all'], all_lr))

    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(s) for s in shapes]
    self.weights = [self.iterations] + moments

    for p, g, m in zip(params, grads, moments):
        # Learning rate: per-pattern schedule if the name matches, else global.
        lrptrkey = set_pattern_find(p.name, self.lr.keys())
        if lrptrkey:
            if self.verbose > 0:
                print("Setting different learning rate for ", p.name, " : ",
                      K.eval(self.lr[lrptrkey]))
            if set_pattern_find(p.name, self.min_lr.keys()) and set_pattern_find(
                    p.name, self.peak_lr.keys()):
                p_lr = gauss_lr(self.min_lr[lrptrkey], self.peak_lr[lrptrkey],
                                self.peaklriter, self.lrsigma, ite_casted)
            else:
                p_lr = gauss_lr(self.min_lr['all'], self.peak_lr['all'],
                                self.peaklriter, self.lrsigma, ite_casted)
        else:
            p_lr = self.lr['all']

        # Momentum: per-pattern value if the name matches, else global.
        momptrkey = set_pattern_find(p.name, self.momentum.keys())
        if momptrkey:
            if self.verbose > 0:
                print("Setting different momentum for ", p.name, " , ",
                      K.eval(self.momentum[momptrkey]))
            momentum = self.momentum[momptrkey]
        else:
            momentum = self.momentum['all']

        if self.nesterov:
            updt = momentum * (momentum * m - p_lr * g) - p_lr * g
        else:
            updt = momentum * m - p_lr * g

        # Clip the update magnitude to [0, margin] while keeping its sign.
        # tf.clip_by_value is used directly because the bounds are tensors.
        _clip_by_val = K.tf.clip_by_value
        margin = K.mean(K.abs(p)) * K.constant(self.UPCLIP)
        min_v = K.zeros_like(margin)
        updt_sign = K.sign(updt)
        updt_val = _clip_by_val(K.abs(updt), min_v, margin)
        v = updt_sign * updt_val  # velocity
        new_p = p + v
        self.updates.append(K.update(m, v))

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        # Optional per-pattern clipping of the parameter values themselves.
        clptrkey = set_pattern_find(p.name, self.clips.keys())
        if self.clips_val and clptrkey:
            c = K.eval(self.clips[clptrkey])
            if self.verbose > 0:
                print("Clipping variable", p.name, " to ", c)
            new_p = K.clip(new_p, c[0], c[1])

        self.updates.append(K.update(p, new_p))
    return self.updates
def rightShift(self, x):
    """Shift `x` one position right along axis 1, zero-filling the first position."""
    zero_pad = K.zeros_like(x[:, -1:])
    head = x[:, :-1]
    return K.concatenate([zero_pad, head], axis=1)
def shape_error(y_true, y_pred):
    """Sum of absolute shape differences over ellipse entries.

    The absolute differences between `y_true` and `y_pred` on the channels
    GT_INDEX.SHAPE_BEG:GT_INDEX.SHAPE_END are summed per entry. Entries whose
    IS_ELLIPSE channel is not set contribute zero, and the total is divided
    by the sum of the IS_ELLIPSE channel plus epsilon.
    """
    shape_channels = slice(GT_INDEX.SHAPE_BEG, GT_INDEX.SHAPE_END)
    ellipse_flag = y_true[..., GT_INDEX.IS_ELLIPSE]
    abs_diff = K.abs(y_true[..., shape_channels] - y_pred[..., shape_channels])
    per_entry = K.sum(abs_diff, axis=-1)
    masked_total = K.sum(K.switch(ellipse_flag, per_entry, K.zeros_like(per_entry)))
    normalizer = K.sum(ellipse_flag) + K.epsilon()
    return masked_total / normalizer
"""Keras check: verify that Keras is installed properly."""
import os
import warnings

# Configure warning filters and the native log level BEFORE importing Keras.
# The import pulls in the backend, and settings applied afterwards cannot
# suppress messages emitted while it loads.
warnings.filterwarnings(
    'ignore',
    '.*do not.*',
)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1"  # 0: show all logs, 1: hide INFO messages

import numpy as np
from keras import backend as kbe

# Test Keras - backend interaction.
data = kbe.variable(np.random.random((4, 2)))  # a 4x2 tensor of random numbers
zero_data = kbe.zeros_like(data)  # zeros tensor of the same size as data
print(kbe.eval(zero_data))
def __init__(self, opt):
    """Build the generators, discriminators, losses and training functions.

    Args:
        opt: options object. This method reads shapeA, shapeB, label_shape_G,
            label_shape_D, ngf, ndf, use_lsgan, lmbd, lr_D and lr_G from it.

    Sets `G_trainner`, `D_trainner`, `AtoB`, `BtoA`, `DisA`, `DisB` and `opt`
    on the instance.
    """
    gen_B = defineG(opt.shapeA, opt.shapeB[2], ngf=opt.ngf, name='gen_B')
    dis_B = basic_D(opt.shapeB, opt.ndf, use_sigmoid=not opt.use_lsgan, name='dis_B')
    gen_A = defineG_A(opt.shapeB, opt.label_shape_G, opt.shapeB[2], ngf=opt.ngf, name='gen_A')
    dis_A = defineD_A(opt.shapeA, opt.label_shape_D, opt.ndf,
                      use_sigmoid=not opt.use_lsgan, name='dis_A')

    self.init_network(gen_B)
    self.init_network(dis_B)
    self.init_network(gen_A)
    self.init_network(dis_A)

    # building loss function
    # real image input
    real_A = Input(opt.shapeA)
    real_B = Input(opt.shapeB)
    true_label_D = Input(opt.label_shape_D)
    true_label_G = Input(opt.label_shape_G)
    fake_label_D = Input(opt.label_shape_D)
    true_label_D_pool = Input(opt.label_shape_D)
    fake_label_D_pool = Input(opt.label_shape_D)

    # input from fake image pool
    fake_A_pool = Input(opt.shapeA)
    fake_B_pool = Input(opt.shapeB)

    fake_B = gen_B(real_A)
    rec_A = gen_A([fake_B, true_label_G])  # = gen_A(gen_B(real_A))
    fake_A = gen_A([real_B, true_label_G])
    rec_B = gen_B(fake_A)  # = gen_B(gen_A(real_B))

    # discriminator A function output
    dis_A_real_real_label = dis_A([real_A, true_label_D])
    dis_A_real_fake_label = dis_A([real_A, fake_label_D])
    dis_A_fake_real_label = dis_A([fake_A_pool, true_label_D_pool])
    dis_A_fake_fake_label = dis_A([fake_A_pool, fake_label_D_pool])
    Gdis_A = dis_A([fake_A, true_label_D])

    # discriminator B function output
    dis_B_real = dis_B(real_B)
    dis_B_fake = dis_B(fake_B_pool)
    Gdis_B = dis_B(fake_B)

    # DA, GA loss: only a real image with its correct label is a positive.
    loss_DA_real_image_real_label = loss_fn(
        dis_A_real_real_label, K.ones_like(dis_A_real_real_label))
    loss_DA_real_image_fake_label = loss_fn(
        dis_A_real_fake_label, K.zeros_like(dis_A_real_fake_label))
    loss_DA_fake_image_real_label = loss_fn(
        dis_A_fake_real_label, K.zeros_like(dis_A_fake_real_label))
    # The target must be shaped like the output it is compared with; the
    # pool inputs are separate placeholders from the real-image inputs.
    loss_DA_fake_image_fake_label = loss_fn(
        dis_A_fake_fake_label, K.zeros_like(dis_A_fake_fake_label))
    loss_DA = loss_DA_real_image_real_label + loss_DA_real_image_fake_label + \
        loss_DA_fake_image_real_label + loss_DA_fake_image_fake_label

    loss_GA = loss_fn(Gdis_A, K.ones_like(Gdis_A))
    loss_cycA = K.mean(K.abs(rec_A - real_A))

    # DB, GB loss
    loss_DB_real = loss_fn(dis_B_real, K.ones_like(dis_B_real))
    loss_DB_fake = loss_fn(dis_B_fake, K.zeros_like(dis_B_fake))
    loss_DB = loss_DB_real + loss_DB_fake
    loss_GB = loss_fn(Gdis_B, K.ones_like(Gdis_B))
    loss_cycB = K.mean(K.abs(rec_B - real_B))

    # cycle loss
    loss_cyc = loss_cycA + loss_cycB

    # D's total loss
    loss_D = loss_DA + loss_DB

    # G's total loss
    loss_G = loss_GA + loss_GB + opt.lmbd * loss_cyc

    weightsD = dis_A.trainable_weights + dis_B.trainable_weights
    weightsG = gen_A.trainable_weights + gen_B.trainable_weights

    # training function for discriminator
    # update both of D_A, D_B based on the total loss of dis_A, dis_B
    training_updates = Adam(lr=opt.lr_D, beta_1=0.5).get_updates(weightsD, [], loss_D)
    netD_train = K.function(
        [real_A, real_B, true_label_D, true_label_G, fake_label_D,
         fake_A_pool, fake_B_pool, true_label_D_pool, fake_label_D_pool],
        [loss_DA / 2, loss_DB / 2],
        training_updates)

    # training function for generator
    # update both of G_A, G_B based on the total loss of GA, GB and CYCLE loss
    training_updates = Adam(lr=opt.lr_G, beta_1=0.5).get_updates(weightsG, [], loss_G)
    netG_train = K.function(
        [real_A, real_B, true_label_D, true_label_G],
        [loss_GA, loss_GB, loss_cyc],
        training_updates)

    self.G_trainner = netG_train
    self.D_trainner = netD_train
    self.AtoB = gen_B
    self.BtoA = gen_A
    self.DisA = dis_A
    self.DisB = dis_B
    self.opt = opt
def pad_depth(inputs, desired_channels):
    """Return zeros shaped like `inputs`, created with the name 'pad_depth1'.

    `desired_channels` is accepted but not used.
    """
    from keras import backend as K

    return K.zeros_like(inputs, name='pad_depth1')
def loss(inputs, anchors, num_classes, ignore_thresh=0.5, print_loss=False, use_focal_loss=False):
    """Compute yolo loss

    Args:
        inputs: list of tensor, [y1, y2, y3, y_true1, y_true2, y_true3],
            shape=(b, h, w, num_anchors, 5 + num_classes)
        anchors: array, shape=(N, 2), each anchor value is wh
        num_classes: integer
        ignore_thresh: float, IoU threshold; a predicted box whose best IoU
            with the ground-truth boxes of its image is below this value
            counts as a negative in the confidence loss
        print_loss: bool, whether should print loss
        use_focal_loss: bool, use `utils.sigmoid_focal_loss` instead of binary
            cross-entropy for the class and confidence terms

    Returns:
        Scalar loss tensor: xy, wh, class and confidence losses summed over
        the three scales, each divided by the batch size.
    """
    assert len(inputs) == 6, 'inputs should has six entry'
    predicts = inputs[:3]  # list of tensor
    labels = inputs[3:]  # list of tensor
    float_type = K.dtype(predicts[0])
    m = K.shape(predicts[0])[0]
    mf = K.cast(m, dtype=float_type)
    num_scales = len(predicts)
    input_shape = K.cast(K.shape(predicts[0])[1:3] * 32, dtype=float_type)[..., ::-1]  # wh
    anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    anchors = np.array(anchors, dtype=float_type)

    losses = 0
    xy_losses = 0
    wh_losses = 0
    class_losses = 0
    confidence_losses = 0

    for s in range(num_scales):
        y_true = K.cast(labels[s], dtype=float_type)
        true_mask = y_true[..., 4:5]
        true_mask_bool = K.cast(true_mask, dtype='bool')

        box_xy, box_wh, box_confidence, box_classes, \
            raw_box_xy, raw_box_wh, grid = post_process_pred(
                predicts[s], input_shape, anchors[anchor_masks[s]], num_classes)

        grid_shape = K.shape(grid)[:2]  # hw
        grid_shape = K.cast(grid_shape, dtype=float_type)  # hw

        loss_scale = 2 - y_true[..., 2:3] * y_true[..., 3:4]  # small objects get larger scale
        loss_scale = K.clip(loss_scale, 0, 2.0)

        # Ground truth encoded in the raw (pre-decoding) space of the network.
        raw_true_xy = y_true[..., :2] * grid_shape[::-1] - grid[..., ::-1]
        raw_true_wh = K.log(y_true[..., 2:4] * input_shape / anchors[anchor_masks[s]])
        # Cells without an object hold log(0) = -inf; replace them with 0.
        raw_true_wh = K.switch(true_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh, dtype=float_type))

        ignore_mask = tf.TensorArray(dtype=float_type, size=1, dynamic_size=True)
        box_xywh = K.concatenate([box_xy, box_wh], axis=-1)
        # IoU must compare decoded boxes with ground-truth boxes, not with the
        # encoded targets (grid offsets / log ratios) used by the regression
        # terms below.
        # NOTE(review): assumes post_process_pred returns box_xy/box_wh in the
        # same coordinate space as y_true[..., 0:4] - confirm.
        true_xywh = y_true[..., 0:4]

        def loop_body(b, ignore_mask):
            true_boxes = tf.boolean_mask(
                true_xywh[b, ...], mask=true_mask_bool[b, ..., 0])  # shape=[j, 4]
            iou = box_iou(box_xywh[b, ...], true_boxes)
            best_iou = K.max(iou, axis=-1, keepdims=True)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, dtype=float_type))
            return b + 1, ignore_mask

        # Use the public tf.while_loop; K.control_flow_ops is not part of the
        # Keras backend API and is absent from newer Keras releases.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()

        xy_loss = true_mask * loss_scale * K.square(raw_true_xy - raw_box_xy)
        wh_loss = true_mask * loss_scale * 0.5 * K.square(raw_box_wh - raw_true_wh)

        if use_focal_loss:
            class_loss = true_mask * utils.sigmoid_focal_loss(
                y_true=y_true[..., 5:], y=box_classes, gama=2.0)
            confidence_term = utils.sigmoid_focal_loss(
                y=box_confidence, y_true=y_true[..., 4:5], gama=2.0)
        else:
            class_loss = true_mask * K.binary_crossentropy(
                y_true[..., 5:], box_classes, from_logits=False)
            confidence_term = K.binary_crossentropy(
                y_true[..., 4:5], box_confidence, from_logits=False)
        # Positives always count; negatives only where ignore_mask is 1.
        confidence_loss = confidence_term * true_mask + \
            confidence_term * (1 - true_mask) * ignore_mask

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        class_loss = K.sum(class_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf

        xy_losses += xy_loss
        wh_losses += wh_loss
        class_losses += class_loss
        confidence_losses += confidence_loss
        losses += (xy_loss + wh_loss + class_loss + confidence_loss)

    if print_loss:
        losses = tf.Print(
            losses,
            [losses, xy_losses, wh_losses, class_losses, confidence_losses],
            message=' yolo loss: ')
    return losses
netG = UNET_G(imageSize, nc_in, nc_out, ngf) netG.summary() real_A = netG.input fake_B = netG.output netG_generate = K.function([real_A], [fake_B]) real_B = netD.inputs[1] output_D_real = netD([real_A, real_B]) output_D_fake = netD([real_A, fake_B]) loss_fn = lambda output, target: -K.mean( K.log(output + 1e-12) * target + K.log(1 - output + 1e-12) * (1 - target)) loss_D_real = loss_fn(output_D_real, K.ones_like(output_D_real)) loss_D_fake = loss_fn(output_D_fake, K.zeros_like(output_D_fake)) loss_G_fake = loss_fn(output_D_fake, K.ones_like(output_D_fake)) loss_L1 = K.mean(K.abs(fake_B - real_B)) loss_D = loss_D_real + loss_D_fake training_updates = Adam(lr=lrD, beta_1=0.5).get_updates(netD.trainable_weights, [], loss_D) netD_train = K.function([real_A, real_B], [loss_D / 2], training_updates) loss_G = loss_G_fake + 100 * loss_L1 training_updates = Adam(lr=lrG, beta_1=0.5).get_updates(netG.trainable_weights, [], loss_G) netG_train = K.function([real_A, real_B], [loss_G_fake, loss_L1],
def shift_left(x, offset=1):
    """Shift `x` left by `offset` positions along axis 1.

    The first `offset` positions of `x` are dropped and `offset` zero
    positions are appended at the end.
    """
    assert offset > 0
    kept = x[:, offset:]
    zero_tail = K.zeros_like(x[:, :offset])
    return K.concatenate([kept, zero_tail], axis=1)
def no_object_accuracy(y_true, y_pred):
    """Accuracy of channel 0 on the positions where `y_true` channel 0 is zero.

    Predictions are passed through a sigmoid and rounded before being
    compared with the targets; the mean is taken over the last axis.
    """
    true_flag = y_true[:, :, :, :, 0]
    negative_idx = tf.where(K.equal(true_flag, K.zeros_like(true_flag)))
    true_neg = tf.gather_nd(y_true, negative_idx)
    pred_neg = tf.gather_nd(y_pred, negative_idx)
    rounded_pred = K.round(K.sigmoid(pred_neg[:, :1]))
    hits = K.equal(true_neg[:, :1], rounded_pred)
    return K.mean(hits, axis=-1)
def get_isi_from_impulse(impulse, epsilon):
    """Return 1 / impulse element-wise, or 0 where impulse is below `epsilon`."""
    below_threshold = impulse < epsilon
    zeros = k.zeros_like(impulse)
    reciprocal = k.T.true_div(1., impulse)
    return k.T.where(below_threshold, zeros, reciprocal)
def no_object_bin_cross_entropy_loss(y_true, y_pred):
    """Binary cross-entropy of channel 0 on positions where `y_true` channel 0 is zero.

    Predictions are passed through a sigmoid first; the mean is taken over
    the last axis.
    """
    true_flag = y_true[:, :, :, :, 0]
    negative_idx = tf.where(K.equal(true_flag, K.zeros_like(true_flag)))
    true_neg = tf.gather_nd(y_true, negative_idx)
    pred_neg = tf.gather_nd(y_pred, negative_idx)
    pred_prob = K.sigmoid(pred_neg[:, :1])
    bce = K.binary_crossentropy(true_neg[:, :1], pred_prob)
    return K.mean(bce, axis=-1)
def loss(y_true, y_pred):
    """Loss that ignores `y_true` and is zero everywhere, shaped like `y_pred`."""
    zeros = k.zeros_like(y_pred)
    return zeros
def zeropad(x):
    """Concatenate `x` with an equally shaped block of zeros along axis 2."""
    return K.concatenate([x, K.zeros_like(x)], axis=2)
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False, normalize=True):
    """Return the YOLO loss tensor.

    Args:
        args: list ``[*model_body.output, *y_true]``; the first ``num_layers``
            entries are the raw network outputs, the remaining ones the
            encoded ground truth for the same feature layers.
        anchors: array of anchors; ``len(anchors) // 3`` is the number of
            feature layers.
        num_classes: int, number of classes.
        ignore_thresh: float, IoU threshold; a predicted box whose best IoU
            with the ground-truth boxes of its image is below this value is
            treated as a negative sample in the confidence loss.
        print_loss: bool, print the loss components when True.
        normalize: bool, divide the summed loss by the number of positive
            samples when True, otherwise by the batch size.

    Returns:
        Scalar loss tensor.
    """
    # Number of feature layers.
    num_layers = len(anchors) // 3
    # Split predictions from ground truth. Per the original notes, for a
    # 416x416 input both lists hold tensors shaped like (m,13,13,3,85),
    # (m,26,26,3,85) and (m,52,52,3,85).
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]
    # Anchor indices used by each feature layer. Per the original notes:
    #   13x13 layer -> [116,90], [156,198], [373,326]
    #   26x26 layer -> [30,61], [62,45], [59,119]
    #   52x52 layer -> [10,13], [16,30], [33,23]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Network input size (h, w): first feature map size times 32.
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # Grid size (h, w) of every feature layer.
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    num_pos = 0
    # m is the batch size (tensor); mf is the same value as a float.
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # Cells/anchors that contain an object, channel 4 kept as a size-1 axis.
        object_mask = y_true[l][..., 4:5]
        # One-hot class targets.
        true_class_probs = y_true[l][..., 5:]

        # Decode the raw output into four values:
        #   grid      grid coordinates
        #   raw_pred  raw (undecoded) predictions
        #   pred_xy   decoded box centres
        #   pred_wh   decoded box sizes
        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
            calc_loss=True)
        # Decoded predicted boxes (xy + wh on the last axis).
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Build the negative-sample mask, one entry per image.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # The n ground-truth boxes of image b: shape (n, 4).
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            # IoU of every predicted box with every ground-truth box.
            iou = box_iou(pred_box[b], true_box)
            # Best overlap of each predicted box with any ground-truth box.
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU is below ignore_thresh: the prediction has
            # no matching ground-truth box and serves as a negative sample.
            # Predictions at or above the threshold are already fairly
            # accurate and are left out of the negatives.
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # Iterate over all images. Use the public tf.while_loop;
        # K.control_flow_ops is not part of the Keras backend API.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])

        # Stack the per-image masks and add a trailing size-1 axis so the
        # mask broadcasts against the confidence channel.
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # Encode the ground truth in the same raw format as the predictions.
        # grid_shapes holds (h, w) while the xy targets are (x, y), so the
        # grid shape must be reversed; this only matters for non-square grids.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])

        # Keep the wh target only where an object exists (avoids log(0) = -inf).
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))

        # Larger ground-truth boxes get a smaller weight, small boxes a larger one.
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Centre offset loss, computed with binary cross-entropy.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)

        # Width/height loss.
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        # Positive cells: cross-entropy between 1 and the confidence.
        # Empty cells: cross-entropy between 0 and the confidence, counted
        # only where ignore_mask is 1 (best_iou < ignore_thresh). Empty cells
        # whose prediction already overlaps a ground-truth box well are
        # ignored.
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        # Sum every component over the whole layer.
        xy_loss = K.sum(xy_loss)
        wh_loss = K.sum(wh_loss)
        confidence_loss = K.sum(confidence_loss)
        class_loss = K.sum(class_loss)

        # Number of positive samples in this layer (at least 1).
        num_pos += tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1)
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(
                loss,
                [loss, xy_loss, wh_loss, confidence_loss, class_loss, tf.shape(ignore_mask)],
                summarize=100, message='loss: ')

    if normalize:
        loss = loss / num_pos
    else:
        loss = loss / mf
    return loss
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    args: list; the first num_layers entries are the yolo_outputs (list of
        tensor, the output of yolo_body or tiny_yolo_body), the remaining
        entries are y_true (list of array, the output of
        preprocess_true_boxes)
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object
        confidence loss
    print_loss: bool, print the loss components when True

    Returns
    -------
    loss: scalar tensor, the xy, wh, confidence and class losses summed over
        all layers, each divided by the batch size

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # Cells without an object hold log(0) = -inf; replace them with 0.
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterating over each image of the batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            # Ground-truth boxes of image b, selected by the object mask.
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # Use the public tf.while_loop; K.control_flow_ops is not part of the
        # Keras backend API and is absent from newer Keras releases.
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * \
            K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(
                loss,
                [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
                message='loss: ')
    return loss
def yolo_loss(args, anchors, num_anchors_per_layer, num_classes, ignore_thresh=.5, print_loss=True):
    """
    Return yolo_loss tensor

    Args:
        args (list): args[:num_output_layers] the output of yolo_body or tiny_yolo_body
            args[num_output_layers:] raw_y_true
        anchors (np.array): shape=(N, 2), wh
        num_anchors_per_layer (int): number of anchors per output layer; it
            determines the number of output layers
            (``len(anchors) // num_anchors_per_layer``) and the anchor axis
            of the reshaped prediction.
        num_classes (int): number of classes; the reshaped prediction has
            ``num_classes + 9`` channels per anchor.
        ignore_thresh (float): the iou threshold whether to ignore object confidence loss
        print_loss (bool): when True the running loss is wrapped in tf.Print
            after every output layer.

    Returns:
        loss: tensor, the sum over all output layers of the x, y, w, h,
            confidence and class losses, each divided by the batch size.

    """
    num_output_layers = len(anchors) // num_anchors_per_layer  # num_layers
    yolo_outputs = args[:num_output_layers]
    raw_y_trues = args[num_output_layers:]  # y_true
    # NOTE(review): the masks are fixed to three layers of three anchors each,
    # regardless of num_anchors_per_layer -- confirm against callers.
    anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # The network input size is taken to be 32x the first output's grid size.
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(raw_y_trues[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(raw_y_trues[0]))
        for l in range(num_output_layers)
    ]
    loss = 0
    batch_size = K.shape(yolo_outputs[0])[0]  # m
    batch_size_f = K.cast(batch_size, K.dtype(yolo_outputs[0]))

    for l in range(num_output_layers):
        yolo_output = yolo_outputs[l]
        # tf.reshape accepts at most one -1, so the grid dimensions are read
        # from the (integer) dynamic shape of the output. The channel count
        # num_classes + 9 follows the reshape this line replaces (14 for
        # 5 classes).
        output_shape = K.shape(yolo_output)
        raw_y_pred = tf.reshape(yolo_output, [
            -1, output_shape[1], output_shape[2], num_anchors_per_layer,
            num_classes + 9
        ])
        raw_y_true = raw_y_trues[l]
        anchor_mask = anchor_masks[l]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        object_mask = raw_y_true[..., 4:5]
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, num_classes)
        y_true_class_probs = raw_y_true[..., 5:]

        grid, y_pred_box, y_pred_delta_xy, y_pred_log_wh, y_pred_sigma, y_pred_confidence, y_pred_class_probs = \
            y_pred_graph(raw_y_pred, anchors[anchor_mask], input_shape)

        y_true_delta_xy = raw_y_true[..., :2] * grid_shapes[l][::-1] - grid  # raw_true_xy
        y_true_log_wh = K.log(raw_y_true[..., 2:4] * input_shape[::-1] / anchors[anchor_mask])
        # Zero the log where there is no object so log(0) = -inf does not leak in.
        y_true_log_wh = K.switch(object_mask, y_true_log_wh, K.zeros_like(y_true_log_wh))  # raw_true_wh
        box_loss_scale = 2 - raw_y_true[..., 2:3] * raw_y_true[..., 3:4]

        ignore_mask = tf.TensorArray(K.dtype(raw_y_trues[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask_):
            # (num_gt_boxes, 4)
            gt_box = tf.boolean_mask(raw_y_true[b, ..., 0:4], object_mask_bool[b, ..., 0])
            # (grid_height, grid_width, num_anchors_this_layer, num_gt_boxes)
            iou = box_iou_graph(y_pred_box[b], gt_box)
            # (grid_height, grid_width, num_anchors_this_layer)
            best_iou = K.max(iou, axis=-1)
            # 1 where the best IoU is below the threshold, 0 otherwise.
            ignore_mask_ = ignore_mask_.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(gt_box)))
            return b + 1, ignore_mask_

        _, ignore_mask = tf.while_loop(lambda b, *largs: b < batch_size, loop_body, [0, ignore_mask])
        # (batch_size, grid_height, grid_width, num_anchors_this_layer)
        ignore_mask = ignore_mask.stack()
        # (batch_size, grid_height, grid_width, num_anchors_this_layer, 1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        y_true = tf.concat([y_true_delta_xy, y_true_log_wh], axis=-1)
        y_pred_mu = tf.concat([y_pred_delta_xy, y_pred_log_wh], axis=-1)

        # One nll_loss term per box coordinate (x, y, w, h), each masked to
        # object cells and weighted by box_loss_scale.
        x_loss = nll_loss(y_true[..., 0:1], y_pred_mu[..., 0:1], y_pred_sigma[..., 0:1])
        x_loss = object_mask * box_loss_scale * x_loss
        y_loss = nll_loss(y_true[..., 1:2], y_pred_mu[..., 1:2], y_pred_sigma[..., 1:2])
        y_loss = object_mask * box_loss_scale * y_loss
        w_loss = nll_loss(y_true[..., 2:3], y_pred_mu[..., 2:3], y_pred_sigma[..., 2:3])
        w_loss = object_mask * box_loss_scale * w_loss
        h_loss = nll_loss(y_true[..., 3:4], y_pred_mu[..., 3:4], y_pred_sigma[..., 3:4])
        h_loss = object_mask * box_loss_scale * h_loss

        confidence_loss = object_mask * K.binary_crossentropy(object_mask, y_pred_confidence) + \
            (1 - object_mask) * K.binary_crossentropy(object_mask, y_pred_confidence) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            y_true_class_probs, y_pred_class_probs)

        x_loss = K.sum(x_loss) / batch_size_f
        y_loss = K.sum(y_loss) / batch_size_f
        w_loss = K.sum(w_loss) / batch_size_f
        h_loss = K.sum(h_loss) / batch_size_f
        confidence_loss = K.sum(confidence_loss) / batch_size_f
        class_loss = K.sum(class_loss) / batch_size_f
        loss += x_loss + y_loss + w_loss + h_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, x_loss, y_loss, w_loss, h_loss, confidence_loss,
                class_loss,
                K.sum(ignore_mask)
            ], message='\nloss: ')
    return loss
def _query_image_pool(pool, images, pool_size=50):
    """Swap freshly generated images with a history pool; return the batch to train on.

    ``pool`` is mutated in place. While it holds fewer than ``pool_size``
    images every new image is stored and passed through. Afterwards each new
    image has a 50% chance of replacing a random pool entry, in which case
    the replaced (older) image is returned instead of the new one.
    """
    selected = []
    for image in images:
        if len(pool) < pool_size:
            pool.append(image)
            selected.append(image)
        elif random.uniform(0, 1) > 0.5:
            index = random.randint(0, pool_size - 1)
            # Copy before overwriting the slot so the old image is returned.
            selected.append(np.copy(pool[index]))
            pool[index] = image
        else:
            selected.append(image)
    return np.array(selected)


def train(epochs, batch_size, dataset, baselr, use_pseudounet=False, use_unet=False, use_decay=False,
          plot_models=True, end_of_epoch_callback=None):
    """Train the two generators and two discriminators built by ``components``.

    Args:
        epochs: number of epochs to run.
        batch_size: batch size passed to ``load_batch`` and ``plotGeneratedImages``.
        dataset: dataset identifier passed to ``loadModels``, ``load_batch``,
            ``plotGeneratedImages`` and ``saveModels``.
        baselr: initial learning rate of both Adam optimizers.
        use_pseudounet: forwarded to ``components`` as ``pseudounet``.
        use_unet: forwarded to ``components`` as ``unet``.
        use_decay: when True, from epoch 101 on the learning rate is lowered
            by ``baselr / 100`` per epoch (never below 0).
        plot_models: forwarded to ``components`` as ``plot``.
        end_of_epoch_callback: optional zero-argument callable, invoked once
            before training starts and once after every epoch.
    """
    if end_of_epoch_callback is not None:
        end_of_epoch_callback()

    batchCount_a = n_batches
    batchCount_b = n_batches
    # Train on same image amount, would be best to have even sets
    batchCount = min([batchCount_a, batchCount_b])
    print('\nEpochs:', epochs)
    print('Batch size:', batch_size)
    print('Batches per epoch: ', batchCount, "\n")

    # Build the networks, then restore the 'latest' saved weights into them.
    disc_a, disc_b, gen_a2b, gen_b2a = components(w, h, pseudounet=use_pseudounet, unet=use_unet, plot=plot_models)
    loadModels('latest', dataset, gen_a2b, gen_b2a, disc_a, disc_b)

    # Initialize fake images pools
    pool_a2b = []
    pool_b2a = []

    # Define optimizers
    adam_disc = Adam(lr=baselr, beta_1=0.5)
    adam_gen = Adam(lr=baselr, beta_1=0.5)

    # Define image batches
    true_a = gen_a2b.inputs[0]
    true_b = gen_b2a.inputs[0]
    fake_b = gen_a2b.outputs[0]
    fake_a = gen_b2a.outputs[0]
    fake_pool_a = K.placeholder(shape=(None, 3, h, w))
    fake_pool_b = K.placeholder(shape=(None, 3, h, w))

    # Labels for generator training
    y_fake_a = K.ones_like(disc_a([fake_a]))
    y_fake_b = K.ones_like(disc_b([fake_b]))

    # Labels for discriminator training (real labels are 0.9 rather than 1)
    y_true_a = K.ones_like(disc_a([true_a])) * 0.9
    y_true_b = K.ones_like(disc_b([true_b])) * 0.9
    fakelabel_a2b = K.zeros_like(disc_b([fake_b]))
    fakelabel_b2a = K.zeros_like(disc_a([fake_a]))

    # Define losses
    disc_a_loss = mse_loss(y_true_a, disc_a([true_a])) + mse_loss(
        fakelabel_b2a, disc_a([fake_pool_a]))
    disc_b_loss = mse_loss(y_true_b, disc_b([true_b])) + mse_loss(
        fakelabel_a2b, disc_b([fake_pool_b]))
    gen_a2b_loss = mse_loss(y_fake_b, disc_b([fake_b]))
    gen_b2a_loss = mse_loss(y_fake_a, disc_a([fake_a]))
    cycle_a_loss = mae_loss(true_a, gen_b2a([fake_b]))
    cycle_b_loss = mae_loss(true_b, gen_a2b([fake_a]))
    cyclic_loss = cycle_a_loss + cycle_b_loss

    # Prepare discriminator updater
    discriminator_weights = disc_a.trainable_weights + disc_b.trainable_weights
    disc_loss = (disc_a_loss + disc_b_loss) * 0.5
    discriminator_updater = adam_disc.get_updates(discriminator_weights, [], disc_loss)

    # Prepare generator updater
    generator_weights = gen_a2b.trainable_weights + gen_b2a.trainable_weights
    gen_loss = (gen_a2b_loss + gen_b2a_loss + lmda * cyclic_loss)
    generator_updater = adam_gen.get_updates(generator_weights, [], gen_loss)

    # Define trainers
    generator_trainer = K.function([true_a, true_b],
                                   [gen_a2b_loss, gen_b2a_loss, cyclic_loss],
                                   generator_updater)
    discriminator_trainer = K.function(
        [true_a, true_b, fake_pool_a, fake_pool_b],
        [disc_a_loss / 2, disc_b_loss / 2], discriminator_updater)

    epoch_counter = 1
    # Current learning rate; must exist before the decay step reads it.
    lr = baselr
    plotGeneratedImages(epoch_counter, dataset, batch_size, gen_a2b, gen_b2a)

    # Start training
    for e in range(1, epochs + 1):
        print('\n', '-' * 15, 'Epoch %d' % e, '-' * 15)
        gc.collect()

        # Learning rate decay
        if use_decay and (epoch_counter > 100):
            lr = max(lr - baselr / 100, 0.0)
            # The update ops were built on the optimizers' lr variables, so
            # the new value has to be written into those variables; rebinding
            # the attribute to a float would leave the built ops unchanged.
            K.set_value(adam_disc.lr, lr)
            K.set_value(adam_gen.lr, lr)

        print(f"Batch count: {batchCount}")

        # Cycle through batches
        # NOTE(review): runs a fixed 1000 iterations per epoch although
        # batchCount is what gets reported above -- confirm this is intended.
        for _ in trange(int(1000)):
            # Select true images for training
            true_batch_a, true_batch_b, load_time = next(
                load_batch(
                    dataset,
                    batch_size,
                    is_testing=False,
                ))
            print(f"Load time: {load_time}")

            # Fake images pool
            a2b = gen_a2b.predict(true_batch_a)
            b2a = gen_b2a.predict(true_batch_b)
            # a2b is processed before b2a to keep the original order of
            # random draws.
            pool_b = _query_image_pool(pool_a2b, a2b)
            pool_a = _query_image_pool(pool_b2a, b2a)

            # Update network and obtain losses
            disc_a_err, disc_b_err = discriminator_trainer(
                [true_batch_a, true_batch_b, pool_a, pool_b])
            gen_a2b_err, gen_b2a_err, cyclic_err = generator_trainer(
                [true_batch_a, true_batch_b])

            # Save losses for plotting
            disc_a_history.append(disc_a_err)
            disc_b_history.append(disc_b_err)
            gen_a2b_history_new.append(gen_a2b_err)
            gen_b2a_history_new.append(gen_b2a_err)

        plotLoss_new()
        plotGeneratedImages(epoch_counter, dataset, batch_size, gen_a2b, gen_b2a)
        saveModels(epoch_counter, dataset, gen_a2b, gen_b2a, disc_a, disc_b)
        saveModels('latest', dataset, gen_a2b, gen_b2a, disc_a, disc_b)
        epoch_counter += 1

        if end_of_epoch_callback is not None:
            end_of_epoch_callback()
def zeros_for_var(emb):
    """Apply a Lambda layer that calls K.zeros_like on its input to ``emb`` and return the result."""
    zero_layer = Lambda(lambda tensor: K.zeros_like(tensor))
    return zero_layer(emb)
def get_detected_boxes(predicts, image_shape, anchors, num_classes, score_threshold=0.6, max_boxes=20,
                       iou_threshold=0.5):
    """Turn raw per-scale predictions into per-class NMS-filtered detections.

    Args:
        predicts: list of tensor, each has shape=(1, h, w, num_anchors, 5 + num_classes)
        image_shape: tensor, shape=(2,), wh
        anchors: array, shape=(N, 2)
        num_classes: integer
        score_threshold: float, scores must exceed this to be kept
        max_boxes: integer, maximum number of boxes kept per class by NMS
        iou_threshold: float, IoU threshold passed to NMS

    Returns:
        tuple of tensor, (boxes, scores, classes), each shape (N,4), (N,), (N,).
        Boxes are ordered x_min, y_min, x_max, y_max.
    """

    def swap_xy(box_tensor):
        # Exchange columns 0<->1 and 2<->3 on the last axis
        # (x_min, y_min, x_max, y_max <-> y_min, x_min, y_max, x_max).
        return K.concatenate([
            box_tensor[..., 1:2],
            box_tensor[..., 0:1],
            box_tensor[..., 3:4],
            box_tensor[..., 2:3],
        ], axis=-1)

    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    scale_count = len(predicts)
    # 32x the first prediction's spatial dims, with the two entries reversed.
    input_shape = (K.shape(predicts[0])[1:3] * 32)[::-1]

    per_scale_boxes = []
    per_scale_scores = []
    for scale_idx in range(scale_count):
        box_xy, box_wh, box_confidence, box_class_probs, _, _, _ = post_process_pred(
            predicts[scale_idx], input_shape, anchors[anchor_mask[scale_idx]], num_classes)
        # (1, h, w, num_anchors, 4), x_min, y_min, x_max, y_max, relative to original image(not scaled)
        rescaled = rescale_pred_box(box_xy, box_wh, input_shape, image_shape)
        # y_min, x_min, y_max, x_max, flattened to (h * w * num_anchors, 4)
        flat_boxes = K.reshape(swap_xy(rescaled), shape=(-1, 4))
        # (h * w * num_anchors, num_classes)
        flat_scores = K.reshape(box_confidence * box_class_probs, shape=(-1, num_classes))
        per_scale_boxes.append(flat_boxes)
        per_scale_scores.append(flat_scores)

    all_boxes = K.concatenate(per_scale_boxes, axis=0)
    all_scores = K.concatenate(per_scale_scores, axis=0)
    nms_limit = K.constant(max_boxes, dtype='int32')
    # (h * w * num_anchors, num_classes)
    keep_mask = all_scores > score_threshold

    kept_boxes = []
    kept_scores = []
    kept_classes = []
    for class_id in range(num_classes):
        class_mask = keep_mask[..., class_id]
        candidate_boxes = tf.boolean_mask(all_boxes, class_mask)
        candidate_scores = tf.boolean_mask(all_scores[..., class_id], class_mask)
        selected = tf.image.non_max_suppression(
            candidate_boxes, candidate_scores, nms_limit, iou_threshold=iou_threshold)
        class_boxes = K.gather(candidate_boxes, selected)
        class_scores = K.gather(candidate_scores, selected)
        kept_boxes.append(class_boxes)
        kept_scores.append(class_scores)
        # One int32 class id per surviving box.
        kept_classes.append(K.zeros_like(class_scores, dtype='int32') + class_id)

    # Back to x_min, y_min, x_max, y_max
    boxes = swap_xy(K.concatenate(kept_boxes, axis=0))
    scores = K.concatenate(kept_scores, axis=0)
    classes = K.concatenate(kept_classes, axis=0)
    return boxes, scores, classes