def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """
    Applies Non-max suppression (NMS) to a set of boxes.

    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have
        been scaled to the image size (see later)
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering

    Returns:
    scores -- tensor of shape (None,), predicted score for each box
    boxes -- tensor of shape (None, 4), predicted box coordinates
    classes -- tensor of shape (None,), predicted class for each box
    """
    # tensor to be used in tf.image.non_max_suppression()
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    # initialize variable max_boxes_tensor
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))

    # Use tf.image.non_max_suppression() to get the list of indices
    # corresponding to the boxes you keep
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor,
                                               iou_threshold=iou_threshold)

    # Use K.gather() to select only nms_indices from scores, boxes and classes
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes
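# --- Usage sketch (not from the source; assumes TensorFlow 1.x session
# semantics, `import tensorflow as tf` and `from keras import backend as K`).
# The dummy tensors below are hypothetical stand-ins for yolo_filter_boxes()
# output, used only to check shapes.
import numpy as np
import tensorflow as tf
from keras import backend as K

scores = K.constant(np.random.random(19 * 19 * 5).astype('float32'))
boxes = K.constant((np.random.random((19 * 19 * 5, 4)) * 10.).astype('float32'))
classes = K.constant(np.random.randint(0, 80, 19 * 19 * 5).astype('float32'))
out_scores, out_boxes, out_classes = yolo_non_max_suppression(scores, boxes, classes)
print(K.eval(out_scores).shape)  # at most (10,) boxes survive NMS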
def call(self):
    E = K.variable(np.random.random((1000, 100)), name="entity_embeddings")
    R = K.variable(np.random.random((10, 10000)), name="relation_embeddings")
    x = K.placeholder(shape=(1, 3), name="spo")
    y = K.placeholder(ndim=0, name="y")

    batch_placeholder = K.cast(x, 'int32')[0]
    # print(batch_placeholder.eval())
    s, o, p = [batch_placeholder[i] for i in range(3)]

    s2v = K.gather(E, s)
    o2v = K.gather(E, o)
    r2v = K.gather(R, p)

    def ccorr(a, b):
        return T.outer(a, b).flatten()
        # Alternatives tried:
        # return T.arctan(s2v) + T.arctan(o2v)
        # return (s2v.dimshuffle('x', 'x', 0, 'x') + o2v.dimshuffle('x', 'x', 0, 'x')).flatten()
        # return T.nnet.conv2d(a.dimshuffle('x', 'x', 0, 'x'), b.dimshuffle('x', 'x', 0, 'x'),
        #                      None, None, filter_flip=True, border_mode='half')
        # return self.ccorr1d_sc(a, b, border_mode='half')

    eta = K.dot(r2v, ccorr(s2v, o2v))
    # py = 1 / (1 + K.exp(-eta))
    # l = -K.log(py)
    # from theano import pp, function, printing
    # grad = T.grad(eta, E)
    # print(pp(grad))
    # func = function([x], grad)
    func = K.function([x, y], K.gradients(eta, [s2v, o2v, r2v, E, R]))
    # for i in func.maker.fgraph.outputs:
    #     print(pp(i))
    # print(T.grad(py, s2v))
    print(func([[[1, 2, 3]], -1]))
def yolo_eval(yolo_outputs, image_shape, max_boxes=10, score_threshold=.6, iou_threshold=.5): """Evaluate YOLO model on given input batch and return filtered boxes.""" box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs boxes = yolo_boxes_to_corners(box_xy, box_wh) boxes, scores, classes = yolo_filter_boxes( boxes, box_confidence, box_class_probs, threshold=score_threshold) # Scale boxes back to original image shape. height = image_shape[0] width = image_shape[1] image_dims = K.stack([height, width, height, width]) image_dims = K.reshape(image_dims, [1, 4]) boxes = boxes * image_dims # TODO: Something must be done about this ugly hack! max_boxes_tensor = K.variable(max_boxes, dtype='int32') K.get_session().run(tf.variables_initializer([max_boxes_tensor])) nms_index = tf.image.non_max_suppression( boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold) boxes = K.gather(boxes, nms_index) scores = K.gather(scores, nms_index) classes = K.gather(classes, nms_index) return boxes, scores, classes
def call(self, x, mask=None): if isinstance(x, list): x,_ = x if mask is not None and isinstance(mask, list): mask,_ = mask if 0. < self.dropout < 1.: retain_p = 1. - self.dropout dims = self.W._keras_shape[:-1] B = K.random_binomial(dims, p=retain_p) * (1. / retain_p) B = K.expand_dims(B) W = K.in_train_phase(self.W * B, self.W) else: W = self.W if self.mode == 'matrix': return K.gather(W,x) elif self.mode == 'tensor': # quick and dirty: only allowing for 3dim inputs when it's tensor mode assert K.ndim(x) == 3 # put sequence on first; gather; take diagonal across shared batch dimension # in other words, W is (B, S, F) # incoming x is (B, S, A) inds = K.arange(self.W._keras_shape[0]) #out = K.gather(K.permute_dimensions(W, (1,0,2)), x).diagonal(axis1=0, axis2=3) #return K.permute_dimensions(out, (3,0,1,2)) ### method above doesn't do grads =.= # tensor abc goes to bac, indexed onto with xyz, goes to xyzac, # x == a, so shape to xayzc == xxyzc # take diagonal on first two: xyzc #out = K.colgather() out = K.gather(K.permute_dimensions(W, (1,0,2)), x) out = K.permute_dimensions(out, (0,3,1,2,4)) out = K.gather(out, (inds, inds)) return out else: raise Exception('sanity check. should not be here.') #all_dims = T.arange(len(self.W._keras_shape)) #first_shuffle = [all_dims[self.embed_dim]] + all_dims[:self.embed_dim] + all_dims[self.embed_dim+1:] ## 1. take diagonal from 0th to ## chang eof tactics ## embed on time or embed on batch. that's all I'm supporting. ## if it's embed on time, then, x.ndim+1 is where batch will be, and is what ## i need to take the diagonal over. ## now dim shuffle the xdims + 1 to the front. #todo: get second shuffle or maybe find diagonal calculations #out = K.gather(W, x) #return out ### reference #A = S(np.arange(60).reshape(3,4,5)) #x = S(np.random.randint(0, 4, (3,4,10))) #x_emb = A.dimshuffle(1,0,2)[x].dimshuffle(0,3,1,2,4)[T.arange(A.shape[0]), T.arange(A.shape[0])]
def test_gather(self): shape = (10, 2, 3) ref = np.arange(np.prod(shape)).reshape(shape) ref_th = KTH.variable(ref) ref_tf = KTF.variable(ref) inds = [1, 3, 7, 9] inds_th = KTH.variable(inds, dtype='int32') inds_tf = KTF.variable(inds, dtype='int32') th_z = KTH.gather(ref_th, inds_th) th_result = KTH.eval(th_z) tf_result = KTF.eval(KTF.gather(ref_tf, inds_tf)) assert_allclose(tf_result, th_result, atol=1e-05) if hasattr(th_z, '_keras_shape'): assert th_z._keras_shape == th_result.shape # test theano shape inference when # input shape has None entries if K.backend() == 'theano': x = K.placeholder(shape=(None, 3, 4)) indices = K.placeholder(shape=(5, 6), dtype='int32') y = K.gather(x, indices) assert y._keras_shape == (5, 6, 3, 4)
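# Shape rule exercised by the test above (explanatory note, not from the
# source): gathering a (10, 2, 3) reference with a length-4 index vector
# yields shape (4, 2, 3); indexing with a (5, 6) index tensor yields
# (5, 6, 2, 3) -- the index shape replaces the first axis of the reference.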
def call(self, x, mask=None): batch_placeholder = K.cast(x, 'int32')[0] s, o, p = [batch_placeholder[i] for i in range(3)] s2v = K.gather(self.E, s) o2v = K.gather(self.E, o) r2v = K.gather(self.R, p) # print(K.shape(s2v).eval()) # print(self.E[[0]].shape.eval()) def ccorr(a, b): return self.ccorr1d_sc(a, b, border_mode='half') eta = K.dot(K.transpose(r2v), ccorr(s2v, o2v)) return eta
def eval(outputs, anchors, num_classes, image_shape, max_boxes=20, score_threshold=.6, iou_threshold=.5): '''Evaluate the YOLO model on given input and return filtered boxes''' num_layers = len(outputs) anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [ [3, 4, 5], [1, 2, 3]] input_shape = K.shape(outputs[0])[1:3] * 32 boxes = [] box_scores = [] for l in range(num_layers): _boxes, _box_scores = boxes_and_scores(outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, image_shape) boxes.append(_boxes) box_scores.append(_box_scores) boxes = K.concatenate(boxes, axis=0) box_scores = K.concatenate(box_scores, axis=0) mask = box_scores >= score_threshold max_boxes_tensor = K.constant(max_boxes, dtype='int32') boxes_ = [] scores_ = [] classes_ = [] for c in range(num_classes): # TODO: use Keras backend instead of tf. class_boxes = tf.boolean_mask(boxes, mask[:, c]) class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) nms_index = tf.image.non_max_suppression( class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) class_boxes = K.gather(class_boxes, nms_index) class_box_scores = K.gather(class_box_scores, nms_index) classes = K.ones_like(class_box_scores, 'int32') * c boxes_.append(class_boxes) scores_.append(class_box_scores) classes_.append(classes) boxes_ = K.concatenate(boxes_, axis=0) scores_ = K.concatenate(scores_, axis=0) classes_ = K.concatenate(classes_, axis=0) return boxes_, scores_, classes_
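# Note (explanatory, not from the source): NMS is run once per class above so
# that overlapping boxes of different classes never suppress each other; only
# same-class overlaps compete, which is why boxes and scores are masked by
# class before calling tf.image.non_max_suppression.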
def call(self, x, mask=None): batch_placeholder = K.cast(x, 'int32')[0] s, o, p = [batch_placeholder[i] for i in range(3)] s2v = K.gather(self.E, s) o2v = K.gather(self.E, o) r2v = K.gather(self.R, p) def ccorr(a, b): return T.outer(a,b).flatten() # return self.ccorr1d_sc(a, b, border_mode='half') eta = K.dot(r2v, ccorr(s2v, o2v)) # func = K.function([s2v,o2v,r2v], K.gradients(K.sigmoid(eta), [s2v,o2v,r2v])) # print(func([np.random.random(150),np.random.random(150),np.random.random(150)])) return eta
def construct_perturbed_input(perturb_mapping, onehot_vectors):
    """
    :param perturb_mapping: 2-D tensor of perturbation rows, one per vocabulary entry
    :param onehot_vectors: integer tensor of row indices into perturb_mapping
    :return: tensor of the gathered perturbation rows
    """
    return K.gather(perturb_mapping, onehot_vectors)
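# Minimal sketch of the gather-as-lookup pattern used above (illustrative,
# not from the source): rows of a (vocab, dim) matrix are selected by an
# integer index tensor.
import numpy as np
from keras import backend as K

mapping = K.constant(np.arange(12.0).reshape(4, 3))  # (4, 3) lookup table
indices = K.constant([0, 2, 2], dtype='int32')       # (3,) row indices
print(K.eval(K.gather(mapping, indices)))            # -> (3, 3) gathered rows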
def call(self, x, mask=None): if 0. < self.dropout < 1.: retain_p = 1. - self.dropout B = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p) B = K.expand_dims(B) W = K.in_train_phase(self.W * B, self.W) else: W = self.W out = K.gather(W, x) return out
def call(self, x, mask=None): if 0. < self.dropout < 1.: retain_p = 1. - self.dropout B = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p) B = K.expand_dims(B) W = K.in_train_phase(self.W * B, self.W) else: W = self.W W_ = T.concatenate([self.zeros_vector, W], axis=0) out = K.gather(W_, x) return out
def lookup(self, x, W, memory_length): # shape: (batch*memory_length, input_length) x = K.cast(K.reshape(x, (-1, self.input_length)), 'int32') mask = K.expand_dims(K.not_equal(x, 0.), dim=-1) # shape: (batch*memory_length, input_length, output_dim) X = K.gather(W, x) if self.bow_mode == "bow": # shape: (batch*memory_length, output_dim) X = K.sum(X + K.expand_dims(self.Te, 0), axis=1) # shape: (batch, memory_length, output_dim) X = K.reshape(X, (-1, memory_length, self.output_dim)) return X, mask
def call(self, x, mask=None): batch_placeholder = K.cast(x, 'int32')[0] s, o, p = [batch_placeholder[i] for i in range(3)] s2v = K.gather(self.E, s) o2v = K.gather(self.E, o) r2v = K.gather(self.R, p) def ccorr(a, b): # Return tensor product - basically bilinear/RESCAL models return T.outer(a,b).flatten() # Or cross-correlation op? # return T.nnet.conv2d(a.dimshuffle('x', 'x', 0, 'x'), b.dimshuffle('x', 'x', 0, 'x'), None, # None, # filter_flip=True, border_mode='half').flatten()[:-1] # return self.ccorr1d_sc(a, b, border_mode='half') eta = K.dot(r2v, ccorr(s2v, o2v)) # func = K.function([s2v,o2v,r2v], K.gradients(K.sigmoid(eta), [s2v,o2v,r2v])) # print(func([np.random.random(150),np.random.random(150),np.random.random(150)])) return eta
def call(self, x, mask=None): if K.dtype(x) != 'int32': x = K.cast(x, 'int32') if 0. < self.dropout < 1.: retain_p = 1. - self.dropout B = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p) B = K.expand_dims(B) W = K.in_train_phase(self.W * B, self.W) else: W = self.W denorm = K.sum(W, axis=0) W = W / denorm out = K.gather(W, x) return out
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5): """ Applies Non-max suppression (NMS) to set of boxes Arguments: scores -- tensor of shape (None,), output of yolo_filter_boxes() boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later) classes -- tensor of shape (None,), output of yolo_filter_boxes() max_boxes -- integer, maximum number of predicted boxes you'd like iou_threshold -- real value, "intersection over union" threshold used for NMS filtering Returns: scores -- tensor of shape (, None), predicted score for each box boxes -- tensor of shape (4, None), predicted box coordinates classes -- tensor of shape (, None), predicted class for each box Note: The "None" dimension of the output tensors has obviously to be less than max_boxes. Note also that this function will transpose the shapes of scores, boxes, classes. This is made for convenience. """ max_boxes_tensor = K.variable(max_boxes, dtype='int32') # tensor to be used in tf.image.non_max_suppression() K.get_session().run(tf.variables_initializer([max_boxes_tensor])) # initialize variable max_boxes_tensor # Use tf.image.non_max_suppression() to get the list of indices corresponding to boxes you keep ### START CODE HERE ### (≈ 1 line) nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold) ### END CODE HERE ### # Use K.gather() to select only nms_indices from scores, boxes and classes ### START CODE HERE ### (≈ 3 lines) scores = K.gather(scores, nms_indices) boxes = K.gather(boxes, nms_indices) classes = K.gather(classes, nms_indices) ### END CODE HERE ### return scores, boxes, classes
def call(self, inputs, mask=None): if not isinstance(inputs, list) or len(inputs) <= 1: raise TypeError('SelectSpkMemory must be called on a list of tensors ' '(at least 2). Got: ' + str(inputs)) # (None(batch), 1), speaker identity target_spk_l = inputs[0] target_spk_l = K.reshape(target_spk_l, (target_spk_l.shape[0], )) if K.dtype(target_spk_l) != 'int32': target_spk_l = K.cast(target_spk_l, 'int32') # (None(batch), spk_size, embed_dim), life-long memory life_long_mem = inputs[1] # Extract the acoustic feature from memory spk_memory = K.gather(life_long_mem, target_spk_l) # (None(batch), embed_dim) return spk_memory
def get_output(self, train=False): X = self.get_input(train) retain_p = 1. - self.dropout if train and self.dropout > 0: B = K.random_binomial((self.input_dim,), p=retain_p) else: B = K.ones((self.input_dim)) * retain_p # we zero-out rows of W at random Xs = K.cast(K.reshape(X, (-1, self.nb_words)), 'int32') # (samples*input_length, nb_words, dim) out = K.gather(self.W * K.expand_dims(B), Xs) out = K.reshape(out, (-1, self.input_length, self.nb_words, self.output_dim)) # (samples, input_length, nb_words, dim) out = out * K.expand_dims(K.not_equal(X, 0), dim=-1) if self.bow_mode == "bow": out = K.sum(out, axis=2) return out
def call(self, inputs): #return x[self.dtw_y] x, dtw_y = inputs y = K.gather(x, dtw_y) return y
def _interpolate(self, image, sampled_grids, output_size): batch_size = K.shape(image)[0] height = K.shape(image)[1] width = K.shape(image)[2] num_channels = K.shape(image)[3] x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32') y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32') x = .5 * (x + 1.0) * K.cast(width, dtype='float32') y = .5 * (y + 1.0) * K.cast(height, dtype='float32') x0 = K.cast(x, 'int32') x1 = x0 + 1 y0 = K.cast(y, 'int32') y1 = y0 + 1 max_x = int(K.int_shape(image)[2] - 1) max_y = int(K.int_shape(image)[1] - 1) x0 = K.clip(x0, 0, max_x) x1 = K.clip(x1, 0, max_x) y0 = K.clip(y0, 0, max_y) y1 = K.clip(y1, 0, max_y) pixels_batch = K.arange(0, batch_size) * (height * width) pixels_batch = K.expand_dims(pixels_batch, axis=-1) flat_output_size = output_size[0] * output_size[1] base = K.repeat_elements(pixels_batch, flat_output_size, axis=1) base = K.flatten(base) # base_y0 = base + (y0 * width) base_y0 = y0 * width base_y0 = base + base_y0 # base_y1 = base + (y1 * width) base_y1 = y1 * width base_y1 = base_y1 + base indices_a = base_y0 + x0 indices_b = base_y1 + x0 indices_c = base_y0 + x1 indices_d = base_y1 + x1 flat_image = K.reshape(image, shape=(-1, num_channels)) flat_image = K.cast(flat_image, dtype='float32') pixel_values_a = K.gather(flat_image, indices_a) pixel_values_b = K.gather(flat_image, indices_b) pixel_values_c = K.gather(flat_image, indices_c) pixel_values_d = K.gather(flat_image, indices_d) x0 = K.cast(x0, 'float32') x1 = K.cast(x1, 'float32') y0 = K.cast(y0, 'float32') y1 = K.cast(y1, 'float32') area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1) area_b = K.expand_dims(((x1 - x) * (y - y0)), 1) area_c = K.expand_dims(((x - x0) * (y1 - y)), 1) area_d = K.expand_dims(((x - x0) * (y - y0)), 1) values_a = area_a * pixel_values_a values_b = area_b * pixel_values_b values_c = area_c * pixel_values_c values_d = area_d * pixel_values_d return values_a + values_b + values_c + values_d
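# Note on the flat indexing above (explanatory, not from the source): after
# reshaping the image to (batch * height * width, channels), pixel (b, y, x)
# lives at flat index b * height * width + y * width + x. The four gathers
# fetch the corner pixels, and the area_* terms are the standard bilinear
# interpolation weights, which sum to 1 for in-range coordinates.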
def get_output(self, train=False): X = self.get_input(train) out = K.gather(self.W, X) return out
def DecodeBox(outputs,
              anchors,
              num_classes,
              image_shape,
              input_shape,
              #-----------------------------------------------------------#
              #   The 13x13 feature map uses anchors [81,82],[135,169],[344,319]
              #   The 26x26 feature map uses anchors [10,14],[23,27],[37,58]
              #-----------------------------------------------------------#
              anchor_mask=[[6, 7, 8], [3, 4, 5], [0, 1, 2]],
              max_boxes=100,
              confidence=0.5,
              nms_iou=0.3,
              letterbox_image=True):
    box_xy = []
    box_wh = []
    box_confidence = []
    box_class_probs = []
    for i in range(len(outputs)):
        sub_box_xy, sub_box_wh, sub_box_confidence, sub_box_class_probs = \
            get_anchors_and_decode(outputs[i], anchors[anchor_mask[i]], num_classes, input_shape)
        box_xy.append(K.reshape(sub_box_xy, [-1, 2]))
        box_wh.append(K.reshape(sub_box_wh, [-1, 2]))
        box_confidence.append(K.reshape(sub_box_confidence, [-1, 1]))
        box_class_probs.append(K.reshape(sub_box_class_probs, [-1, num_classes]))
    box_xy = K.concatenate(box_xy, axis=0)
    box_wh = K.concatenate(box_wh, axis=0)
    box_confidence = K.concatenate(box_confidence, axis=0)
    box_class_probs = K.concatenate(box_class_probs, axis=0)

    #------------------------------------------------------------------------------------------------------------#
    #   Before the image is fed to the network, letterbox_image pads it with gray bars, so the resulting
    #   box_xy and box_wh are relative to the padded image. We therefore adjust them to remove the padding,
    #   converting box_xy and box_wh into y_min, y_max, x_min, x_max.
    #   Even without letterbox_image, the normalized box_xy and box_wh must be rescaled to the original image size.
    #------------------------------------------------------------------------------------------------------------#
    boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
    box_scores = box_confidence * box_class_probs

    #-----------------------------------------------------------#
    #   Keep only boxes whose score exceeds the confidence threshold
    #-----------------------------------------------------------#
    mask = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_out = []
    scores_out = []
    classes_out = []
    for c in range(num_classes):
        #-----------------------------------------------------------#
        #   Take all boxes (and their scores) with box_scores >= score_threshold
        #-----------------------------------------------------------#
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])

        #-----------------------------------------------------------#
        #   Non-max suppression:
        #   keep the highest-scoring box within each region
        #-----------------------------------------------------------#
        nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores, max_boxes_tensor,
                                                 iou_threshold=nms_iou)

        #-----------------------------------------------------------#
        #   Gather the NMS results, respectively:
        #   box positions, scores and classes
        #-----------------------------------------------------------#
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c

        boxes_out.append(class_boxes)
        scores_out.append(class_box_scores)
        classes_out.append(classes)
    boxes_out = K.concatenate(boxes_out, axis=0)
    scores_out = K.concatenate(scores_out, axis=0)
    classes_out = K.concatenate(classes_out, axis=0)

    return boxes_out, scores_out, classes_out
def call(self, x, mask=None): x = K.maximum(K.minimum(x, self.model_dims[1] - 1), 0) return K.gather(self.W, x)
def batch_gather(reference, indices): ref_shape = K.shape(reference) batch_size = ref_shape[0] n_classes = ref_shape[1] flat_indices = K.arange(0, batch_size) * n_classes + K.flatten(indices) return K.gather(K.flatten(reference), flat_indices)
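# Usage sketch for batch_gather (illustrative, not from the source): select
# one entry per row of a (batch, n_classes) matrix by a per-row index.
import numpy as np
from keras import backend as K

reference = K.constant(np.arange(6.0).reshape(2, 3))  # [[0,1,2],[3,4,5]]
indices = K.constant([2, 0], dtype='int32')           # one index per row
print(K.eval(batch_gather(reference, indices)))       # -> [2., 3.]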
def load_generator_network(batch_size, sequence_class, n_classes=1, seq_length=205, supply_inputs=False, gan_func=gan_func): sequence_class_onehots = np.eye(n_classes) #Generator network parameters latent_size = 100 out_seed_size = 100 #Generator inputs latent_input_1 = Input(tensor=K.ones((batch_size, latent_size)), name='noise_input_1') latent_input_2 = Input(tensor=K.ones((batch_size, latent_size)), name='noise_input_2') latent_input_1_out = Lambda(lambda inp: inp * K.random_uniform( (batch_size, latent_size), minval=-1.0, maxval=1.0), name='lambda_rand_input_1')(latent_input_1) latent_input_2_out = Lambda(lambda inp: inp * K.random_uniform( (batch_size, latent_size), minval=-1.0, maxval=1.0), name='lambda_rand_input_2')(latent_input_2) class_embedding = Lambda( lambda x: K.gather(K.constant(sequence_class_onehots), K.cast(x[:, 0], dtype='int32')))(sequence_class) seed_input_1 = Concatenate(axis=-1)( [latent_input_1_out, class_embedding]) seed_input_2 = Concatenate(axis=-1)( [latent_input_2_out, class_embedding]) #Policy network definition policy_dense_0 = Dense(128, activation='linear', kernel_initializer='glorot_uniform', name='policy_dense_0') batch_norm_0 = BatchNormalization(name='policy_batch_norm_0') relu_0 = Lambda(lambda x: K.relu(x)) policy_dense_1 = Dense(128, activation='linear', kernel_initializer='glorot_uniform', name='policy_dense_1') batch_norm_1 = BatchNormalization(name='policy_batch_norm_1') relu_1 = Lambda(lambda x: K.relu(x)) policy_dense_2 = Dense(out_seed_size, activation='linear', kernel_initializer='glorot_uniform', name='policy_dense_2') seed_out_1 = policy_dense_2( relu_1( batch_norm_1( policy_dense_1( relu_0(batch_norm_0(policy_dense_0(seed_input_1))))))) seed_out_2 = policy_dense_2( relu_1( batch_norm_1( policy_dense_1( relu_0(batch_norm_0(policy_dense_0(seed_input_2))))))) policy_out_1 = gan_func(seed_out_1) policy_out_2 = gan_func(seed_out_2) return [latent_input_1, latent_input_2], [policy_out_1, policy_out_2], [seed_out_1, seed_out_2]
style_losses = get_style_losses(outputs_dict, style_targets_dict, args.style_layers, norm_by_channels=args.norm_by_channels) content_losses = get_content_losses(outputs_dict, content_targets_dict, args.content_layers) # Use total variation to improve local coherence total_var_loss = tv_loss(pastiche_net.output) weighted_style_losses = [] weighted_content_losses = [] # Compute total loss total_loss = K.variable(0.) for loss in style_losses: weighted_loss = K.mean(K.gather(style_weights, class_targets) * loss) weighted_style_losses.append(weighted_loss) total_loss += weighted_loss for loss in content_losses: weighted_loss = K.mean(K.gather(content_weights, class_targets) * loss) weighted_content_losses.append(weighted_loss) total_loss += weighted_loss weighted_tv_loss = K.mean(K.gather(tv_weights, class_targets) * total_var_loss) total_loss += weighted_tv_loss ## Make training function # Get a list of inputs
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    """
    num_layers: number of output feature maps, 3 here;
    anchor_mask: splits the anchors across the 3 layers -- layer 1 (13x13) gets 6,7,8,
        layer 2 (26x26) gets 3,4,5, layer 3 (52x52) gets 0,1,2;
    input_shape: the input image size, i.e. the 0th feature map size times 32
        (13 x 32 = 416), which follows from the Darknet architecture.
    The larger the feature map (13 -> 52), the smaller the detected objects and the
    smaller the anchors needed, hence the anchors list is assigned in reverse order.
    """
    num_layers = len(yolo_outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[
        3, 4, 5
    ], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    """
    Next, for each output layer l of YOLO, yolo_boxes_and_scores() extracts the
    boxes (_boxes) and confidences (_box_scores); the per-layer results are
    collected into the lists boxes and box_scores and then concatenated flat,
    yielding all boxes and confidences. The output formats are:
        boxes: (?, 4)        # ? is the number of boxes
        box_scores: (?, 80)
    """
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    # concatenate flattens the per-layer data: the boxes are already in real
    # image coordinates, so the scales can be merged without adjustment
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
def call(self, inputs, **kwargs): """ Creates the layer as a Keras graph Notes: This does not add self loops to the adjacency matrix. The output indices are only used when `final_layer=True` Args: inputs (list): list of inputs with 4 items: node features (size b x N x F), output indices (size b x M), sparse graph adjacency matrix (size N x N), where N is the number of nodes in the graph, F is the dimensionality of node features M is the number of output nodes """ X = inputs[0] # Node features (1 x N x F) out_indices = inputs[1] # output indices (1 x K) A_sparse = inputs[2] # Adjacency matrix (1 x N x N) if not isinstance(A_sparse, K.tf.SparseTensor): raise TypeError("A is not sparse") # Get undirected graph edges (E x 2) A_indices = A_sparse.indices batch_dim, n_nodes, _ = K.int_shape(X) if batch_dim != 1: raise ValueError( "Currently full-batch methods only support a batch dimension of one" ) else: # Remove singleton batch dimension out_indices = K.squeeze(out_indices, 0) X = K.squeeze(X, 0) outputs = [] for head in range(self.attn_heads): kernel = self.kernels[head] # W in the paper (F x F') attention_kernel = self.attn_kernels[ head] # Attention kernel a in the paper (2F' x 1) # Compute inputs to attention network features = K.dot(X, kernel) # (N x F') # Compute feature combinations # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j] attn_for_self = K.dot( features, attention_kernel[0]) # (N x 1), [a_1]^T [Wh_i] attn_for_neighs = K.dot( features, attention_kernel[1]) # (N x 1), [a_2]^T [Wh_j] # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]] dense = attn_for_self + K.transpose( attn_for_neighs) # (N x N) via broadcasting # Create sparse attention vector (All non-zero values of the matrix) sparse_attn_self = K.tf.gather(K.reshape(attn_for_self, [-1]), A_indices[:, 0], axis=0) sparse_attn_neighs = K.tf.gather(K.reshape(attn_for_neighs, [-1]), A_indices[:, 1], axis=0) attn_values = sparse_attn_self + sparse_attn_neighs # Add nonlinearity attn_values = LeakyReLU(alpha=0.2)(attn_values) # Apply dropout to features and attention coefficients dropout_feat = Dropout(self.in_dropout_rate)(features) # (N x F') dropout_attn = Dropout(self.attn_dropout_rate)( attn_values) # (N x N) # Convert to sparse matrix sparse_attn = K.tf.sparse.SparseTensor( A_indices, values=dropout_attn, dense_shape=[n_nodes, n_nodes]) # Apply softmax to get attention coefficients sparse_attn = K.tf.sparse.softmax( sparse_attn) # (N x N), Eq. 3 of the paper # Linear combination with neighbors' features [YT: see Eq. 4] node_features = K.tf.sparse.matmul(sparse_attn, dropout_feat) # (N x F') if self.use_bias: node_features = K.bias_add(node_features, self.biases[head]) # Add output of attention head to final output outputs.append(node_features) # Aggregate the heads' output according to the reduction method if self.attn_heads_reduction == "concat": output = K.concatenate(outputs) # (N x KF') else: output = K.mean(K.stack(outputs), axis=0) # N x F') output = self.activation(output) # On the final layer we gather the nodes referenced by the indices if self.final_layer: output = K.gather(output, out_indices) # Add batch dimension back if we removed it if batch_dim == 1: output = K.expand_dims(output, 0) return output
def call(self, inputs): """ Creates the layer as a Keras graph. Note that the inputs are tensors with a batch dimension of 1: Keras requires this batch dimension, and for full-batch methods we only have a single "batch". There are three inputs required, the node features, the output indices (the nodes that are to be selected in the final layer) and the graph adjacency matrix Notes: This does not add self loops to the adjacency matrix. The output indices are only used when ``final_layer=True`` Args: inputs (list): list of inputs with 3 items: node features (size 1 x N x F), output indices (size 1 x M), graph adjacency matrix (size N x N), where N is the number of nodes in the graph, F is the dimensionality of node features M is the number of output nodes """ X = inputs[0] # Node features (1 x N x F) out_indices = inputs[1] # output indices (1 x K) A = inputs[2] # Adjacency matrix (N x N) batch_dim, n_nodes, _ = K.int_shape(X) if batch_dim != 1: raise ValueError( "Currently full-batch methods only support a batch dimension of one" ) else: # Remove singleton batch dimension X = K.squeeze(X, 0) out_indices = K.squeeze(out_indices, 0) outputs = [] for head in range(self.attn_heads): kernel = self.kernels[head] # W in the paper (F x F') attention_kernel = self.attn_kernels[ head] # Attention kernel a in the paper (2F' x 1) # Compute inputs to attention network features = K.dot(X, kernel) # (N x F') # Compute feature combinations # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_2]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j] attn_for_self = K.dot( features, attention_kernel[0]) # (N x 1), [a_1]^T [Wh_i] attn_for_neighs = K.dot( features, attention_kernel[1]) # (N x 1), [a_2]^T [Wh_j] # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]] dense = attn_for_self + K.transpose( attn_for_neighs) # (N x N) via broadcasting # Add nonlinearity dense = LeakyReLU(alpha=0.2)(dense) # Mask values before activation (Vaswani et al., 2017) # YT: this only works for 'binary' A, not for 'weighted' A! # YT: if A does not have self-loops, the node itself will be masked, so A should have self-loops # YT: this is ensured by setting the diagonal elements of A tensor to 1 above mask = -10e9 * (1.0 - A) dense += mask # Apply softmax to get attention coefficients dense = K.softmax(dense, axis=1) # (N x N), Eq. 3 of the paper # Apply dropout to features and attention coefficients dropout_feat = Dropout(self.in_dropout_rate)(features) # (N x F') dropout_attn = Dropout(self.attn_dropout_rate)(dense) # (N x N) # Linear combination with neighbors' features [YT: see Eq. 4] node_features = K.dot(dropout_attn, dropout_feat) # (N x F') if self.use_bias: node_features = K.bias_add(node_features, self.biases[head]) # Add output of attention head to final output outputs.append(node_features) # Aggregate the heads' output according to the reduction method if self.attn_heads_reduction == "concat": output = K.concatenate(outputs) # (N x KF') else: output = K.mean(K.stack(outputs), axis=0) # N x F') # Nonlinear activation function output = self.activation(output) # On the final layer we gather the nodes referenced by the indices if self.final_layer: output = K.gather(output, out_indices) # Add batch dimension back if we removed it if batch_dim == 1: output = K.expand_dims(output, 0) return output
def d_acc(x): ''' Calculate detection metrics for a single sample Parameters: x: a tuple for (y_true, y_pred) where y_pred is post-eval output from model ''' max_boxes = 20 # TODO: this should be some sort of global constant y_true = x[0] y_pred = x[1] # convert y_true to list of boxes and classes pred_boxes, pred_scores, pred_classes = eval(y_pred, image_shape, max_boxes=max_boxes) true_box, true_mask, true_class = true_boxes_true_masks_true_classes( y_true) true_mask = K.cast(true_mask, dtype='bool') true_box = K.squeeze(true_box, axis=2) # Note: for batch processing, axis=3 true_box = tf.boolean_mask(true_box, true_mask) true_class = tf.boolean_mask(true_class, true_mask) height, width = image_shape image_dims = K.stack([height, width, height, width]) image_dims = K.cast(K.reshape(image_dims, (1, 4)), K.floatx()) true_box = true_box * image_dims # need to compare the list of box and class predictions between ground truth and prediction: # pred_boxes, pred_classes, true_box, true_class iou_matrix = iou(pred_boxes[:, tf.newaxis, :], true_box[tf.newaxis, :, :]) # case of >2 predictions targeting 1 true box, we keep the one with highest IOU iou_matrix = iou_matrix * K.cast( iou_matrix - K.max(iou_matrix, axis=0, keepdims=True) >= 0.0, dtype=K.floatx()) # case of >2 true boxes with 1 prediction iou_matrix = iou_matrix * K.cast( iou_matrix - K.max(iou_matrix, axis=1, keepdims=True) >= 0.0, dtype=K.floatx()) #matched_prediction_idx, matched_truth_idx = K.squeeze(np.nonzero(K.maximum(iou_matrix - 0.5, 0))) iou_matrix = K.maximum(iou_matrix - 0.5, 0) zero = K.constant(0, dtype=K.floatx()) # tf way of doing np.nonzero(...) where = K.not_equal(iou_matrix, zero) where = tf.where(where) matched_prediction_idx = where[..., 0] matched_truth_idx = where[..., 1] # calculate precision, recall and f1 # precision = # true positives / # prediction made (What proportion of positive identifications was actually correct?) tot_num_predictions = K.cast(K.shape(pred_boxes)[0], K.floatx()) tot_num_ground_truths = K.cast(K.shape(true_box)[0], K.floatx()) num_true_positives = K.sum( K.cast( K.equal(K.gather(pred_classes, matched_prediction_idx), K.gather(true_class, matched_truth_idx)), K.floatx())) # do these for numerical stability, # of true positives or # of predictions can be 0. num_true_positives = num_true_positives + K.epsilon() tot_num_predictions = tot_num_predictions + K.epsilon() tot_num_ground_truths = tot_num_ground_truths + K.epsilon() precision = num_true_positives / tot_num_predictions # recall = # correct prediction / # of positive ground truth observations (What proportion of actual positives was identified correctly?) recall = num_true_positives / tot_num_ground_truths f1 = 2.0 * (precision * recall) / (precision + recall) return f1
def call(self, inputs): if K.dtype(inputs) != 'int32': inputs = K.cast(inputs, 'int32') output = K.gather(self.embeddings, inputs) return output
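# The layer above is the core of an Embedding lookup: integer indices in,
# rows of the weight matrix out. A hypothetical shape note (assumption, not
# from the source): if `embeddings` is a (vocab, dim) variable and `inputs`
# is an int tensor of shape (batch, seq_len), then
#     K.gather(self.embeddings, inputs)
# has shape (batch, seq_len, dim) -- the index shape replaces the vocab axis.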
def _subsampling(self, normalized_rois, gt_bboxes, gt_labels,
                 pos_iou_thresh=0.5, exclusive_iou_tresh=0.1,
                 pos_ratio=0.25):
    """Sample RoIs based on their IoU with the ground truth.

    RoIs with IoU >= pos_iou_thresh are treated as objects.
    Objects are limited to 25% of the samples (at most
    n_samples_per_batch * pos_ratio).
    RoIs below pos_iou_thresh but at least exclusive_iou_thresh are treated
    as non-objects (background).
    RoIs below exclusive_iou_thresh are coincidental matches with no meaning
    (hard cases) and are ignored.
    (The paper describes this as a heuristic for hard example mining.)
    The number of samples per batch is capped at n_samples_per_batch
    (if fewer are found, the result is zero-padded up to n_samples_per_batch).
    The ground-truth labels matching the sampled RoIs, and the offsets to the
    ground-truth boxes, are returned as well.

    Args:
        normalized_rois (tensor): RoIs obtained from the RegionProposalLayer.
            (N, n_rois, 4)
            Axis 2 holds the top-left and bottom-right coordinates normalized
            to 0-1, i.e. divided by the input image height and width.
            (y1, x1, y2, x2)
        gt_bboxes (ndarray): ground-truth bounding boxes.
            (N, config.n_max_gt_objects_per_image, 4)
            Coordinates are not normalized.
        gt_labels (ndarray): ground-truth labels.
            (N, config.n_max_gt_objects_per_image)
            == 0: background
            >= 1: object

    Returns:
        sample_rois (tensor): sampled RoIs.
            (N, n_samples_per_batch, 4)
            Axis 2 coordinates are normalized to 0-1.
        sample_gt_offset (tensor): offsets from the sampled RoIs to their
            ground-truth boxes. (N, n_samples_per_batch, 4)
            Axis 2 holds 0-1 normalized values standardized by dividing by
            self.config.bbox_refinement_std.
        sample_gt_labels (tensor): labels of the ground-truth boxes matched
            to the sampled RoIs. (N, n_samples_per_batch)
    """
    pos_roi_per_batch = round(self.n_samples_per_batch * pos_ratio)

    # Normalize gt_bboxes to match normalized_rois,
    # so that the IoU can be evaluated.
    input_h = self.config.image_shape[0]
    input_w = self.config.image_shape[1]
    normalized_gt_bboxes = bbox.normalize_bbox(gt_bboxes, input_h, input_w)

    # Split the inputs per batch element
    normalized_rois = tf.split(normalized_rois, self.config.batch_size)
    normalized_gt_bboxes = tf.split(normalized_gt_bboxes,
                                    self.config.batch_size)
    gt_labels = tf.split(gt_labels, self.config.batch_size)

    sample_rois = []
    sample_gt_offsets = []
    sample_gt_labels = []

    for roi, gt_bbox, gt_label in zip(normalized_rois, normalized_gt_bboxes,
                                      gt_labels):
        # Drop axis 0 (the batch size), which is no longer needed
        roi = log.tfprint(roi, "roi: ")
        gt_bbox = log.tfprint(gt_bbox, "gt_bbox: ")
        gt_label = log.tfprint(gt_label, "gt_label: ")

        roi = K.squeeze(roi, 0)
        gt_bbox = K.squeeze(gt_bbox, 0)
        gt_label = K.squeeze(gt_label, 0)

        roi = log.tfprint(roi, "roi_squeezed: ")
        gt_bbox = log.tfprint(gt_bbox, "gt_bbox_squeezed: ")
        gt_label = log.tfprint(gt_label, "gt_label_squeezed: ")

        # Exclude zero-padded rows
        # K.gather(zero, K.squeeze(tf.where(K.any(zero, axis=1)), -1))
        idx_roi_row = K.flatten(tf.where(K.any(roi, axis=1)))
        idx_gt_bbox = K.flatten(tf.where(K.any(gt_bbox, axis=1)))
        roi = K.gather(roi, idx_roi_row)
        # gt_bbox and gt_label have the same rows in the same order,
        # so the same indices can be reused
        gt_bbox = K.gather(gt_bbox, idx_gt_bbox)
        gt_label = K.gather(gt_label, idx_gt_bbox)

        gt_bbox = log.tfprint(gt_bbox, "gt_bbox_gathered: ")
        gt_label = log.tfprint(gt_label, "gt_label_gathered: ")

        # Compute the IoU.
        # (n_rois, )
        ious = bbox.get_iou_K(roi, gt_bbox)
        ious = log.tfprint(ious, "ious: ")

        # For each RoI, get the position of the ground-truth box with max IoU
        idx_max_gt = K.argmax(ious, axis=1)
        idx_max_gt = log.tfprint(idx_max_gt, "idx_max_gt: ")

        max_iou = K.max(ious, axis=1)  # max_iou has as many rows as roi
        max_iou = log.tfprint(max_iou, "max_iou: ")
        idx_pos = K.flatten(tf.where(max_iou >= pos_iou_thresh))

        # Cap the number of positive samples at pos_roi_per_batch
        limit_pos = K.minimum(pos_roi_per_batch, K.shape(idx_pos)[0])
        idx_pos = K.switch(
            K.shape(idx_pos)[0] > 0,
            tf.random_shuffle(idx_pos)[:limit_pos], idx_pos)
        limit_pos = log.tfprint(limit_pos, "limit_pos: ")
        idx_pos = log.tfprint(idx_pos, "idx_pos: ")

        # Cap the number of negative samples at
        # n_samples_per_batch - pos_roi_per_batch
        idx_neg = K.flatten(
            tf.where((max_iou < pos_iou_thresh)
                     & (max_iou >= exclusive_iou_tresh)))
        # The number of negatives is capped at pos_roi_per_batch - limit_pos
        # (i.e. whatever remains)
        limit_neg = self.n_samples_per_batch - limit_pos
        limit_neg = K.minimum(limit_neg, K.shape(idx_neg)[0])
        idx_neg = K.switch(
            K.shape(idx_neg)[0] > 0,
            tf.random_shuffle(idx_neg)[:limit_neg], idx_neg)
        limit_neg = log.tfprint(limit_neg, "limit_neg: ")
        idx_neg = log.tfprint(idx_neg, "idx_neg: ")

        # Extract the samples to return.
        # The GT offsets and labels are aligned with the RoIs,
        # i.e. stored at the same positions.
        idx_keep = K.concatenate((idx_pos, idx_neg))
        idx_keep = log.tfprint(idx_keep, "idx_keep: ")

        # Among the max-IoU indices of each RoI, also keep only the
        # samples returned above.
        idx_gt_keep = K.gather(idx_max_gt, idx_keep)
        # Indices keeping only the samples considered positive
        # (IoU above the threshold).
        idx_gt_keep_pos = K.gather(idx_max_gt, idx_pos)
        idx_gt_keep = log.tfprint(idx_gt_keep, "idx_gt_keep: ")

        sample_roi = K.gather(roi, idx_keep)
        sample_gt_offset = bbox.get_offset_K(sample_roi,
                                             K.gather(gt_bbox, idx_gt_keep))
        # Negative elements get label 0
        sample_gt_label = K.concatenate((
            K.cast(K.gather(gt_label, idx_gt_keep_pos), dtype='int32'),
            K.zeros(
                [limit_neg],  # K.zeros does not accept 0-D tensors, so wrap in a list...
                dtype='int32')))

        # Zero-pad when there are fewer than n_samples_per_batch rows
        remain = tf.maximum(
            self.n_samples_per_batch - tf.shape(sample_roi)[0], 0)
        sample_roi = tf.pad(sample_roi, [(0, remain), (0, 0)],
                            name='subsample_sample_roi')
        sample_gt_offset = tf.pad(sample_gt_offset, [(0, remain), (0, 0)],
                                  name='subsample_sample_gt_offset')
        sample_gt_offset /= self.config.bbox_refinement_std
        sample_gt_label = tf.pad(sample_gt_label, [(0, remain)],
                                 name='subsample_sample_gt_label')

        sample_roi = log.tfprint(sample_roi, "sample_roi: ")
        sample_gt_offset = log.tfprint(sample_gt_offset,
                                       "sample_gt_offset: ")
        sample_gt_label = log.tfprint(sample_gt_label, "sample_gt_label: ")

        sample_rois.append(sample_roi)
        sample_gt_offsets.append(sample_gt_offset)
        sample_gt_labels.append(sample_gt_label)

    return [
        K.stack(sample_rois),
        K.stack(sample_gt_offsets),
        K.stack(sample_gt_labels)
    ]
def call(self, inputs): if k.dtype(inputs) != 'int32': inputs = k.cast(inputs, 'int32') out = k.gather(k.transpose(self.dbedl.kernel), inputs) # out2 = K.gather(self.embeddings, inputs) return out
def yolo_eval(yolo_outputs, anchors, num_classes, image_shape, max_boxes=40, score_threshold=.6, iou_threshold=.5, diff_class_iou_threshold=None): """Evaluate YOLO model on given input and return filtered boxes.""" num_layers = len(yolo_outputs) anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[ 3, 4, 5 ], [1, 2, 3]] # default setting input_shape = K.shape(yolo_outputs[0])[1:3] * 32 boxes = [] box_scores = [] for l in range(num_layers): _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, image_shape) boxes.append(_boxes) box_scores.append(_box_scores) boxes = K.concatenate(boxes, axis=0) box_scores = K.concatenate(box_scores, axis=0) mask = box_scores >= score_threshold max_boxes_tensor = K.constant(max_boxes, dtype='int32') boxes_ = [] scores_ = [] classes_ = [] for c in range(num_classes): # TODO: use keras backend instead of tf. class_boxes = tf.boolean_mask(boxes, mask[:, c]) class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) class_boxes = K.gather(class_boxes, nms_index) class_box_scores = K.gather(class_box_scores, nms_index) classes = K.ones_like(class_box_scores, 'int32') * c boxes_.append(class_boxes) scores_.append(class_box_scores) classes_.append(classes) boxes_ = K.concatenate(boxes_, axis=0) scores_ = K.concatenate(scores_, axis=0) classes_ = K.concatenate(classes_, axis=0) if diff_class_iou_threshold is not None: right_indics = tf.image.non_max_suppression( boxes_, scores_, max_boxes_tensor, iou_threshold=diff_class_iou_threshold) boxes_ = K.gather(boxes_, right_indics) scores_ = K.gather(scores_, right_indics) classes_ = K.gather(classes_, right_indics) return boxes_, scores_, classes_
def compute_loss(self, y_true, y_pred):
    batch_size = K.shape(y_true)[0]
    num_prior_boxes = K.cast(K.shape(y_true)[1], 'float')

    y_pred_localization = y_pred[:, :, :4]
    y_true_localization = y_true[:, :, :4]
    y_pred_classification = y_pred[:, :, 4:(4 + self.num_classes)]
    y_true_classification = y_true[:, :, 4:(4 + self.num_classes)]

    # loss for all prior boxes
    localization_loss = self._l1_smooth_loss(y_true_localization,
                                             y_pred_localization)
    classification_loss = self._softmax_loss(y_true_classification,
                                             y_pred_classification)

    int_positive_mask = 1 - y_true[:, :, 4 + self.background_id]
    num_positives = tf.reduce_sum(int_positive_mask, axis=-1)
    positive_localization_losses = (localization_loss * int_positive_mask)  # scalar times vector
    positive_classification_losses = (classification_loss * int_positive_mask)
    positive_classification_loss = K.sum(positive_classification_losses, 1)
    positive_localization_loss = K.sum(positive_localization_losses, 1)

    # TODO: Refactor/understand ----------------------------------------------
    # Every batch contains all priors: here we take the least number of
    # negatives, which depends on the number of positives of every batch
    # element. num_negatives/positives = (?, num_positives).
    # In the second num_positive_mask, the concatenated value does not get
    # counted, since the comparison is strictly greater than zero.
    # The most probable value of num_neg_batch is neg_pos_ratio *
    # num_positives, where num_positives is the batch element with the
    # fewest positive boxes.
    num_negatives_1 = self.neg_pos_ratio * num_positives
    num_negatives_2 = num_prior_boxes - num_positives
    num_negatives = tf.minimum(num_negatives_1, num_negatives_2)
    # positive_num_negatives_mask = tf.greater(num_negatives, 0)
    num_positive_mask = tf.greater(num_negatives, 0)
    has_a_positive = tf.to_float(tf.reduce_any(num_positive_mask))
    num_negatives = tf.concat(
        [num_negatives, [(1 - has_a_positive) * self.negatives_for_hard]], 0)
    num_positive_mask = tf.greater(num_negatives, 0)
    num_neg_batch = tf.reduce_min(
        tf.boolean_mask(num_negatives, num_positive_mask))
    num_neg_batch = tf.to_int32(num_neg_batch)
    # ------------------------------------------------------------------------

    # class_start = 4 + self.background_id + 1
    # class_end = class_start + self.num_classes - 1
    # each prior box can only have one class, so we take the max at axis 2
    # best_class_scores = K.max(y_pred[:, :, class_start:], 2)

    # Pick the negative examples with the highest probability (highest loss).
    # ??? THIS IS WEIRD: the original implementation starts from 5, therefore
    # it does not take the background boxes into consideration.
    pred_class_values = K.max(y_pred_classification[:, :, 1:], axis=2)
    int_negatives_mask = y_true[:, :, 4 + self.background_id]
    pred_negative_class_values = pred_class_values * int_negatives_mask
    top_k_negative_indices = tf.nn.top_k(pred_negative_class_values,
                                         k=num_neg_batch)[1]

    batch_indices = K.expand_dims(K.arange(0, batch_size), 1)
    batch_indices = K.tile(batch_indices, (1, num_neg_batch))
    batch_indices = K.flatten(batch_indices) * K.cast(num_prior_boxes, 'int32')
    full_indices = batch_indices + K.flatten(top_k_negative_indices)

    negative_classification_loss = K.gather(K.flatten(classification_loss),
                                            full_indices)
    negative_classification_loss = K.reshape(negative_classification_loss,
                                             [batch_size, num_neg_batch])
    negative_classification_loss = K.sum(negative_classification_loss, 1)

    # loss is the sum of positives and negatives
    total_loss = positive_classification_loss + negative_classification_loss
    num_prior_boxes_per_batch = num_positives + K.cast(num_neg_batch, 'float')
    total_loss = total_loss / num_prior_boxes_per_batch
    num_positives = tf.where(K.not_equal(num_positives, 0), num_positives,
                             K.ones_like(num_positives))
    positive_localization_loss = self.alpha * positive_localization_loss
    positive_localization_loss = positive_localization_loss / num_positives
    total_loss = total_loss + positive_localization_loss
    return total_loss
def linear_interpolate(self, images, sampled_grids, resampled_size): batch_size = K.shape(images)[0] height = K.shape(images)[1] width = K.shape(images)[2] number_of_channels = K.shape(images)[3] x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32') y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32') x = 0.5 * (x + 1.0) * K.cast(width, dtype='float32') y = 0.5 * (y + 1.0) * K.cast(height, dtype='float32') x0 = K.cast(x, dtype='int32') x1 = x0 + 1 y0 = K.cast(y, dtype='int32') y1 = y0 + 1 xMax = int(K.int_shape(images)[2] - 1) yMax = int(K.int_shape(images)[1] - 1) x0 = K.clip(x0, 0, xMax) x1 = K.clip(x1, 0, xMax) y0 = K.clip(y0, 0, yMax) y1 = K.clip(y1, 0, yMax) batch_pixels = K.arange(0, batch_size) * (height * width) batch_pixels = K.expand_dims(batch_pixels, axis=-1) base = K.repeat_elements(batch_pixels, rep=int(resampled_size[0] * resampled_size[1]), axis=1) base = K.flatten(base) indices00 = base + y0 * width + x0 indices01 = base + y1 * width + x0 indices10 = base + y0 * width + x1 indices11 = base + y1 * width + x1 flat_images = K.reshape(images, shape=(-1, number_of_channels)) flat_images = K.cast(flat_images, dtype='float32') pixelValues00 = K.gather(flat_images, indices00) pixelValues01 = K.gather(flat_images, indices01) pixelValues10 = K.gather(flat_images, indices10) pixelValues11 = K.gather(flat_images, indices11) x0 = K.cast(x0, dtype='float32') x1 = K.cast(x1, dtype='float32') y0 = K.cast(y0, dtype='float32') y1 = K.cast(y1, dtype='float32') weight00 = K.expand_dims(((x1 - x) * (y1 - y)), axis=1) weight01 = K.expand_dims(((x1 - x) * (y - y0)), axis=1) weight10 = K.expand_dims(((x - x0) * (y1 - y)), axis=1) weight11 = K.expand_dims(((x - x0) * (y - y0)), axis=1) interpolatedValues00 = weight00 * pixelValues00 interpolatedValues01 = weight01 * pixelValues01 interpolatedValues10 = weight10 * pixelValues10 interpolatedValues11 = weight11 * pixelValues11 interpolatedValues = (interpolatedValues00 + interpolatedValues01 + interpolatedValues10 + interpolatedValues11) return (interpolatedValues)
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5,
              letterbox_image=True):
    #---------------------------------------------------#
    #   Get the number of feature layers; there are 3 effective layers
    #---------------------------------------------------#
    num_layers = len(yolo_outputs)
    #-----------------------------------------------------------#
    #   The 13x13 feature map uses anchors [116,90],[156,198],[373,326]
    #   The 26x26 feature map uses anchors [30,61],[62,45],[59,119]
    #   The 52x52 feature map uses anchors [10,13],[16,30],[33,23]
    #-----------------------------------------------------------#
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    #-----------------------------------------------------------#
    #   This gives the input image size, typically 416x416
    #-----------------------------------------------------------#
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    #-----------------------------------------------------------#
    #   Process each feature layer
    #-----------------------------------------------------------#
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape,
                                                    letterbox_image)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    #-----------------------------------------------------------#
    #   Stack the results of all feature layers
    #-----------------------------------------------------------#
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    #-----------------------------------------------------------#
    #   Keep only boxes whose score exceeds score_threshold
    #-----------------------------------------------------------#
    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        #-----------------------------------------------------------#
        #   Take all boxes (and their scores) with box_scores >= score_threshold
        #-----------------------------------------------------------#
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])

        #-----------------------------------------------------------#
        #   Non-max suppression:
        #   keep the highest-scoring box within each region
        #-----------------------------------------------------------#
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)
        #-----------------------------------------------------------#
        #   Gather the NMS results, respectively:
        #   box positions, scores and classes
        #-----------------------------------------------------------#
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
def yolo_eval(
        yolo_outputs,  # model outputs: (?,13,13,255),(?,26,26,255),(?,52,52,255); ?: batch size
        anchors,
        num_classes,  # 80 classes (COCO)
        image_shape,
        max_boxes=20,  # at most 20 boxes detected per class per image
        score_threshold=.6,
        iou_threshold=.5):  # same-class IoU threshold
    # Each layer is assigned three anchors via anchor_mask, e.g. 13x13 gets [6,7,8]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    # Process each feature layer
    for l in range(3):
        # _boxes -> (?,4), _box_scores -> (?,80); ?: number of boxes
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    # Stack the results of all feature layers
    boxes = K.concatenate(boxes, axis=0)  # flatten the data -> (?,4)
    box_scores = K.concatenate(box_scores, axis=0)  # flatten the data -> (?,1)

    mask = box_scores >= score_threshold  # mask that filters out scores below the threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')  # detect at most 20 boxes
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        # Filter out boxes scoring below the threshold
        class_boxes = tf.boolean_mask(boxes, mask[:, c])  # select boxes via the mask and class c
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])  # select box_scores via the mask and class
        # Run non-max suppression and get the indices of the survivors
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)  # fetch the elements of class_boxes at nms_index
        class_box_scores = K.gather(
            class_box_scores, nms_index)  # fetch the elements of class_box_scores at nms_index
        classes = K.ones_like(
            class_box_scores, 'int32') * c  # K.ones_like builds an all-ones tensor shaped like class_box_scores
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
def shuffling(x): idxs = K.arange(0, K.shape(x)[0]) idxs = K.tf.random_shuffle(idxs) return K.gather(x, idxs)
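# Usage sketch (illustrative, not from the source; K.tf.random_shuffle assumes
# an older Keras where the backend module re-exports tensorflow as K.tf):
import numpy as np
from keras import backend as K

x = K.constant(np.arange(8.0).reshape(4, 2))
print(K.eval(shuffling(x)))  # same four rows, randomly reordered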
def get_output(self, train=False):
    X = self.get_input(train)
    out = K.gather(self.W, X)
    return out
def photoMetric(disp, left, right, width, height, batchsize):
    '''
    Partially inspired by
    https://github.com/mtngld/monodepth-1/blob/1f1fc80ac0dc727f3de561ead89e6792aea5e178/bilinear_sampler.py,
    e.g. the use of the gather function
    '''
    # Flatten and separate out channels
    # [batch, width, height, channel]
    disp_f = K.flatten(K.permute_dimensions(disp, pattern=(0, 2, 1, 3)))
    left_f_0 = K.flatten(K.permute_dimensions(left[:, :, :, 0], pattern=(0, 2, 1)))
    right_f_0 = K.flatten(K.permute_dimensions(right[:, :, :, 0], pattern=(0, 2, 1)))
    left_f_1 = K.flatten(K.permute_dimensions(left[:, :, :, 1], pattern=(0, 2, 1)))
    right_f_1 = K.flatten(K.permute_dimensions(right[:, :, :, 1], pattern=(0, 2, 1)))
    left_f_2 = K.flatten(K.permute_dimensions(left[:, :, :, 2], pattern=(0, 2, 1)))
    right_f_2 = K.flatten(K.permute_dimensions(right[:, :, :, 2], pattern=(0, 2, 1)))

    # find the self-referential indices in the tensor
    indicies = K.arange(0, batchsize * width * height, dtype='float32')
    right_referances = K.clip(
        indicies + (disp_f * 1. * width * 0.3), 0,
        batchsize * width * height - 1)  # changed to 0.3 to reflect v1 paper implementation details

    # OK TO THIS POINT NO GRADS GET LOST
    intReferancesLow = K.cast(tf.floor(right_referances), 'int32')
    intReferancesHigh = K.cast(tf.ceil(right_referances), 'int32')

    lowWeights = 1 - K.abs(K.cast(intReferancesLow, 'float32') - right_referances)
    highWeights = 1 - K.abs(K.cast(intReferancesHigh, 'float32') - right_referances)

    # gather the values to create the left re-projected images
    right_f_referance_to_projected_0 = K.gather(right_f_0, intReferancesLow) * lowWeights \
        + K.gather(right_f_0, intReferancesHigh) * highWeights
    right_f_referance_to_projected_1 = K.gather(right_f_1, intReferancesLow) * lowWeights \
        + K.gather(right_f_1, intReferancesHigh) * highWeights
    right_f_referance_to_projected_2 = K.gather(right_f_2, intReferancesLow) * lowWeights \
        + K.gather(right_f_2, intReferancesHigh) * highWeights

    # return K.mean(right_f_referance_to_projected_0)

    # get difference between original left and right images
    # L2Direct = K.sqrt(K.square(left_f_0 - right_f_0)
    #                   + K.square(left_f_1 - right_f_1)
    #                   + K.square(left_f_2 - right_f_2))
    L1Direct = K.abs(left_f_0 - right_f_0) \
        + K.abs(left_f_1 - right_f_1) \
        + K.abs(left_f_2 - right_f_2)

    # L2Reproject = K.sqrt(K.square(left_f_0 - right_f_referance_to_projected_0)
    #                      + K.square(left_f_1 - right_f_referance_to_projected_1)
    #                      + K.square(left_f_2 - right_f_referance_to_projected_2))
    L1Reproject = K.abs(left_f_0 - right_f_referance_to_projected_0) \
        + K.abs(left_f_1 - right_f_referance_to_projected_1) \
        + K.abs(left_f_2 - right_f_referance_to_projected_2)

    greyImageRight = (right_f_0 + right_f_1 + right_f_2) / 3.
    greyImageReproject = (right_f_referance_to_projected_0
                          + right_f_referance_to_projected_1
                          + right_f_referance_to_projected_2) / 3.
    greyLeftImage = (left_f_0 + left_f_1 + left_f_2) / 3.

    mean_right = K.mean(greyImageRight)
    mean_reproject = K.mean(greyImageReproject)
    mean_left = K.mean(greyLeftImage)

    variance_right = K.sum(K.square(greyImageRight - mean_right)) / (
        batchsize * width * height - 1)
    variance_reproject = K.sum(K.square(greyImageReproject - mean_reproject)) / (
        batchsize * width * height - 1)
    variance_left = K.sum(K.square(greyLeftImage - mean_left)) / (
        batchsize * width * height - 1)

    covariance_right_reproject = K.sum(
        (greyImageRight - mean_right) * (greyImageReproject - mean_reproject)) / (
            batchsize * width * height - 1)  # TODO not sum this for masking
    covariance_left_right = K.sum(
        (greyLeftImage - mean_left) * (greyImageRight - mean_right)) / (
            batchsize * width * height - 1)  # TODO not sum this for masking

    L = 256 - 1  # the range of the images
    c_1 = (0.01 * L) * (0.01 * L)  # default values
    c_2 = (0.03 * L) * (0.03 * L)  # default values

    SSIM_right_reproject = (2 * mean_right * mean_reproject + c_1) * (2 * covariance_right_reproject + c_2) / \
        ((mean_right * mean_right + mean_reproject * mean_reproject + c_1)
         * (variance_right * variance_right + variance_reproject * variance_reproject + c_2))
    SSIM_right_left = (2 * mean_right * mean_left + c_1) * (2 * covariance_left_right + c_2) / \
        ((mean_right * mean_right + mean_left * mean_left + c_1)
         * (variance_right * variance_right + variance_left * variance_left + c_2))

    # return L1Direct, L1Reproject * (right_referances / (right_referances + 1e-10)), SSIM_right_reproject, SSIM_right_left
    return L1Direct, L1Reproject, SSIM_right_reproject, SSIM_right_left
def get_output(self, train=False): X = self.get_input(train) if self.dropout: raise NotImplementedError() # TODO out = K.gather(self.W, X) return out
import keras import tensorflow as tf from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau from config import patience, epochs, num_train_samples, num_valid_samples, batch_size from data_generator import train_gen, valid_gen from model import build_model, build_simple_model # from utils import get_available_gpus, categorical_crossentropy_color import numpy as np import keras.backend as K prior_factor = np.load("prior_factor.npy") prior_factor = K.cast(prior_factor, dtype='float32') idx_max = np.random.randint(313, size=(16, 32, 32)) a = K.gather(prior_factor, idx_max) print("")
def simple_test(image_path):
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # cv2 images are (rows, cols, channels), i.e. (height, width, channels)
    height = image.shape[0]
    width = image.shape[1]
    image = cv2.resize(image, (image_w, image_h))
    image = image.reshape((1, image_w, image_h, 3))
    prediction = model.predict(image, batch_size=1)
    print(prediction.shape)  # 1, 13, 13, 125

    # Reshape it to 1, 13, 13, 5, 25:
    # 5 anchor boxes at every cell of the 13 x 13 grid,
    # 25 elements for each anchor box:
    # probability that an object is present, bx, by, w, h, and a 20-dim vector of class scores
    p_resh = prediction.reshape(1, 13, 13, 5, 25)
    print(p_resh.shape)

    for box_i in range(5):
        box = p_resh[0][0][0][box_i]
        pc = box[0]
        c_scores = box[5:]
        res = pc * c_scores
        idx = np.argmax(res)
        p = class_dict[idx]
        print("Box No {} score {} box {},{},{},{} class {} ".format(
            box_i, res[idx], box[1], box[2], box[3], box[4], p))

    box_confidence = p_resh[:, :, :, :, 0]
    box_confidence = box_confidence.reshape(1, 13, 13, 5, 1)
    boxes = p_resh[:, :, :, :, 1:5]
    boxes = boxes.reshape(1, 13, 13, 5, 4)
    box_class_prob = p_resh[:, :, :, :, 5:]
    box_class_prob = box_class_prob.reshape(1, 13, 13, 5, 20)

    # Filter the boxes
    threshold = 0.6
    box_scores = np.multiply(box_confidence, box_class_prob)
    print(box_scores.shape)
    box_class = K.argmax(box_scores, axis=-1)
    box_class_scores = K.max(box_scores, axis=-1)

    # Filtering mask
    filtering_mask = K.greater_equal(box_class_scores, threshold)
    with K.get_session() as test:
        scores = tf.boolean_mask(box_class_scores, filtering_mask).eval()
        boxes = tf.boolean_mask(boxes, filtering_mask).eval()
        classes = tf.boolean_mask(box_class, filtering_mask).eval()
        print(boxes.shape)
        print(classes.shape)
        print(scores.shape)

        max_boxes = 5
        iou_threshold = 0.6
        max_boxes_tensor = K.variable(max_boxes, dtype='int32')  # tensor to be used in tf.image.non_max_suppression()
        test.run(tf.variables_initializer([max_boxes_tensor]))  # initialize variable max_boxes_tensor
        # Use tf.image.non_max_suppression() to get the list of indices corresponding to boxes you keep
        nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor,
                                                   iou_threshold=iou_threshold)
        scores = K.gather(scores, nms_indices).eval()
        boxes = K.gather(boxes, nms_indices).eval()
        classes = K.gather(classes, nms_indices).eval()
        print(boxes.shape)
        print(classes.shape)
        print(scores.shape)

        # Scale the boxes back to the original image size
        image_dims = K.stack([height, width, height, width])
        image_dims = K.reshape(image_dims, [1, 4])
        boxes = boxes * image_dims
        print(boxes.eval())
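# For comparison, a self-contained greedy NMS sketch in plain NumPy (not the
# original code); boxes are (y1, x1, y2, x2) corners, as tf.image expects.
import numpy as np

def nms_numpy(boxes, scores, iou_threshold=0.6, max_boxes=5):
    order = np.argsort(scores)[::-1]  # highest score first
    keep = []
    while order.size and len(keep) < max_boxes:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        tl = np.maximum(boxes[i, :2], boxes[rest, :2])
        br = np.minimum(boxes[i, 2:], boxes[rest, 2:])
        inter = np.prod(np.maximum(br - tl, 0.), axis=1)
        area_i = np.prod(boxes[i, 2:] - boxes[i, :2])
        areas = np.prod(boxes[rest, 2:] - boxes[rest, :2], axis=1)
        iou = inter / (area_i + areas - inter)
        order = rest[iou <= iou_threshold]  # drop boxes overlapping the winner
    return np.array(keep)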
def copy_generator_network(batch_size, sequence_class, n_classes=1, seq_length=205,
                           supply_inputs=False, master_generator=master_generator,
                           copy_number=copy_number):
    sequence_class_onehots = np.eye(n_classes)

    # Generator network parameters
    latent_size = 100

    # Generator inputs
    latent_input_1, latent_input_2 = None, None
    latent_input_1_out, latent_input_2_out = None, None
    if not supply_inputs:
        latent_input_1 = Input(tensor=K.ones((batch_size, latent_size)), name='noise_input_1')
        latent_input_2 = Input(tensor=K.ones((batch_size, latent_size)), name='noise_input_2')
        latent_input_1_out = Lambda(
            lambda inp: inp * K.random_uniform((batch_size, latent_size), minval=-1.0, maxval=1.0),
            name='lambda_rand_input_1')(latent_input_1)
        latent_input_2_out = Lambda(
            lambda inp: inp * K.random_uniform((batch_size, latent_size), minval=-1.0, maxval=1.0),
            name='lambda_rand_input_2')(latent_input_2)
    else:
        latent_input_1 = Input(batch_shape=(batch_size, latent_size), name='noise_input_1')
        latent_input_2 = Input(batch_shape=(batch_size, latent_size), name='noise_input_2')
        latent_input_1_out = Lambda(lambda inp: inp, name='lambda_rand_input_1')(latent_input_1)
        latent_input_2_out = Lambda(lambda inp: inp, name='lambda_rand_input_2')(latent_input_2)

    # Gather the one-hot class embedding for each sample's class index
    class_embedding = Lambda(
        lambda x: K.gather(K.constant(sequence_class_onehots),
                           K.cast(x[:, 0], dtype='int32')))(sequence_class)

    seed_input_1 = Concatenate(axis=-1)([latent_input_1_out, class_embedding])
    seed_input_2 = Concatenate(axis=-1)([latent_input_2_out, class_embedding])

    # Policy network definition (layers shared with the master generator)
    policy_dense_1 = master_generator.get_layer('policy_dense_1')
    policy_dense_1_reshape = Reshape((21, 1, 384))
    policy_deconv_0 = master_generator.get_layer('policy_deconv_0')
    policy_deconv_1 = master_generator.get_layer('policy_deconv_1')
    policy_deconv_2 = master_generator.get_layer('policy_deconv_2')
    policy_conv_3 = master_generator.get_layer('policy_conv_3')
    policy_conv_4 = master_generator.get_layer('policy_conv_4')
    policy_conv_5 = master_generator.get_layer('policy_conv_5')
    # policy_deconv_3 = Conv2DTranspose(4, (7, 1), strides=(1, 1), padding='valid',
    #                                   activation='linear', kernel_initializer='glorot_normal',
    #                                   name='policy_deconv_3')

    batch_norm_0 = master_generator.get_layer('policy_batch_norm_0')
    relu_0 = Lambda(lambda x: K.relu(x))
    batch_norm_1 = master_generator.get_layer('policy_batch_norm_1')
    relu_1 = Lambda(lambda x: K.relu(x))
    batch_norm_2 = master_generator.get_layer('policy_batch_norm_2')
    relu_2 = Lambda(lambda x: K.relu(x))
    batch_norm_3 = master_generator.get_layer('policy_batch_norm_3')
    relu_3 = Lambda(lambda x: K.relu(x))
    batch_norm_4 = master_generator.get_layer('policy_batch_norm_4')
    relu_4 = Lambda(lambda x: K.relu(x))

    # The same deconv/conv tower is applied to each seed input; unrolling the
    # deeply nested call into a helper keeps the two outputs identical in
    # structure without duplicating the expression.
    def policy_tower(seed_input):
        out = policy_dense_1_reshape(policy_dense_1(seed_input))
        out = relu_0(batch_norm_0(policy_deconv_0(out), training=True))
        out = relu_1(batch_norm_1(policy_deconv_1(out), training=True))
        out = relu_2(batch_norm_2(policy_deconv_2(out), training=True))
        out = relu_3(batch_norm_3(policy_conv_3(out), training=True))
        out = relu_4(batch_norm_4(policy_conv_4(out), training=True))
        return Reshape((seq_length, 4, 1))(policy_conv_5(out))

    policy_out_1 = policy_tower(seed_input_1)
    policy_out_2 = policy_tower(seed_input_2)

    return [latent_input_1, latent_input_2], [policy_out_1, policy_out_2], []
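# A toy run (illustrative values, not from the source) of the class-embedding
# Lambda above: gather picks the one-hot row for each sample's float-encoded
# class index.
import numpy as np
from keras import backend as K

sequence_class = K.constant([[2.], [0.]])  # class ids stored as floats
onehots = K.constant(np.eye(3))
emb = K.gather(onehots, K.cast(sequence_class[:, 0], dtype='int32'))
print(K.eval(emb))  # rows 2 and 0 of the 3x3 identity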
def neighbour_lookup(atoms, edges, maskvalue=0, include_self=False):
    ''' Looks up the features of all atoms' neighbours, for a batch of molecules.

    # Arguments:
        atoms (K.tensor): of shape (batch_n, max_atoms, num_atom_features)
        edges (K.tensor): of shape (batch_n, max_atoms, max_degree) with
            neighbour indices and -1 as padding value
        maskvalue (numerical): the masking value that should be used for empty
            atoms or atoms that have no neighbours (does not affect the input
            padding value, which should always be -1!)
        include_self (bool): if True, the feature vector of each atom will be
            added to the list of feature vectors of its neighbours

    # Returns:
        neighbour_features (K.tensor): of shape
            (batch_n, max_atoms(+1), max_degree, num_atom_features),
            depending on the value of include_self
    '''
    # The lookup masking trick: We add 1 to all indices, converting the
    # masking value of -1 to a valid 0 index.
    masked_edges = edges + 1
    # We then add a padding vector at index 0 by padding to the left of the
    # lookup matrix with the value that the new mask should get
    masked_atoms = temporal_padding(atoms, (1, 0), padvalue=maskvalue)

    # Import dimensions
    atoms_shape = K.shape(masked_atoms)
    batch_n = atoms_shape[0]
    lookup_size = atoms_shape[1]
    num_atom_features = atoms_shape[2]

    edges_shape = K.shape(masked_edges)
    max_atoms = edges_shape[1]
    max_degree = edges_shape[2]

    # Create broadcastable offset
    offset_shape = (batch_n, 1, 1)
    offset = K.reshape(
        tf.keras.backend.arange(stop=batch_n, start=0, dtype='int32'), offset_shape)
    offset *= lookup_size

    # Apply offset to account for the fact that after reshape, all individual
    # batch_n indices will be combined into a single big index
    flattened_atoms = K.reshape(masked_atoms, (-1, num_atom_features))
    flattened_edges = K.reshape(masked_edges + offset, (batch_n, -1))

    # Gather flattened
    flattened_result = K.gather(flattened_atoms, flattened_edges)

    # Unflatten result
    output_shape = (batch_n, max_atoms, max_degree, num_atom_features)
    output = K.reshape(flattened_result, output_shape)

    if include_self:
        return K.concatenate([tf.expand_dims(atoms, axis=2), output], axis=2)
    return output
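# A NumPy check (illustrative shapes, not from the source) of the
# offset-flatten-gather trick above: adding batch_i * lookup_size to each
# index makes per-molecule indices valid in the flattened atom table.
import numpy as np

batch_n, lookup_size, feat = 2, 3, 4
atoms = np.arange(batch_n * lookup_size * feat, dtype=float).reshape(batch_n, lookup_size, feat)
edges = np.array([[[0, 1], [2, 0], [1, 1]],
                  [[1, 2], [0, 0], [2, 1]]])  # (batch_n, max_atoms, max_degree)

offset = (np.arange(batch_n) * lookup_size).reshape(batch_n, 1, 1)
flat_atoms = atoms.reshape(-1, feat)               # merge batch into one axis
flat_edges = (edges + offset).reshape(batch_n, -1) # indices are now global
result = flat_atoms[flat_edges].reshape(batch_n, 3, 2, feat)
assert (result[1, 0, 1] == atoms[1, 2]).all()      # neighbour 2 of molecule 1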
def build_network(X_nodes, X_edges, X_nodes_in_out, X_messages_in, X_messages_out,
                  message_passers, state_updater, readout, ndim_features_nodes,
                  fake_message_const, steps):
    for step in range(steps):
        # One message per edge: concatenate the two endpoint node states with
        # the edge features
        messages = message_passers[step](K.concatenate([
            K.reshape(K.gather(reference=X_nodes, indices=X_nodes_in_out),
                      shape=(-1, 2 * ndim_features_nodes)),
            X_edges
        ], axis=1))
        # Append a fake message so nodes without neighbours still gather something
        messages = K.concatenate([messages, fake_message_const], axis=0)
        messages = tf.where(tf.is_inf(messages), tf.zeros_like(messages), messages)

        # Gather each node's incoming/outgoing messages once, then aggregate
        # with several statistics
        messages_in = K.gather(reference=messages, indices=X_messages_in)
        messages_out = K.gather(reference=messages, indices=X_messages_out)
        messages_aggregated_in = K.max(messages_in, axis=1)
        messages_aggregated_out = K.max(messages_out, axis=1)
        messages_aggregated_in2 = K.mean(messages_in, axis=1)
        messages_aggregated_out2 = K.mean(messages_out, axis=1)
        messages_aggregated_in3 = K.var(messages_in, axis=1)
        messages_aggregated_out3 = K.var(messages_out, axis=1)
        messages_aggregated_in4 = K.std(messages_in, axis=1)
        messages_aggregated_out4 = K.std(messages_out, axis=1)

        ## For GRU-based state_updater
        # _, X_nodes = state_updater(
        #     inputs=K.concatenate([messages_aggregated_in, messages_aggregated_out,
        #                           messages_aggregated_in2, messages_aggregated_out2,
        #                           messages_aggregated_in3, messages_aggregated_out3],
        #                          axis=1),
        #     state=X_nodes)

        ## For LSTM-based state_updater
        # _, (_, X_nodes) = state_updater(
        #     inputs=K.concatenate([messages_aggregated_in, messages_aggregated_out,
        #                           messages_aggregated_in2, messages_aggregated_out2,
        #                           messages_aggregated_in3, messages_aggregated_out3,
        #                           messages_aggregated_in4, messages_aggregated_out4],
        #                          axis=1),
        #     state=(tf.zeros_like(X_nodes), X_nodes))

        ## For dense state_updater
        X_nodes = state_updater(
            K.concatenate([
                messages_aggregated_in, messages_aggregated_in2,
                messages_aggregated_out, messages_aggregated_out2,
                messages_aggregated_in3, messages_aggregated_out3,
                messages_aggregated_in4, messages_aggregated_out4,
                X_nodes
            ], axis=1))

    return readout(X_nodes)
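# A tiny NumPy analogue (illustrative values, not from the source) of the
# gather-then-aggregate step above: each node gathers the messages listed in
# its index row and reduces over them.
import numpy as np

messages = np.array([[1., 2.], [3., 4.], [5., 0.], [0., 0.]])  # last row: fake message
X_messages_in = np.array([[0, 1], [2, 3]])  # message ids feeding each node
gathered = messages[X_messages_in]          # (num_nodes, max_in_degree, ndim)
print(gathered.max(axis=1))                 # [[3. 4.] [5. 0.]] -> max aggregation
print(gathered.mean(axis=1))                # [[2. 3.] [2.5 0.]] -> mean aggregation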
def _construct_inference_tensors(*, restored_model, num_of_anchors, anchors,
                                 model_image_width, model_image_height,
                                 prob_detection_threshold=0.25,
                                 nms_iou_threshold=0.5):
    """
    Constructs input tensors (placeholders) and output tensors that are used
    for inference.

    Arguments:
    :param restored_model Keras model restored from the Darknet
    :param num_of_anchors number of anchors used in the architecture
    :param anchors anchors used in the architecture (expected shape=(num_of_anchors, 2), first dimension is width)
    :param model_image_width width of the image used by model (needs to be divisible by 32)
    :param model_image_height height of the image used by model (needs to be divisible by 32)
    :param prob_detection_threshold threshold for detecting object
    :param nms_iou_threshold threshold for non-max suppression

    Returns:
    :return (out_tensors, input_tensors)
        - out_tensors - (picked_boxes, picked_scores, picked_classes)
            - picked_boxes = Tensor of (left, top, bottom, right)
            - picked_scores = Tensor of floats
            - picked_classes = Tensor of ints
        - input_tensors = (model_input, orig_image_width, orig_image_height)
            - model_input - Placeholder for image pixels
            - orig_image_width - Placeholder for original image width (before resizing)
            - orig_image_height - Placeholder for original image height (before resizing)
    """
    start = time.time()
    boxes = []
    prob_class = []

    placeholder_orig_image_width = K.placeholder(shape=(1,))
    placeholder_orig_image_height = K.placeholder(shape=(1,))

    for yolo_head_idx in range(len(restored_model.output)):
        yolo_head = restored_model.output[yolo_head_idx]
        yolo_head_shape = K.shape(yolo_head)
        yolo_head_num_of_cols = yolo_head_shape[2]
        yolo_head_num_of_rows = yolo_head_shape[1]

        curr_yolo_head = K.reshape(yolo_head, [
            -1, yolo_head_num_of_cols, yolo_head_num_of_rows, num_of_anchors,
            NUM_OF_BOX_PARAMS + NUM_OF_CLASSES
        ])

        grid = construct_grid(yolo_head_shape[1], yolo_head_shape[2])
        grid = K.cast(grid, dtype=K.dtype(curr_yolo_head))
        grid_size = K.cast([yolo_head_num_of_cols, yolo_head_num_of_rows],
                           dtype=K.dtype(curr_yolo_head))

        # Decode box centres, sizes, objectness and class probabilities
        curr_boxes_xy = (K.sigmoid(curr_yolo_head[..., :2]) + grid) / grid_size
        curr_boxes_wh = K.exp(curr_yolo_head[..., 2:4]) * anchors[yolo_head_idx]

        curr_prob_obj = K.sigmoid(curr_yolo_head[..., 4:5])
        curr_prob_class = K.sigmoid(curr_yolo_head[..., 5:])
        curr_prob_detected_class = curr_prob_obj * curr_prob_class

        boxes.append(
            get_corrected_boxes(
                box_width=curr_boxes_wh[..., 0:1],
                box_height=curr_boxes_wh[..., 1:2],
                box_x=curr_boxes_xy[..., 0:1],
                box_y=curr_boxes_xy[..., 1:2],
                orig_image_shape=(placeholder_orig_image_width,
                                  placeholder_orig_image_height),
                model_image_shape=(model_image_width, model_image_height)))

        curr_prob_detected_class = K.reshape(curr_prob_detected_class,
                                             [-1, NUM_OF_CLASSES])
        prob_class.append(curr_prob_detected_class)

    prob_class = K.concatenate(prob_class, axis=0)
    boxes = K.concatenate(boxes, axis=0)

    mask = prob_class >= prob_detection_threshold
    max_boxes_tensor = K.constant(20, dtype='int32')

    picked_boxes = []
    picked_scores = []
    picked_classes = []

    # Per-class non-max suppression
    for c in range(NUM_OF_CLASSES):
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(prob_class[:, c], mask[:, c])

        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor,
            iou_threshold=nms_iou_threshold)

        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c

        picked_boxes.append(class_boxes)
        picked_scores.append(class_box_scores)
        picked_classes.append(classes)

    picked_boxes = K.concatenate(picked_boxes, axis=0)
    picked_scores = K.concatenate(picked_scores, axis=0)
    picked_classes = K.concatenate(picked_classes, axis=0)

    out_tensors = [picked_boxes, picked_scores, picked_classes]

    print(f'Took {time.time() - start} seconds to construct network.')

    input_tensors = [
        restored_model.input,
        placeholder_orig_image_width,
        placeholder_orig_image_height
    ]

    return out_tensors, input_tensors
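# Hypothetical usage sketch of the tensors returned above; `model`, `anchors`,
# `pixels`, `orig_w` and `orig_h` are assumed to exist and are illustrative,
# not taken from the source.
out_tensors, input_tensors = _construct_inference_tensors(
    restored_model=model, num_of_anchors=3, anchors=anchors,
    model_image_width=416, model_image_height=416)
model_input, ph_width, ph_height = input_tensors
boxes_out, scores_out, classes_out = K.get_session().run(
    out_tensors,
    feed_dict={model_input: pixels, ph_width: [orig_w], ph_height: [orig_h]})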
def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
    """Decoder function used in the `dynamic_rnn_decoder` for inference.

    The main difference between this decoder function and the `decoder_fn` in
    `attention_decoder_fn_train` is how `next_cell_input` is calculated. In
    this decoder function we calculate the next input by applying an argmax
    across the feature dimension of the output from the decoder. This is a
    greedy-search approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014)
    use beam-search instead.

    Args:
        time: positive integer constant reflecting the current timestep.
        cell_state: state of RNNCell.
        cell_input: input provided by `dynamic_rnn_decoder`.
        cell_output: output of RNNCell.
        context_state: context state provided by `dynamic_rnn_decoder`.

    Returns:
        A tuple (done, next state, next input, emit output, next context state)
        where:
        done: A boolean vector to indicate which sentences have reached an
            `end_of_sequence_id`. This is used for early stopping by the
            `dynamic_rnn_decoder`. When `time >= maximum_length` a boolean
            vector with all elements as `true` is returned.
        next state: `cell_state`, this decoder function does not modify the
            given state.
        next input: The embedding from argmax of the `cell_output` is used as
            `next_input`.
        emit output: If `output_fn is None` the supplied `cell_output` is
            returned, else the `output_fn` is used to update the `cell_output`
            before calculating `next_input` and returning `cell_output`.
        next context state: `context_state`, this decoder function does not
            modify the given context state. The context state could be
            modified when applying e.g. beam search.

    Raises:
        ValueError: if cell_input is not None.
    """
    with tf.name_scope(
            name, "attention_decoder_fn_inference",
            [time, cell_state, cell_input, cell_output, context_state]):
        if cell_input is not None:
            raise ValueError(
                "Expected cell_input to be None, but saw: %s" % cell_input)

        if cell_output is None:
            # invariant that this is time == 0
            next_input_id = K.ones([batch_size], dtype=dtype) * start_of_sequence_id
            done = tf.zeros([batch_size], dtype=tf.bool)
            cell_state = encoder_state
            cell_output = K.zeros([num_decoder_symbols], dtype=tf.float32)
            cell_input = K.gather(embeddings, next_input_id)

            # init attention
            attention = _init_attention(encoder_state)
        else:
            # construct attention
            attention = attention_construct_fn(cell_output, attention_keys,
                                               attention_values)
            cell_output = attention

            # greedy decoding: argmax over the logits picks the next input id
            cell_output = output_fn(cell_output)  # logits
            next_input_id = K.cast(K.argmax(cell_output, 1), dtype=dtype)
            done = K.equal(next_input_id, end_of_sequence_id)
            cell_input = K.gather(embeddings, next_input_id)

        # combine cell_input and attention
        next_input = Concatenate(axis=1)([cell_input, attention])

        # if time > maxlen, return an all-true `done` vector
        done = tf.cond(K.greater(time, maximum_length),
                       lambda: K.ones([batch_size], dtype=tf.bool),
                       lambda: done)

        return (done, cell_state, next_input, cell_output, context_state)
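# A minimal standalone check (illustrative values, not from the source) of the
# greedy step above: argmax over the logits picks the next token id, and
# K.gather turns the ids back into embedding vectors for the next input.
import numpy as np
from keras import backend as K

embeddings = K.variable(np.random.rand(5, 8))  # vocab of 5, embedding dim 8
logits = K.variable(np.random.rand(2, 5))      # batch of 2
next_id = K.cast(K.argmax(logits, axis=1), 'int32')
print(K.eval(K.gather(embeddings, next_id)).shape)  # (2, 8)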
def dropped_inputs():
    # Aggregate the batch into a single per-unit statistic
    if 'max' == self.agg_method:
        x_agg = bk.max(inputs, axis=0)
        if self.smooth_rate > 0:
            x_agg = self.smooth_rate * bk.mean(inputs, axis=0) \
                + (1 - self.smooth_rate) * x_agg
    elif 'extreme' == self.agg_method:
        x_mean = bk.mean(inputs, axis=0)
        x_agg = tf.where(x_mean >= 0, bk.max(inputs, axis=0), bk.min(inputs, axis=0))
        if self.smooth_rate > 0:
            x_agg = self.smooth_rate * x_mean + (1 - self.smooth_rate) * x_agg
    else:
        x_agg = bk.mean(inputs, axis=0)

    # Quantise the aggregated values into integer buckets
    x_min, x_max = bk.min(x_agg), bk.max(x_agg)
    x_agg_int = bk.cast(input_shape[-1] * (x_agg - x_min) / (x_max - x_min), 'int32')

    # Per-bucket dropout rate, annealed by how many units share the bucket
    if self.unique_supported:
        _, idx, counts = tf.unique_with_counts(x_agg_int)
        dr = self.rate ** (1. / (self.anneal * bk.cast(counts, inputs.dtype)))
        dr = tf.where(1 == counts, self.rate * bk.ones_like(dr), dr)
    else:
        def _seg_dr(ele):
            _cnt = bk.sum(bk.cast(ele == x_agg_int, inputs.dtype))
            _dr = self.rate if 1 == _cnt else self.rate ** (1. / (self.anneal * _cnt))
            return _dr

        dr = bk.map_fn(_seg_dr, x_agg_int, dtype=inputs.dtype)
        idx = bk.arange(0, x_agg_int.shape[0])

    if 'gaussian' == self.noise_type:
        # Multiplicative Gaussian noise whose variance follows the per-bucket rate
        sigma = (dr / (1. - dr)) ** .5
        noise_tensor = bk.gather(sigma, idx) * bk.random_normal(
            x_agg_int.shape, dtype=inputs.dtype) + 1.
        return inputs * noise_tensor
    else:
        dr_tensor = bk.random_uniform(noise_shape, seed=self.seed, dtype=inputs.dtype)
        ret = inputs * bk.cast(dr_tensor >= bk.gather(dr, idx), inputs.dtype)
        # Optionally rescale so each row keeps its original amplitude
        if 'abs' == self.keep_amp_type:
            old_amps = bk.sum(bk.abs(inputs), axis=-1, keepdims=True)
            cur_amps = bk.sum(bk.stop_gradient(bk.abs(ret)), axis=-1, keepdims=True)
            ret = ret * old_amps / (cur_amps + self.epsilon)
        elif self.keep_amp_type is not None:
            old_amps = bk.sum(inputs, axis=-1, keepdims=True)
            cur_amps = bk.sum(bk.stop_gradient(ret), axis=-1, keepdims=True)
            ret = ret * old_amps / (cur_amps + self.epsilon)
        return ret
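# What the bk.gather(dr, idx) step above does, in miniature (illustrative
# values, not from the source): broadcast one dropout rate per bucket back
# onto the individual units.
import numpy as np
from keras import backend as bk

dr = bk.constant([0.1, 0.5])              # one rate per bucket
idx = bk.constant([0, 1, 1, 0], 'int32')  # bucket id of each unit
print(bk.eval(bk.gather(dr, idx)))        # [0.1 0.5 0.5 0.1]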
def yolo_eval(yolo_outputs, anchors, num_classes, image_shape, max_boxes=20,
              score_threshold=.6, iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    anchor_mask = [[3, 4, 5], [0, 1, 2]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []

    # Handle class 4 first so its detections come first in the output
    class_boxes = tf.boolean_mask(boxes, mask[:, 4])
    class_box_scores = tf.boolean_mask(box_scores[:, 4], mask[:, 4])
    nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores,
                                             max_boxes_tensor,
                                             iou_threshold=iou_threshold)
    class_boxes = K.gather(class_boxes, nms_index)
    class_box_scores = K.gather(class_box_scores, nms_index)
    classes = K.ones_like(class_box_scores, 'int32') * 4
    boxes_.append(class_boxes)
    scores_.append(class_box_scores)
    classes_.append(classes)

    # Per-class NMS for the remaining classes
    for c in range(num_classes):
        if c == 4:
            continue
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)

    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
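# How anchors[anchor_mask[l]] selects the per-scale anchors (the anchor values
# here are illustrative tiny-YOLO-style numbers, not taken from the source).
import numpy as np

anchors = np.array([[10, 14], [23, 27], [37, 58], [81, 82], [135, 169], [344, 319]])
anchor_mask = [[3, 4, 5], [0, 1, 2]]
print(anchors[anchor_mask[0]])  # the three coarse-scale anchors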
def sparse_gather(y_pred, target_indices, task_name):
    # Flatten (batch, time, features) to (batch * time, features), then pick
    # one row per target index
    clf_h = Lambda(lambda x: K.reshape(x, (-1, K.int_shape(x)[-1])),
                   name=task_name + '_flatten')(y_pred)
    return Lambda(lambda x: K.gather(x[0], K.cast(x[1], 'int32')),
                  name=task_name + '_gather')([clf_h, target_indices])
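# What the inner gather computes, as a standalone sketch (illustrative values,
# not from the source): pick one flattened hidden state per target index.
import numpy as np
from keras import backend as K

clf_h = K.constant(np.arange(12.).reshape(6, 2))  # six flattened positions
target_indices = K.constant([1, 4])
print(K.eval(K.gather(clf_h, K.cast(target_indices, 'int32'))))  # rows 1 and 4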
    iou_rate = inter_area / union_area
    return iou_rate


def draw_rectangle():
    """
    Draw rectangles.
    :return:
    """
    fig = plt.figure()  # create the figure
    ax = fig.add_subplot(111)  # create a subplot
    # ax = plt.gca()  # get the axes object of the current figure
    ax.invert_yaxis()  # invert the y-axis
    ax.xaxis.set_ticks_position('top')  # place the x-axis ticks at the top

    def add_rectangle(x1, y1, x2, y2, color="black"):
        # Takes the rectangle's two opposite corner coordinates
        ax.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                       fill=False, color=color))

    add_rectangle(.2, .1, .4, .3)
    add_rectangle(.3, .1, .4, .3, color="red")
    add_rectangle(.3, .1, .4, .4, color="blue")
    add_rectangle(.1, .1, .4, .4, color="orange")
    add_rectangle(.1, .1, .4, .3, color="yellow")
    plt.show()


boxes = np.array([[.1, .2, .3, .4], [.1, .3, .3, .4], [.1, .3, .4, .4],
                  [.1, .1, .4, .4], [.1, .1, .3, .4]], dtype=np.float32)
scores = np.array([.4, .5, .72, .9, .45], dtype=np.float32)

with tf.Session() as sess:
    selected_indices = sess.run(
        tf.image.non_max_suppression(boxes=boxes, scores=scores,
                                     iou_threshold=0.5, max_output_size=5))
    print(selected_indices)
    selected_boxes = sess.run(K.gather(boxes, selected_indices))
    print(selected_boxes)

draw_rectangle()
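# A worked IoU check (not part of the original) for the demo above: the box
# [.1, .2, .3, .4] overlaps the top-scoring box [.1, .1, .4, .4] with
# IoU = 0.04 / 0.09, roughly 0.44, which is below the 0.5 threshold, so both
# survive non-max suppression.
import numpy as np

def iou(b1, b2):
    tl = np.maximum(b1[:2], b2[:2])
    br = np.minimum(b1[2:], b2[2:])
    inter = np.prod(np.maximum(br - tl, 0.))
    a1 = np.prod(b1[2:] - b1[:2])
    a2 = np.prod(b2[2:] - b2[:2])
    return inter / (a1 + a2 - inter)

print(iou(np.array([.1, .2, .3, .4]), np.array([.1, .1, .4, .4])))  # ~0.444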