def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: raw output of a detection head; axes 1 and 2 are read as the
            grid height and width, and the last axis is reshaped to
            ``(num_anchors, num_classes + 5)``.
        anchors: sequence of ``num_anchors`` (width, height) pairs.
        num_classes: number of class scores per anchor.
        input_shape: (height, width) of the network input; reversed before
            it is used to normalise box sizes.
        calc_loss: when truthy, return the intermediate tensors needed by the
            loss instead of the decoded confidences.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    feats_dtype = K.dtype(feats)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), feats_dtype)

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # The [::-1] turns (height, width) into (width, height) to match (x, y).
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], feats_dtype)
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], feats_dtype)

    # Truthiness test instead of `== True`; the loss path does not need the
    # confidence / class tensors, so return before building them.
    if calc_loss:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, x, mask=None):
    '''Fuse context and question encodings with two-way attention.

    ``x`` is unpacked as ``(context, question, context_mask, question_mask)``.
    Per the original author's notes the two encodings are float tensors of
    shape (batch, time_step, filters) and the masks are boolean tensors of
    shape (batch, time_step) -- TODO confirm against the caller.

    Returns the concatenation, along the last axis, of the context, the
    context-to-question attention output, and the context multiplied
    element-wise with each of the two attention outputs. ``mask`` is unused.
    '''
    context, question, context_mask, question_mask = x
    swap_last_two = (0, 2, 1)

    # Similarity matrix S, built from three terms plus a bias.
    # K.dot(context, W0): [batch, time_step, dim] x [dim, 1]
    #                     -> [batch, time_step, 1], then tiled over the question.
    context_term = K.tile(K.dot(context, self.W0), [1, 1, self.q_maxlen])
    question_term = K.tile(
        K.permute_dimensions(K.dot(question, self.W1), pattern=swap_last_two),
        [1, self.c_maxlen, 1])
    interaction_term = K.batch_dot(
        context * self.W2,
        K.permute_dimensions(question, pattern=swap_last_two))
    similarity = context_term + question_term + interaction_term
    similarity += self.bias

    # Softmax over the question axis (tf.nn.softmax defaults to axis=-1).
    question_mask = tf.expand_dims(question_mask, 1)
    over_question = tf.nn.softmax(self.mask_logits(similarity, question_mask))

    # Softmax over the context axis, then swap the last two axes.
    context_mask = tf.expand_dims(context_mask, 2)
    over_context = tf.nn.softmax(
        self.mask_logits(similarity, context_mask), axis=1)
    over_context_t = K.permute_dimensions(over_context, swap_last_two)

    c2q = tf.matmul(over_question, question)
    q2c = tf.matmul(tf.matmul(over_question, over_context_t), context)

    return K.concatenate(
        [context, c2q, context * c2q, context * q2c], axis=-1)
def DeltaLayer(encoded_l, encoded_r, negateDiffs=False):
    """Compute the absolute difference between every pair of pixels.

    Both inputs are feature volumes (e.g. the output of a conv layer) of
    identical size. For every channel, each spatial position of the left
    volume is compared with each spatial position of the right volume.

    How it works (Reshape works row-wise):
        - the left volume becomes a (w*h, 1) column per channel,
        - the right volume becomes a (1, w*h) row per channel,
        - the column is tiled along the column axis and the row along the
          row axis, so both become (w*h, w*h) per channel,
        - the element-wise absolute difference is taken.

    Args:
        encoded_l, encoded_r: left and right image tensors of shape
            (batchsize, w, h, channels); must have the same size.
        negateDiffs: if True, ``-abs(diff)`` is returned instead of
            ``abs(diff)``. Default: False.

    Returns:
        Difference tensor of size (batchsize, w*h, w*h, channels).
    """
    w = encoded_l.shape[1]
    h = encoded_l.shape[2]
    chan = encoded_l.shape[3]

    # Reshape only describes the non-batch axes; its output is 4-D because
    # the batch axis is kept in front.
    left_column = Reshape((w * h, 1, chan))(encoded_l)
    right_row = Reshape((1, w * h, chan))(encoded_r)

    # Four tile multiples: the first one belongs to the batch axis.
    left_grid = Lambda(lambda t: K.tile(t, [1, 1, w * h, 1]))(left_column)
    right_grid = Lambda(lambda t: K.tile(t, [1, w * h, 1, 1]))(right_row)

    pair = [left_grid, right_grid]
    if not negateDiffs:
        return Lambda(lambda p: K.abs(p[0] - p[1]))(pair)
    return Lambda(lambda p: -K.abs(p[0] - p[1]))(pair)
def call(self, inputs, **kwargs):
    """Append the new sequence to the stored memory and return the old part.

    ``inputs`` is a pair ``(sequence, memory_length)``. The sequence is
    padded along the batch axis up to ``self.batch_size`` by repeating its
    first sample, appended to ``self.memory`` along axis 1, and the result is
    trimmed to ``self.memory_len + self.target_len`` steps before being
    written back to ``self.memory``.

    Returns a slice of the updated memory covering at most
    ``min(self.memory_len, memory_length)`` steps for the real batch rows.
    """
    sequence, memory_length = inputs
    memory_length = K.cast(memory_length[0][0], 'int32')
    seq_shape = K.shape(sequence)
    batch_size = K.cast(seq_shape[0], 'int32')
    seq_len = K.cast(seq_shape[1], 'int32')
    window = self.memory_len + self.target_len

    # Build new memory.
    # Fill the missing batch rows with copies of the first sample.
    filler = K.tile(sequence[0:1, ...], (self.batch_size - batch_size, 1, 1))
    # (self.batch_size, seq_len, output_dim)
    full_batch = K.concatenate([sequence, filler], axis=0)
    # (self.batch_size, self.memory_len + self.target_len + seq_len, ...)
    extended = K.concatenate([self.memory, full_batch], axis=1)
    # Drop the oldest seq_len steps:
    # (self.batch_size, self.memory_len + self.target_len, output_dim)
    new_memory = tf.slice(
        extended,
        (0, seq_len, 0),
        (self.batch_size, window, self.output_dim),
    )
    self.add_update(K.update(self.memory, new_memory), sequence)

    # Build output: (batch_size, memory_length, output_dim)
    start = K.maximum(0, window - seq_len - memory_length)
    length = K.minimum(self.memory_len, memory_length)
    return tf.slice(
        new_memory,
        (0, start, 0),
        (batch_size, length, self.output_dim),
    )
def _pad(self, y):
    """Append the padding row ``self.p`` to every row of ``y``.

    Padding is only applied when ``self.N > self.num_leaves``; per the
    original comment it fills the places of non-leaf nodes with zeros
    (presumably ``self.p`` holds those zeros -- confirm where it is built).

    Args:
        y: label encoding; axis 0 is used as the number of rows to pad.

    Returns:
        ``y`` cast to the dtype of ``self.p`` and concatenated with one copy
        of ``self.p`` per row, or ``y`` unchanged when no padding is needed.
    """
    if self.N <= self.num_leaves:
        # Nothing to pad. Return the input explicitly instead of falling
        # off the end of the method and handing the caller ``None``.
        return y

    # Cast in case our labels are ints.
    y = tf.cast(y, self.p.dtype)
    P = K.tile(self.p, (K.shape(y)[0], 1))
    return K.concatenate((y, P))
def create_inital_state(inputs, hidden_size):
    """Build an all-zero state with one row per sample of ``inputs``.

    No real initial state is used, but the K.rnn function still needs
    something to be passed in. Deriving the zeros from ``inputs`` keeps the
    batch dimension dynamic.

    Args:
        inputs: tensor whose axes 1 and 2 are summed away; per the original
            comment it is (batch_size, enc_seq_len, latent_dim).
        hidden_size: width of the returned state.

    Returns:
        Zero tensor of shape (batch_size, hidden_size).
    """
    # (batch_size, enc_seq_len, latent_dim) -> (batch_size,)
    per_sample_zero = K.sum(K.zeros_like(inputs), axis=[1, 2])
    # (batch_size,) -> (batch_size, 1)
    zero_column = K.expand_dims(per_sample_zero)
    # (batch_size, 1) -> (batch_size, hidden_size)
    return K.tile(zero_column, [1, hidden_size])
def call(self, x):
    """Add an attention-pooled summary of ``self.kernel`` to ``x``.

    Scores are computed from ``self.kernel`` projected by ``self.W`` (plus
    ``self.b``), squashed with tanh, exponentiated and normalised. The
    weighted kernel is summed over axis 0, repeated ``step_dim`` times,
    concatenated with the kernel and projected by ``self.W2`` / ``self.b2``.

    NOTE(review): the scores are derived from ``self.kernel`` rather than
    from ``x``; only the last two static dimensions of ``x`` are used.
    Confirm that this is intended.

    Args:
        x: input tensor whose last two static dimensions are read as
            (step_dim, features_dim).

    Returns:
        ``x`` plus the projected summary.
    """
    # K.int_shape gives plain ints, so this works whether or not shape
    # entries expose ``.value``.
    static_shape = K.int_shape(x)
    features_dim = static_shape[-1]
    step_dim = static_shape[-2]

    flat_kernel = K.reshape(self.kernel, (-1, features_dim))  # n, d
    w_column = K.reshape(self.W, (features_dim, 1))  # d x 1
    # (n, 1) -> rows of step_dim scores
    eij = K.reshape(K.dot(flat_kernel, w_column), (-1, step_dim))
    eij += self.b
    eij = K.tanh(eij)

    a = K.exp(eij)
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
    a = tf.transpose(a, (1, 0))

    # Broadcasting multiply of the kernel with the attention weights.
    weighted_input = self.kernel * a
    # Sum of the weighted entries over axis 0.
    temp = K.sum(weighted_input, axis=0)
    temp = K.tile(K.expand_dims(temp, 0), [step_dim, 1])
    temp = keras.layers.concatenate([self.kernel, temp])
    temp = K.dot(temp, self.W2) + self.b2
    return x + temp
def call(self, inputs, **kwargs):
    """Attach the speaker embedding to every step of the memory.

    ``inputs`` is a pair ``(memory, speaker_embedding)``. The embedding gets
    a new axis 1, is repeated once per step of ``memory`` (its axis 1) and is
    concatenated to ``memory`` along the last axis.
    """
    memory, speaker_embedding = inputs
    num_steps = K.shape(memory)[1]
    embedding_per_step = K.tile(
        K.expand_dims(speaker_embedding, axis=1), [1, num_steps, 1])
    return K.concatenate([memory, embedding_per_step], axis=-1)
def call(self, inputs):
    """Covariance pooling with an iterative matrix square root.

    Steps:
        1. Flatten the spatial axes and compute the per-sample channel
           covariance ``Sigma = X * I_hat * X^T``.
        2. Pre-normalise by the trace, run ``self.num_iter`` coupled
           Newton-Schulz iterations, and post-compensate with
           ``sqrt(trace)``.
        3. Return the upper-triangular part (diagonal included) as a vector.

    Args:
        inputs: 4-D tensor; axes 1, 2, 3 are read as rows, cols and channels
            (i.e. channels-last), and those three must be statically known.

    Returns:
        Tensor of shape (batch, channels * (channels + 1) // 2).

    Note:
        Three defects of the previous version are fixed here; the numerical
        output therefore differs from it:
        * the input was reshaped straight to (batch, channels, n) without a
          transpose, which mixes channel and spatial entries;
        * the ``Z`` update used the already-updated ``Y`` instead of sharing
          the same ``Z * Y`` term with the ``Y`` update;
        * the static output size was computed with ``/`` (a float).
    """
    batch_size = K.shape(inputs)[0]
    num_rows = K.int_shape(inputs)[1]
    num_cols = K.int_shape(inputs)[2]
    num_channels = K.int_shape(inputs)[3]
    n = num_rows * num_cols

    # (batch, rows, cols, channels) -> (batch, n, channels)
    #                               -> (batch, channels, n)
    X = K.reshape(inputs, (batch_size, n, num_channels))
    X = K.permute_dimensions(X, (0, 2, 1))

    # Centering matrix scaled by 1/n: (1/n) * (I - (1/n) * 1 1^T).
    factor = K.cast(1.0 / n, K.floatx())
    I_hat = factor * (K.eye(n) - factor * K.ones((n, n)))
    # One copy per sample in the batch.
    I_hat = K.tile(K.expand_dims(I_hat, axis=0), (batch_size, 1, 1))
    Sigma = K.batch_dot(K.batch_dot(X, I_hat),
                        K.permute_dimensions(X, (0, 2, 1)))

    # Pre-normalization
    trace = K.sum(
        K.sum(K.eye(num_channels) * Sigma, axis=1, keepdims=True),
        axis=2, keepdims=True)
    A = Sigma / trace

    # Newton-Schulz iteration
    Y = A
    Z = K.tile(K.expand_dims(K.eye(num_channels), axis=0),
               (batch_size, 1, 1))
    I3 = K.tile(K.expand_dims(3 * K.eye(num_channels), axis=0),
                (batch_size, 1, 1))
    for _ in range(self.num_iter):
        # Both updates must use the same term built from the previous Y, Z.
        T = 0.5 * (I3 - K.batch_dot(Z, Y))
        Y, Z = K.batch_dot(Y, T), K.batch_dot(T, Z)

    # Post-compensation
    C = K.sqrt(trace) * Y

    # Extract upper triangular matrix as vector
    ones = K.ones((num_channels, num_channels))
    mask = tf.matrix_band_part(ones, 0, -1)  # upper triangle of 0s and 1s
    mask = K.cast(mask, 'bool')
    # The 2-D mask is applied to the 2nd and 3rd dimension of C.
    triuvec = tf.boolean_mask(C, mask, axis=1)
    # Set the static shape manually; `//` keeps the size an integer.
    triuvec.set_shape((None, num_channels * (num_channels + 1) // 2))
    return triuvec
def create_inital_state(inputs, hidden_size):
    """Return a zero tensor of shape (batch_size, hidden_size).

    The zeros are derived from ``inputs`` so the batch size follows the
    data: axes 1 and 2 of a zero copy of ``inputs`` are summed away, a
    trailing axis is added and then repeated ``hidden_size`` times.
    """
    zeros = K.zeros_like(inputs)              # (batch_size, enc_seq_len, latent_dim)
    batch_zeros = K.sum(zeros, axis=[1, 2])   # (batch_size)
    batch_column = K.expand_dims(batch_zeros)  # (batch_size, 1)
    state = K.tile(batch_column, [1, hidden_size])  # (batch_size, hidden_size)
    return state
def call(self, x, mask=None):
    """Concatenate ``x`` with one attention summary per time step.

    For every step ``i`` in ``range(self.step_dim)`` the slice ``x[:, i, :]``
    is used as a query: it is dotted with every step of ``x``, the scores
    are exponentiated and normalised over axis 1, and the weighted sum of
    ``x`` over axis 1 is taken. Each summary is tiled ``self.features_dim``
    times along the last axis, the summaries are stacked along axis 1, and
    the stack is concatenated to ``x`` along the last axis.

    Args:
        x: input tensor, presumably (batch, step_dim, features_dim) --
            TODO confirm against the layer's ``build``.
        mask: unused.

    Returns:
        The concatenation of ``x`` and the stacked summaries.
    """
    features_dim = self.features_dim
    step_dim = self.step_dim

    def _summary_for_step(index):
        """Attention summary of ``x`` with step ``index`` as the query."""
        query = K.expand_dims(x[:, index, :], 1)
        eij = K.batch_dot(x, query, (2, 2))
        a = K.exp(eij)
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(),
                    K.floatx())
        summary = K.expand_dims(K.sum(x * a, axis=1), 1)
        return K.tile(summary, [1, 1, features_dim])

    # Step 0 seeds the stack; the remaining steps are appended in order.
    alltemp = _summary_for_step(0)
    for i in range(1, step_dim):
        alltemp = keras.layers.concatenate(
            [alltemp, _summary_for_step(i)], 1)

    return keras.layers.concatenate([x, alltemp])
def _generate_recurrent_dropout_mask(self, inputs, training=None):
    """Create the four recurrent dropout masks and store them on the layer.

    When ``0 < self.recurrent_dropout < 1`` a ones tensor of shape
    (batch, self.units) is built from ``inputs`` and four masks are stored
    in ``self._recurrent_dropout_mask``; each mask is the dropped-out ones
    tensor in the training phase and plain ones otherwise. For any other
    rate the attribute is set to ``None``.

    Args:
        inputs: tensor indexed as ``inputs[:, 0, 0]`` to obtain one entry
            per sample.
        training: forwarded to ``K.in_train_phase``.
    """
    if not 0 < self.recurrent_dropout < 1:
        self._recurrent_dropout_mask = None
        return

    ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
    ones = K.tile(ones, (1, self.units))

    def dropped_inputs():
        # Use the recurrent rate that the guard checks. Drawing the mask
        # with ``self.dropout`` applied the input-dropout rate instead.
        return K.dropout(ones, self.recurrent_dropout)

    self._recurrent_dropout_mask = [
        K.in_train_phase(dropped_inputs, ones, training=training)
        for _ in range(4)
    ]
def call(self, inputs, **kwargs):
    """Build sinusoidal embeddings for descending position indices.

    The number of positions is the sum of the axis-1 lengths of
    ``inputs[0]`` and ``inputs[1]``. Positions run from ``length - 1`` down
    to 0, are repeated for every sample of ``inputs[0]`` and optionally
    clipped to ``[0, self.clamp_len]``.

    Returns the concatenation of the sine and cosine of the scaled
    positions along the last axis.
    """
    first, second = inputs[0], inputs[1]
    total_len = K.shape(first)[1] + K.shape(second)[1]
    batch = K.shape(first)[0]

    descending = K.arange(total_len - 1, -1, -1, dtype=K.floatx())
    pos = K.tile(K.expand_dims(descending, axis=0), [batch, 1])
    if self.clamp_len is not None:
        pos = K.clip(pos, min_value=0, max_value=self.clamp_len)
    pos = K.expand_dims(pos, axis=-1)

    # Inverse frequencies: 1 / 10000^(2i / output_dim).
    dim_as_float = K.cast(self.output_dim, K.floatx())
    exponents = K.expand_dims(
        K.arange(0.0, self.output_dim, 2.0), axis=0) / dim_as_float
    inv_freq = 1.0 / K.pow(10000.0, exponents)

    angles = pos * inv_freq
    return K.concatenate([K.sin(angles), K.cos(angles)], axis=-1)
def get_initial_state(self, inputs):
    """Return the four initial states ``[n, d, h, a_max]``.

    All four start from a zero tensor of shape (samples, self.units) derived
    from ``inputs``:
        * ``n`` and ``d`` are the zeros themselves,
        * ``h`` is the zeros plus the cell's recurrent activation applied to
          ``self.cell.initial_attention`` (with a leading axis added),
        * ``a_max`` is the zeros minus 1e38 expressed in the state's dtype.
    """
    # (samples, ...) -> (samples,) -> (samples, 1) -> (samples, units)
    zeros = K.sum(K.zeros_like(inputs), axis=(1, 2))
    zeros = K.tile(K.expand_dims(zeros), [1, self.units])

    n = K.identity(zeros)
    d = K.identity(zeros)
    h = K.identity(zeros)

    # 1e38 round-tripped through the state's dtype, as a Python scalar.
    large_value = np.array([1E38]).astype(zeros.dtype.name).item()
    a_max = K.identity(zeros) - large_value

    attention = K.expand_dims(self.cell.initial_attention, axis=0)
    h = h + self.cell.recurrent_activation(attention)
    return [n, d, h, a_max]
def call(self, inputs):
    """Filter every channel of ``inputs`` with the layer's fixed kernel.

    ``self.filter`` is turned into a 2-D kernel (a 1-D filter is multiplied
    with itself as an outer product), optionally normalised to sum to one,
    repeated for every input channel and applied as a depthwise convolution
    with stride ``self.stride`` and 'SAME' padding.
    """
    kernel = np.array(self.filter, np.float32)
    if kernel.ndim == 1:
        # Separable filter: outer product of the vector with itself.
        kernel = kernel[:, np.newaxis] * kernel[np.newaxis, :]
    if self.normalize:
        kernel /= np.sum(kernel)

    # (kh, kw) -> (kh, kw, 1, 1), then one copy per input channel.
    kernel = kernel[:, :, np.newaxis, np.newaxis]
    kernel = K.constant(kernel, dtype=inputs.dtype, name='filter')
    num_channels = K.shape(inputs)[-1]
    kernel = K.tile(kernel, [1, 1, num_channels, 1])

    return nn.depthwise_conv2d(
        inputs,
        kernel,
        strides=(1, self.stride, self.stride, 1),
        padding='SAME')
def get_initial_state(self, inputs):
    """Build the initial recurrent state(s) as zeros derived from ``inputs``.

    A zero tensor shaped like ``inputs`` is tiled, passed through the cell's
    ``input_conv`` with an all-zero kernel whose last dimension is
    ``self.cell.filters``, and returned once per entry of
    ``self.cell.state_size`` (or once when ``state_size`` has no length).

    NOTE(review): the comment below describes ``inputs`` as 6-D, but the tile
    multiples have five entries and no reduction over the time axis is
    visible in this method -- confirm the expected rank of ``inputs``.
    """
    # (samples, timesteps, rows, cols, z, filters)
    initial_state = K.zeros_like(inputs)
    # The factor 2 is placed on axis 1 for 'channels_first' and on the last
    # axis otherwise.
    d = [1,2,1,1,1] if self.cell.data_format == 'channels_first' else [1,1,1,1,2]
    initial_state = K.tile(initial_state, d)
    # Same shape as the cell's kernel, except that the last dimension is the
    # number of filters.
    shape = list(self.cell.kernel_shape)
    shape[-1] = self.cell.filters
    # Convolving with zeros presumably only serves to obtain the state's
    # shape -- TODO confirm.
    initial_state = self.cell.input_conv(initial_state, array_ops.zeros(tuple(shape)), padding=self.cell.padding)
    if hasattr(self.cell.state_size, '__len__'):
        # One (shared) tensor per declared state.
        return [initial_state for _ in self.cell.state_size]
    else:
        return [initial_state]
def create_inital_state(inputs, hidden_size):
    """Create a zero state of shape (batch_size, hidden_size).

    No real initial state is used, but the K.rnn function needs one to be
    passed in. Per the original notes ``inputs`` is
    (batch_size, enc_seq_len, latent_dim), e.g. [b, 64, 512], and
    ``hidden_size`` is 64.
    """
    # (batch_size, enc_seq_len, latent_dim) -> (batch_size,)
    summed = K.sum(K.zeros_like(inputs), axis=[1, 2])
    # (batch_size,) -> (batch_size, 1)
    column = K.expand_dims(summed)
    # (batch_size, 1) -> (batch_size, hidden_size)
    return tile(column, [1, hidden_size])
def call(self, inputs, **kwargs):
    """Append a minibatch standard-deviation feature map to ``inputs``.

    The standard deviation of every entry is computed over axis 0, averaged
    into a single value, and that value is repeated into one extra feature
    map per sample, which is concatenated along the last axis.
    """
    # Mean over axis 0, kept so it broadcasts against the inputs.
    batch_mean = K.mean(inputs, axis=0, keepdims=True)
    # Variance over axis 0: mean of the squared deviations.
    variance = K.mean(K.square(inputs - batch_mean), axis=0, keepdims=True)
    # Small constant to avoid a blow-up when taking the square root.
    variance += 1e-8
    stdev = K.sqrt(variance)
    # Single scalar (all axes kept with size 1): the average deviation.
    average_stdev = K.mean(stdev, keepdims=True)

    # Repeat over the first three axes of the input to get one map per sample.
    dims = K.shape(inputs)
    stdev_map = K.tile(average_stdev, (dims[0], dims[1], dims[2], 1))
    return K.concatenate([inputs, stdev_map], axis=-1)
def _reshape_mask(self, mask):
    """Repeat ``mask`` once per head and fold the heads into axis 0.

    A new axis 1 is added, tiled ``self.num_head`` times, and the result is
    reshaped to rows of length ``K.shape(mask)[1]``.
    """
    seq_len = K.shape(mask)[1]
    per_head = K.tile(K.expand_dims(mask, axis=1), [1, self.num_head, 1])
    return K.reshape(per_head, (-1, seq_len))
def _attention_layer(self, memory_plus_inputs, ns_plus_one, rpe_out, rpe_neighbor1, rpe_neighbor2, ws):
    """Multi-head attention over the memory slots plus a neighbor score.

    Two results are produced:

    * ``context_layer``: attention output for the LAST position only (the
      query is ``query_layer[:, -1:, :]``), attending over all
      ``num_memory_slots + 1`` positions.
    * ``neighbor_score``: for each pair of adjacent positions, the sum over
      heads and head size of the element-wise query/key products taken in
      both directions.

    Queries and keys are built from ``memory_plus_inputs`` scaled by
    ``sqrt(ns_plus_one)``; values use ``memory_plus_inputs`` unscaled. The
    three projections are column slices of ``ws["attention_kernel"]`` and
    ``ws["attention_bias"]`` is split into three equal parts. When
    ``self.use_relative_position`` is set, terms projected with
    ``ws["rel_kernel"]`` are added to both results.

    The bracketed shape comments are the original author's notation
    (presumably B = batch, F/T = from/to length, N = heads, H = size per
    head, C = input channels -- TODO confirm).

    NOTE(review): the scores are divided by ``size_per_head`` itself, not by
    its square root -- confirm this is intended.
    NOTE(review): positions with ``ns_plus_one <= 0`` get 10000 SUBTRACTED
    from the attention scores but ADDED to the neighbor score; presumably a
    low neighbor score is the preferred one downstream -- verify with the
    caller.

    Returns:
        Tuple ``(context_layer, neighbor_score)``.
    """
    # Queries and keys both span the memory slots plus the current input.
    from_length = self.num_memory_slots + 1
    to_length = self.num_memory_slots + 1
    q_bias, k_bias, v_bias = array_ops.split(ws["attention_bias"], 3, axis=0)

    # ---- Query projection ----
    # [B, F, C]
    query_layer = memory_plus_inputs * K.expand_dims(K.sqrt(ns_plus_one))
    # [B, F, N*H]
    query_layer = K.dot(query_layer, ws["attention_kernel"][:, :self.units])
    # [B, F, N*H]
    query_layer = K.bias_add(query_layer, q_bias)
    # [B, F, N, H]
    query_layer = array_ops.reshape(
        query_layer,
        [-1, from_length, self.num_attention_heads, self.size_per_head])
    # [B, N, F, H]
    query_layer = array_ops.transpose(query_layer, perm=[0, 2, 1, 3])
    # [B*N, F, H]
    query_layer = array_ops.reshape(
        query_layer, shape=[-1, from_length, self.size_per_head])

    # ---- Key projection ----
    # [B, T, C]
    key_layer = memory_plus_inputs * K.expand_dims(K.sqrt(ns_plus_one))
    # [B, T, N*H]
    key_layer = K.dot(key_layer,
                      ws["attention_kernel"][:, self.units:self.units * 2])
    # [B, T, N*H]
    key_layer = K.bias_add(key_layer, k_bias)
    # [B, T, N, H]
    key_layer = array_ops.reshape(
        key_layer,
        [-1, to_length, self.num_attention_heads, self.size_per_head])
    # [B, N, T, H]
    key_layer = array_ops.transpose(key_layer, perm=[0, 2, 1, 3])
    # [B*N, T, H]
    key_layer = array_ops.reshape(
        key_layer, shape=[-1, to_length, self.size_per_head])

    # ---- Value projection (input is not scaled by sqrt(ns_plus_one)) ----
    # [B, T, N*H]
    value_layer = K.dot(
        memory_plus_inputs,
        ws["attention_kernel"][:, self.units * 2:self.units * 3])
    # [B, T, N*H]
    value_layer = K.bias_add(value_layer, v_bias)
    # [B, T, N, H]
    value_layer = array_ops.reshape(
        value_layer,
        [-1, to_length, self.num_attention_heads, self.size_per_head])
    # [B, N, T, H]
    value_layer = array_ops.transpose(value_layer, perm=[0, 2, 1, 3])
    # [B*N, T, H]
    value_layer = array_ops.reshape(
        value_layer, shape=[-1, to_length, self.size_per_head])

    # ---- Attention of the last position over all positions ----
    # [B*N, 1, T]
    attention_scores = K.batch_dot(query_layer[:, -1:, :], key_layer,
                                   axes=[2, 2])
    if self.use_relative_position:
        # [B, T, N*H]
        r = K.dot(rpe_out, ws["rel_kernel"])
        # [B, T, N, H]
        r = array_ops.reshape(
            r, [-1, to_length, self.num_attention_heads, self.size_per_head])
        # [B, N, T, H]
        r = array_ops.transpose(r, perm=[0, 2, 1, 3])
        # [B*N, T, H]
        r = array_ops.reshape(r, [-1, to_length, self.size_per_head])
        # [B*N, 1, T]
        bd = tf.einsum("bfh,bth->bft", query_layer[:, -1:, :], r)
        # [B*N, 1, T]
        attention_scores += bd
    # [B*N, 1, T]
    attention_scores = attention_scores / K.cast(self.size_per_head,
                                                 tf.float32)
    # 1.0 where ns_plus_one > 0, repeated for every head.
    # [B, N, T]
    mask = K.tile(
        K.expand_dims(K.cast(ns_plus_one > 0, tf.float32), axis=1),
        [1, self.num_attention_heads, 1])
    # [B*N, 1, T]
    mask = array_ops.reshape(mask, [-1, 1, to_length])
    # Push the scores of masked positions down before the softmax.
    # [B*N, 1, T]
    attention_scores -= (1.0 - mask) * 10000.0
    # [B*N, 1, T]
    attention_probs = K.softmax(attention_scores)
    # [B*N, 1, H]
    context_layer = K.batch_dot(attention_probs, value_layer, axes=[2, 1])
    # [B, N, H]
    context_layer = array_ops.reshape(
        context_layer, [-1, self.num_attention_heads, self.size_per_head])
    # [B, N*H]
    context_layer = array_ops.reshape(
        context_layer, [-1, self.num_attention_heads * self.size_per_head])

    # -----------------------------
    # ---- Neighbor score between adjacent positions ----
    # Element-wise products of position i+1's query with position i's key
    # and of position i's query with position i+1's key.
    # [B*N, F, H]
    neighbor_score = (query_layer[:, 1:, :] * key_layer[:, :-1, :] +
                      query_layer[:, :-1, :] * key_layer[:, 1:, :])
    if self.use_relative_position:
        # [B, F, N*H]
        r = K.dot(rpe_neighbor1, ws["rel_kernel"])
        # [B, F, N, H]
        r = array_ops.reshape(r, [
            -1, self.num_memory_slots, self.num_attention_heads,
            self.size_per_head
        ])
        # [B, N, F, H]
        r = array_ops.transpose(r, perm=[0, 2, 1, 3])
        # [B*N, F, H]
        r = array_ops.reshape(
            r, [-1, self.num_memory_slots, self.size_per_head])
        # [B*N, F, H]
        bd = query_layer[:, 1:, :] * r
        # [B*N, F, H]
        neighbor_score += bd
        # [B, F, N*H]
        r = K.dot(rpe_neighbor2, ws["rel_kernel"])
        # [B, F, N, H]
        r = array_ops.reshape(r, [
            -1, self.num_memory_slots, self.num_attention_heads,
            self.size_per_head
        ])
        # [B, N, F, H]
        r = array_ops.transpose(r, perm=[0, 2, 1, 3])
        # [B*N, F, H]
        r = array_ops.reshape(
            r, [-1, self.num_memory_slots, self.size_per_head])
        # [B*N, F, H]
        bd = query_layer[:, :-1, :] * r
        # [B*N, F, H]
        neighbor_score += bd
    # Sum over the head size ...
    # [B*N, F]
    neighbor_score = K.sum(neighbor_score, axis=-1)
    # [B, N, F]
    neighbor_score = array_ops.reshape(
        neighbor_score,
        [-1, self.num_attention_heads, self.num_memory_slots])
    # ... and over the heads.
    # [B, F]
    neighbor_score = K.sum(neighbor_score, axis=1)
    # 1.0 where ns_plus_one > 0; the last position is left out.
    # [B, F]
    mask = K.cast(ns_plus_one[:, :-1] > 0, tf.float32)
    # Masked positions get a large positive score.
    # [B, F]
    neighbor_score += (1.0 - mask) * 10000.0
    return context_layer, neighbor_score
def broadcast(x):
    """Insert a new axis 1 into ``x`` and repeat it ``dlatent_broadcast`` times.

    ``dlatent_broadcast`` is taken from the enclosing scope.
    """
    with_new_axis = x[:, np.newaxis]
    multiples = [1, dlatent_broadcast, 1]
    return K.tile(with_new_axis, multiples)
def call(self, x, mask=None):
    '''
    Return an anchor box tensor based on the shape of the input tensor.

    The anchor boxes do not depend on the values in `x`, only on its static
    shape and on the layer configuration. Everything is therefore computed
    with Numpy, and the finished array is converted to a constant Keras
    tensor at the very end and repeated once per batch item.

    Arguments:
        x (tensor): 4D tensor whose static shape is read as
            `(batch, height, width, channels)`. The input for this layer
            must be the output of the localization predictor layer.
        mask: Unused.

    Returns:
        A 5D float32 tensor of shape
        `(batch, feature_map_height, feature_map_width, n_boxes, 8)`: the
        last axis holds the four box coordinates (in the format selected by
        `self.coords`) followed by the four variances.
    '''
    # The shorter side of the image is the reference length for all boxes.
    shorter_side = min(self.img_height, self.img_width)
    base = self.this_scale * shorter_side

    # One (width, height) pair per anchor box shape.
    box_sizes = []
    for ratio in self.aspect_ratios:
        if ratio != 1:
            box_sizes.append((base * np.sqrt(ratio), base / np.sqrt(ratio)))
            continue
        # Regular anchor box for aspect ratio 1.
        box_sizes.append((base, base))
        if self.two_boxes_for_ar1:
            # A slightly larger square box, using the geometric mean of this
            # scale value and the next.
            bigger = np.sqrt(self.this_scale * self.next_scale) * shorter_side
            box_sizes.append((bigger, bigger))
    box_sizes = np.array(box_sizes)

    # Only the static spatial size of the feature map is needed.
    _, fmap_height, fmap_width, _ = K.int_shape(x)

    # Step sizes: how far apart the anchor box center points are.
    steps = self.this_steps
    if steps is None:
        step_height = self.img_height / fmap_height
        step_width = self.img_width / fmap_width
    elif isinstance(steps, (list, tuple)) and (len(steps) == 2):
        step_height, step_width = steps
    elif isinstance(steps, (int, float)):
        step_height = step_width = steps

    # Offsets: where the first center point lies, in units of the step size,
    # measured from the top and from the left of the image.
    offsets = self.this_offsets
    if offsets is None:
        offset_height = offset_width = 0.5
    elif isinstance(offsets, (list, tuple)) and (len(offsets) == 2):
        offset_height, offset_width = offsets
    elif isinstance(offsets, (int, float)):
        offset_height = offset_width = offsets

    # Grid of anchor box center points; identical for all aspect ratios.
    cy = np.linspace(offset_height * step_height,
                     (offset_height + fmap_height - 1) * step_height,
                     fmap_height)
    cx = np.linspace(offset_width * step_width,
                     (offset_width + fmap_width - 1) * step_width,
                     fmap_width)
    cx_grid, cy_grid = np.meshgrid(cx, cy)

    # `(feature_map_height, feature_map_width, n_boxes, 4)` with
    # `(cx, cy, w, h)` in the last axis. The trailing axis of size 1 lets the
    # centers broadcast over the boxes.
    boxes = np.zeros((fmap_height, fmap_width, self.n_boxes, 4))
    boxes[..., 0] = np.expand_dims(cx_grid, -1)  # cx
    boxes[..., 1] = np.expand_dims(cy_grid, -1)  # cy
    boxes[..., 2] = box_sizes[:, 0]  # w
    boxes[..., 3] = box_sizes[:, 1]  # h

    # `(cx, cy, w, h)` -> corner format; x values sit at indices 0 and 2,
    # y values at indices 1 and 3.
    boxes = convert_coordinates(boxes,
                                start_index=0,
                                conversion='centroids2corners')

    def _limit(columns, image_size):
        """Keep the selected coordinates within the image boundaries."""
        values = boxes[..., columns]
        values[values >= image_size] = image_size - 1
        values[values < 0] = 0
        boxes[..., columns] = values

    if self.clip_boxes:
        _limit([0, 2], self.img_width)
        _limit([1, 3], self.img_height)

    # Optionally normalize the coordinates to be within [0,1].
    if self.normalize_coords:
        boxes[..., [0, 2]] /= self.img_width
        boxes[..., [1, 3]] /= self.img_height

    # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we
    # don't have to unnecessarily convert back and forth.
    if self.coords == 'centroids':
        # Corners back to `(cx, cy, w, h)`.
        boxes = convert_coordinates(boxes,
                                    start_index=0,
                                    conversion='corners2centroids',
                                    border_pixels='half')
    elif self.coords == 'minmax':
        # Corners to `(xmin, xmax, ymin, ymax)`.
        boxes = convert_coordinates(boxes,
                                    start_index=0,
                                    conversion='corners2minmax',
                                    border_pixels='half')

    # Same shape as the boxes; the 4 variance values broadcast into every
    # position of the last axis.
    variances = np.zeros_like(boxes)
    variances += self.variances
    # `(feature_map_height, feature_map_width, n_boxes, 8)`
    boxes = np.concatenate((boxes, variances), axis=-1)

    # Prepend the batch axis and repeat along it.
    boxes = np.expand_dims(boxes, axis=0)
    batch = K.shape(x)[0]
    return K.tile(K.constant(boxes, dtype='float32'), (batch, 1, 1, 1, 1))
def call(self, x, mask=None):
    """Return the anchor boxes for the feature map ``x``.

    The boxes are computed with Numpy from the shape of ``x`` and the layer
    configuration, converted to a constant float32 tensor and repeated once
    per batch item. ``mask`` is unused.

    Returns:
        Tensor of shape
        (batch, feature_map_height, feature_map_width, n_boxes, 8): four box
        coordinates followed by four variances.

    NOTE(review): ``convert_coordinates`` is called with the keyword
    ``border_pixel``; confirm that this matches the helper's signature.
    """
    shorter_side = min(self.img_height, self.img_width)
    base = self.this_scale * shorter_side

    # Width and height of one box per aspect ratio.
    box_sizes = []
    for ratio in self.aspect_ratios:
        if ratio != 1:
            box_sizes.append((base * np.sqrt(ratio), base / np.sqrt(ratio)))
            continue
        box_sizes.append((base, base))
        if self.two_boxes_for_ar1:
            # A slightly larger box, from the geometric mean of this scale
            # value and the next one.
            bigger = np.sqrt(self.this_scale * self.next_scale) * shorter_side
            box_sizes.append((bigger, bigger))
    box_sizes = np.array(box_sizes)

    _, fmap_height, fmap_width, _ = x.shape

    # Distance between neighboring box centers.
    steps = self.this_steps
    if steps is None:
        step_height = self.img_height / fmap_height
        step_width = self.img_width / fmap_width
    elif isinstance(steps, (list, tuple)) and (len(steps) == 2):
        step_height, step_width = steps
    elif isinstance(steps, (int, float)):
        step_height = step_width = steps

    # Offsets of the first box center, in units of the step size.
    offsets = self.this_offsets
    if offsets is None:
        offset_height = offset_width = 0.5
    elif isinstance(offsets, (list, tuple)) and (len(offsets) == 2):
        offset_height, offset_width = offsets
    elif isinstance(offsets, (int, float)):
        offset_height = offset_width = offsets

    cy = np.linspace(offset_height * step_height,
                     (offset_height + fmap_height - 1) * step_height,
                     fmap_height)
    cx = np.linspace(offset_width * step_width,
                     (offset_width + fmap_width - 1) * step_width,
                     fmap_width)
    cx_grid, cy_grid = np.meshgrid(cx, cy)

    # (feature_map_height, feature_map_width, n_boxes, 4) holding
    # (cx, cy, w, h); the centers broadcast over the boxes.
    boxes = np.zeros((fmap_height, fmap_width, self.n_boxes, 4))
    boxes[..., 0] = np.expand_dims(cx_grid, -1)
    boxes[..., 1] = np.expand_dims(cy_grid, -1)
    boxes[..., 2] = box_sizes[:, 0]
    boxes[..., 3] = box_sizes[:, 1]

    boxes = convert_coordinates(boxes,
                                start_index=0,
                                conversion="centroids2corners")

    def _limit(columns, image_size):
        """Keep the selected coordinates within the image boundaries."""
        values = boxes[..., columns]
        values[values >= image_size] = image_size - 1
        values[values < 0] = 0
        boxes[..., columns] = values

    if self.clip_boxes:
        _limit([0, 2], self.img_width)
        _limit([1, 3], self.img_height)

    if self.normalize_coords:
        boxes[..., [0, 2]] /= self.img_width
        boxes[..., [1, 3]] /= self.img_height

    if self.coords == "centroids":
        boxes = convert_coordinates(boxes,
                                    start_index=0,
                                    conversion="corners2centroids",
                                    border_pixel="half")

    # The variances broadcast into every position of the last axis.
    variances = np.zeros_like(boxes)
    variances += self.variances
    # (feature_map_height, feature_map_width, n_boxes, 8)
    boxes = np.concatenate((boxes, variances), axis=-1)

    # Add the batch axis and repeat along it.
    boxes = np.expand_dims(boxes, axis=0)
    batch = K.shape(x)[0]
    return K.tile(K.constant(boxes, dtype="float32"), (batch, 1, 1, 1, 1))
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tf.Tensor
        Final convolutional layer features (channels last).
    anchors : np.array, list
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tf.Tensor
        Probability estimate for whether each box contains any object.
    box_xy : tf.Tensor
        (x, y) box predictions adjusted by spatial location in conv layer.
    box_wh : tf.Tensor
        (w, h) box predictions adjusted by anchors and conv spatial
        resolution.
    box_class_probs : tf.Tensor
        Probability distribution estimate for each box over class labels.

    Note that the tensors are returned in exactly this order.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic conv dims so the model can stay fully convolutional.
    # TODO: Remove or add option for static implementation.
    grid_dims = K.shape(feats)[1:3]  # assuming channels last
    grid_height, grid_width = grid_dims[0], grid_dims[1]

    # In YOLO the height index is the inner most iteration.
    height_index = K.tile(K.arange(0, stop=grid_height), [grid_width])
    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    width_index = K.tile(
        K.expand_dims(K.arange(0, stop=grid_width), 0), [grid_height, 1])
    width_index = K.flatten(K.transpose(width_index))

    cell_index = K.transpose(K.stack([height_index, width_index]))
    cell_index = K.reshape(cell_index, [1, grid_height, grid_width, 1, 2])
    cell_index = K.cast(cell_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_height, grid_width, num_anchors, num_classes + 5])
    grid_dims = K.cast(K.reshape(grid_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    raw_xy = K.sigmoid(feats[..., :2])
    raw_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (raw_xy + cell_index) / grid_dims
    box_wh = raw_wh * anchors_tensor / grid_dims

    return box_confidence, box_xy, box_wh, box_class_probs
def call(self, input_tensor, training=None):
    """Upsample the input capsules, compute their votes and route them.

    The input-capsule axis is folded into the batch axis so that a single
    shared 2-D convolution (or transposed convolution) produces the votes of
    every input capsule. The votes are then unfolded again and handed to
    ``update_routing``.

    Parameters
    ----------
    input_tensor : tensor
        5-D tensor. Presumably laid out as
        (batch, input_height, input_width, input_num_capsule,
        input_num_atoms) -- inferred from the reshape targets below;
        confirm against the caller.
    training : optional
        Not used by this method.

    Returns
    -------
    The activations returned by ``update_routing``.
    """
    # Put the capsule axis right after the batch axis. Folding
    # (batch, caps) and later unfolding to (batch, caps) must use the same
    # axis order, otherwise samples and capsules get mixed when both are > 1.
    input_transposed = tf.transpose(input_tensor, [0, 3, 1, 2, 4])
    input_shape = K.shape(input_transposed)
    batch_size = input_shape[0]
    num_in_capsules = input_shape[1]
    folded_batch = batch_size * num_in_capsules

    input_tensor_reshaped = K.reshape(input_transposed, [
        folded_batch, self.input_height, self.input_width,
        self.input_num_atoms
    ])
    input_tensor_reshaped.set_shape(
        (None, self.input_height, self.input_width, self.input_num_atoms))

    if self.upsamp_type == 'resize':
        # Resize, then a stride-1 convolution.
        upsamp = K.resize_images(input_tensor_reshaped, self.scaling,
                                 self.scaling, 'channels_last')
        outputs = K.conv2d(upsamp,
                           kernel=self.W,
                           strides=(1, 1),
                           padding=self.padding,
                           data_format='channels_last')
    elif self.upsamp_type == 'subpix':
        # Stride-1 convolution followed by depth-to-space rearrangement.
        conv = K.conv2d(input_tensor_reshaped,
                        kernel=self.W,
                        strides=(1, 1),
                        padding='same',
                        data_format='channels_last')
        outputs = tf.depth_to_space(conv, self.scaling)
    else:
        # Transposed convolution; the output shape is derived explicitly.
        out_height = deconv_output_length(input_length=self.input_height,
                                          stride=self.scaling,
                                          filter_size=self.kernel_size,
                                          padding=self.padding)
        out_width = deconv_output_length(input_length=self.input_width,
                                         stride=self.scaling,
                                         filter_size=self.kernel_size,
                                         padding=self.padding)
        output_shape = (folded_batch, out_height, out_width,
                        self.num_capsule * self.num_atoms)
        outputs = K.conv2d_transpose(input_tensor_reshaped,
                                     self.W,
                                     output_shape,
                                     (self.scaling, self.scaling),
                                     padding=self.padding,
                                     data_format='channels_last')

    votes_shape = K.shape(outputs)
    _, conv_height, conv_width, _ = outputs.get_shape()

    # Unfold back to (batch, input capsules, height, width, capsules, atoms).
    votes = K.reshape(outputs, [
        batch_size, num_in_capsules, votes_shape[1], votes_shape[2],
        self.num_capsule, self.num_atoms
    ])
    votes.set_shape((None, self.input_num_capsule, conv_height.value,
                     conv_width.value, self.num_capsule, self.num_atoms))

    logit_shape = K.stack([
        batch_size, num_in_capsules, votes_shape[1], votes_shape[2],
        self.num_capsule
    ])
    # Repeat the bias over the output height and width.
    biases_replicated = K.tile(self.b,
                               [votes_shape[1], votes_shape[2], 1, 1])

    activations = update_routing(votes=votes,
                                 biases=biases_replicated,
                                 logit_shape=logit_shape,
                                 num_dims=6,
                                 input_dim=self.input_num_capsule,
                                 output_dim=self.num_capsule,
                                 num_routing=self.routings)
    return activations
def call(self, inputs, training=None):
    """Run dynamic routing, then build three interaction summaries.

    Steps:
      1. Project ``inputs`` with ``self.reweight_W`` and run
         ``self.routings`` routing iterations to obtain routing weights.
      2. Weight the projected inputs by the routing weights.
      3. Compute a field-wise FM term (``hi_fm``), a pairwise field product
         term (``h_mf``) and a self-attention term (``high_int``).

    Shape notes in the comments are carried over from the original author's
    annotations and are not verified here.

    Parameters
    ----------
    inputs : tensor
        Input capsules; the last axis is contracted with the first axis of
        ``self.reweight_W``.
    training : optional
        Not used by this method.

    Returns
    -------
    tuple
        ``(concat_func([hi_fm, h_mf, high_int]), routing_score)``.
    """
    # Per the original notes: [None, input_num_capsule, num_capsule,
    # dim_capsule] before the permutation, with axes 1 and 2 swapped after.
    inputs_hat = tf.tensordot(inputs, self.reweight_W, axes=(-1, 0))
    inputs_hat = K.permute_dimensions(inputs_hat, (0, 2, 1, 3))

    # Routing logits: one copy of the initial logits per batch element.
    b = K.expand_dims(self.routing_init, 0)
    b = K.tile(b, [K.shape(inputs_hat)[0], 1, 1])

    assert self.routings > 0, 'The routings should be > 0.'
    for _ in range(self.routings):
        c = softmax(b, axis=1)
        outputs = squash(caps_batch_dot(c, inputs_hat, transpose=False))
        # Raise the logits by the agreement between outputs and inputs_hat.
        b += caps_batch_dot(outputs, inputs_hat, transpose=True)

    # Final routing weights, broadcast over the embedding axis.
    c = softmax(b, axis=1)
    routing_score = K.expand_dims(c, -1)
    attention_output = routing_score * inputs_hat

    # One tensor per slice along axis 1, with that axis squeezed away.
    field_wise_embeds_list = [
        K.squeeze(embeds, 1) for embeds in tf.split(
            attention_output, attention_output.shape[1], axis=1)
    ]

    # HiFM module: (sum)^2 - sum(x^2) per field, then a weighted sum.
    square_of_sum_list = [
        tf.square(reduce_sum(field_i_vectors, axis=1, keep_dims=True))
        for field_i_vectors in field_wise_embeds_list
    ]
    sum_of_square_list = [
        reduce_sum(field_i_vectors * field_i_vectors,
                   axis=1,
                   keep_dims=True)
        for field_i_vectors in field_wise_embeds_list
    ]
    field_fm = tf.concat([
        square_of_sum - sum_of_square for square_of_sum, sum_of_square in
        zip(square_of_sum_list, sum_of_square_list)
    ], 1)
    hi_fm = reduce_sum(field_fm * self.kernel_fm, axis=1)
    hi_fm = tf.nn.bias_add(hi_fm, self.bias_fm)

    # MF: element-wise products of every unordered pair of field vectors.
    field_wise_vectors = reduce_sum(attention_output,
                                    axis=2,
                                    keep_dims=False)
    pairs = list(itertools.combinations(range(self.num_fields), 2))
    left = [i for i, _ in pairs]
    right = [j for _, j in pairs]
    embeddings_left = tf.gather(params=field_wise_vectors,
                                indices=left,
                                axis=1)
    embeddings_right = tf.gather(params=field_wise_vectors,
                                 indices=right,
                                 axis=1)
    embeddings_prod = embeddings_left * embeddings_right
    field_weighted_embedding = embeddings_prod * self.kernel_mf
    h_mf = reduce_sum(field_weighted_embedding, axis=1)
    h_mf = tf.nn.bias_add(h_mf, self.bias_mf)

    # Self-attention.
    # NOTE(review): InteractingLayer objects are constructed here, inside
    # call(), so a fresh layer is created each time call() runs -- confirm
    # this is intended.
    for _ in range(self.self_attention_layer):
        field_wise_vectors = InteractingLayer(self.self_attention_factor,
                                              self.head_num,
                                              True)(field_wise_vectors)
    high_int = reduce_sum(field_wise_vectors * self.kernel_highint, axis=1)
    high_int = tf.nn.bias_add(high_int, self.bias_highint)

    return concat_func([hi_fm, h_mf, high_int]), routing_score
def _zip(foo):
    """Pair a repeated ``y`` value with the entries of ``x``.

    ``foo`` is unpacked as ``(y_val, x_val)``. ``y_val`` is tiled by the
    shape of ``x_val`` and the result is stacked with ``x_val`` along axis 1.
    """
    y_val, x_val = foo
    repeats = array_ops.shape(x_val)
    tiled_y = backend.tile(y_val, repeats)
    return array_ops.stack([tiled_y, x_val], axis=1)
def call(self, x, mask=None):
    '''
    Return the anchor box tensor for the shape of the input tensor.

    Arguments:
        x (tensor): 4D tensor `(batch, height, width, channels)`. The input
            to this layer must be the output of the localization predictor
            layer.
        mask: Not used by this method.

    Returns:
        A 5D tensor `(batch, height, width, n_boxes, 8)`. The last axis
        holds the four box coordinates followed by the four variances.
    '''
    # ============ Only this part changes between anchor strategies ========
    # Box widths and heights are derived from `this_scale` and
    # `aspect_ratios`, using the shorter image side as the reference size.
    size = min(self.img_height, self.img_width)
    wh_list = []
    for ar in self.aspect_ratios:
        if ar == 1:
            # Regular anchor box for aspect ratio 1.
            box_height = box_width = self.this_scale * size
            wh_list.append((box_width, box_height))
            if self.two_boxes_for_ar1:
                # A slightly larger box using the geometric mean of this
                # scale and the next one.
                box_height = box_width = np.sqrt(
                    self.this_scale * self.next_scale) * size
                wh_list.append((box_width, box_height))
        else:
            box_height = self.this_scale * size / np.sqrt(ar)
            box_width = self.this_scale * size * np.sqrt(ar)
            wh_list.append((box_width, box_height))
    wh_list = np.array(wh_list)
    # ======================================================================

    # Static spatial shape of the input. K.int_shape returns `_keras_shape`
    # when the tensor carries it and falls back to the static tensor shape
    # otherwise, so no private attribute is read directly.
    _, feature_map_height, feature_map_width, _ = K.int_shape(x)

    # Step sizes and offsets; the same value is used for both axes.
    step_height = self.this_steps
    step_width = self.this_steps
    offset_height = self.this_offsets
    offset_width = self.this_offsets

    # Grid of anchor box centre points.
    cy = np.linspace(offset_height * step_height,
                     (offset_height + feature_map_height - 1) * step_height,
                     feature_map_height)
    cx = np.linspace(offset_width * step_width,
                     (offset_width + feature_map_width - 1) * step_width,
                     feature_map_width)
    cx_grid, cy_grid = np.meshgrid(cx, cy)
    cx_grid = np.expand_dims(cx_grid, -1)
    cy_grid = np.expand_dims(cy_grid, -1)

    # 4D template `(feature_map_height, feature_map_width, n_boxes, 4)`
    # whose last axis holds `(cx, cy, w, h)`.
    boxes_tensor = np.zeros(
        (feature_map_height, feature_map_width, self.n_boxes, 4))
    boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, self.n_boxes))  # cx
    boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, self.n_boxes))  # cy
    boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # w
    boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # h

    # Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`.
    boxes_tensor = convert_coordinates(boxes_tensor,
                                       start_index=0,
                                       conversion='centroids2corners')

    # Normalise so that all values lie relative to the image size.
    if self.normalize_coords:
        boxes_tensor[:, :, :, [0, 2]] /= self.img_width
        boxes_tensor[:, :, :, [1, 3]] /= self.img_height

    if self.coords == 'centroids':
        # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
        boxes_tensor = convert_coordinates(boxes_tensor,
                                           start_index=0,
                                           conversion='corners2centroids',
                                           border_pixels='half')
    elif self.coords == 'minmax':
        # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
        boxes_tensor = convert_coordinates(boxes_tensor,
                                           start_index=0,
                                           conversion='corners2minmax',
                                           border_pixels='half')

    # Variances broadcast to the same shape as the boxes:
    # `(feature_map_height, feature_map_width, n_boxes, 4)`.
    variances_tensor = np.zeros_like(boxes_tensor)
    variances_tensor += self.variances

    # `boxes_tensor` becomes
    # `(feature_map_height, feature_map_width, n_boxes, 8)`.
    boxes_tensor = np.concatenate((boxes_tensor, variances_tensor), axis=-1)

    # Add a leading batch axis and repeat it for every sample in the batch.
    boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
    boxes_tensor = K.tile(K.constant(boxes_tensor, dtype='float32'),
                          (K.shape(x)[0], 1, 1, 1, 1))

    return boxes_tensor
def constant(input_batch, size):
    """Return a `(batch_size, size)` tensor of ones.

    `batch_size` is read dynamically from the first axis of `input_batch`;
    a single row of ones is tiled that many times.
    """
    ones_row = K.ones((1, size))
    num_rows = K.shape(input_batch)[0]
    return K.tile(ones_row, (num_rows, 1))
def msssim(self, y_true, y_pred):
    '''
    Compute the multiscale SSIM map according to Zhao 2016.

    Has only been tested with the tensorflow backend (channels last) so far!
    All kernels are applied in one go with depthwise convolutions.
    This function takes proper 2D Keras Tensors (NWHC or NCWH).

    # Arguments
        y_true: Keras Tensor with Rank 4: Image to compare to y_pred
        y_pred: Keras Tensor with Rank 4: Image to compare

    # Returns
        The (unnormalised) MS-SSIM map: the luminance term of the last
        kernel of each channel times the product of that channel's
        contrast-structure terms.
    '''
    channels = self.__int_shape(y_pred)[self.channel_dim]
    num = self.num
    channels_last = self.dim_ordering == 'channels_last'

    # Repeat the kernel stack once per image channel.
    kernel = K.tile(self.kernels, [1, 1, channels, 1])

    def smooth(tensor):
        # Depthwise-filter `tensor` with every kernel, keeping spatial size.
        return K.depthwise_conv2d(tensor, kernel, padding='same')

    # Local means and their (cross) products.
    mu_true = smooth(y_true)
    mu_pred = smooth(y_pred)
    mu_true_sq = K.square(mu_true)
    mu_pred_sq = K.square(mu_pred)
    mu_true_pred = mu_true * mu_pred

    # Local variances / covariance: filtered second moments minus the
    # corresponding products of means.
    sigma_true_sq = smooth(K.square(y_true)) - mu_true_sq
    sigma_pred_sq = smooth(K.square(y_pred)) - mu_pred_sq
    sigma_true_pred = smooth(y_true * y_pred) - mu_true_pred

    # Luminance term; keep only the last kernel of every channel.
    luminance = (2 * mu_true_pred + self.c1) / (
        mu_true_sq + mu_pred_sq + self.c1)
    if channels_last:
        luminance_max = luminance[:, :, :, (num - 1)::num]
    else:
        luminance_max = luminance[:, (num - 1)::num, :, :]

    # Contrast-structure term.
    cs = (2 * sigma_true_pred + self.c2) / (
        sigma_true_sq + sigma_pred_sq + self.c2)

    # Product of the contrast-structure terms over each channel's kernels.
    pcs = []
    for i in range(channels):
        start, stop = i * num, (i + 1) * num
        if channels_last:
            pcs.append(
                K.prod(cs[:, :, :, start:stop], axis=-1, keepdims=True))
        else:
            pcs.append(K.prod(cs[:, start:stop, :, :], axis=1,
                              keepdims=True))
    pcs = K.concatenate(pcs, axis=self.channel_dim)

    # MS-SSIM map (no normalisation is applied).
    msssim_map = luminance_max * pcs
    return msssim_map