示例#1
0
    def _calculate_features(self, xy, wh, objectiveness, classes, anchors):
        """Decode raw YOLO head outputs into normalised box parameters.

        Args:
            xy: raw centre offsets. Axes 1 and 2 are used as the grid rows
                and columns (assumed layout is
                (batch, rows, cols, anchors, 2) -- TODO confirm with caller).
            wh: raw width/height values; passed through ``exp`` and scaled
                by the anchors.
            objectiveness: raw objectness logits.
            classes: raw class logits.
            anchors: sequence of anchor pairs, reshaped to
                ``[1, 1, 1, len(anchors), 2]``.

        Returns:
            Tuple ``(box_xy, box_wh, obj_sig, class_sig)``.
        """
        shape = K.shape(xy)[1:3]  # grid rows, grid cols
        rows, cols = shape[0], shape[1]

        xy_sig = K.sigmoid(xy)
        # TODO rethink logic here, grid needs to be calculated just once after model initialization
        # col[i, j] == j and row[i, j] == i. Building them from separate
        # row/column counts keeps the grid valid for non-square feature maps
        # (the previous square-only tiling failed to reshape otherwise).
        col = K.tile(K.reshape(K.arange(0, cols), (1, -1)), [rows, 1])
        row = K.tile(K.reshape(K.arange(0, rows), (-1, 1)), [1, cols])
        col = K.repeat_elements(K.reshape(col, (shape[0], shape[1], 1, 1)),
                                rep=len(anchors),
                                axis=-2)
        row = K.repeat_elements(K.reshape(row, (shape[0], shape[1], 1, 1)),
                                rep=len(anchors),
                                axis=-2)
        grid = K.concatenate((col, row), axis=-1)
        # TODO same thing for the anchors
        anchors_tensor = K.reshape(K.constant(anchors),
                                   [1, 1, 1, len(anchors), 2])

        # The grid stores (column, row) pairs, so normalise by (cols, rows).
        # The explicit cast avoids dividing a float tensor by int32 shapes.
        grid_size = K.cast(K.stack([cols, rows]), K.dtype(xy_sig))
        box_xy = (xy_sig + K.cast(grid, K.dtype(xy_sig))) / grid_size

        box_wh = K.exp(wh) * anchors_tensor / K.cast(self.input_image_dims,
                                                     K.dtype(wh))

        obj_sig = K.sigmoid(objectiveness)
        class_sig = K.sigmoid(classes)

        return box_xy, box_wh, obj_sig, class_sig
示例#2
0
	def compute_position_ids(self, inputs):
		"""Compute T5-style relative position buckets.

		``inputs`` is a ``(q, v)`` pair; only the length of axis 1 of each
		tensor is used. Returns an int tensor of bucket ids with one row per
		query position and one column per value position.
		"""
		query, value = inputs
		# Pairwise position difference: value index minus query index.
		query_pos = K.expand_dims(
			K.arange(0, K.shape(query)[1], dtype='int32'), 1)
		value_pos = K.expand_dims(
			K.arange(0, K.shape(value)[1], dtype='int32'), 0)
		relative = value_pos - query_pos
		# Map the differences to buckets.
		bucket_count = self.input_dim
		distance_cap = self.max_distance
		distance = -relative
		if self.bidirectional:
			# Half of the buckets are reserved for negative distances.
			bucket_count //= 2
			offset = 0 + K.cast(K.less(distance, 0), 'int32') * bucket_count
			distance = K.abs(distance)
		else:
			offset = 0
			distance = K.maximum(distance, 0)
		# From here on distance lies in [0, inf).
		exact_limit = bucket_count // 2
		within_exact = K.less(distance, exact_limit)
		# Beyond the exact range the buckets grow logarithmically.
		log_ratio = K.log(K.cast(distance, K.floatx()) / exact_limit)
		scaled = log_ratio / np.log(distance_cap / exact_limit)
		far_bucket = exact_limit + K.cast(
			scaled * (bucket_count - exact_limit),
			'int32',
		)
		far_bucket = K.minimum(far_bucket, bucket_count - 1)
		return offset + K.switch(within_exact, distance, far_bucket)
示例#3
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert a YOLO output layer into box parameters.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``
    (``feats`` being the reshaped tensor), otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_count = len(anchors)
    feat_dtype = K.dtype(feats)
    anchor_grid = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Axes 1 and 2 of feats give the grid rows and columns.
    grid_hw = K.shape(feats)[1:3]
    rows, cols = grid_hw[0], grid_hw[1]

    row_index = K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1])
    col_index = K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1])
    # Last axis holds (column index, row index) for every cell.
    grid = K.concatenate([
        K.tile(col_index, [rows, 1, 1, 1]),
        K.tile(row_index, [1, cols, 1, 1]),
    ])
    grid = K.cast(grid, feat_dtype)

    feats = K.reshape(feats,
                      [-1, rows, cols, anchor_count, num_classes + 5])

    centre = K.sigmoid(feats[..., :2]) + grid
    box_xy = centre / K.cast(grid_hw[::-1], K.dtype(feats))
    size = K.exp(feats[..., 2:4]) * anchor_grid
    box_wh = size / K.cast(input_shape[::-1], K.dtype(feats))

    if calc_loss == True:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
示例#4
0
def shift(shape, stride, anchors):
    """Replicate the base anchors at every cell centre of a feature map.

    Args:
        shape: Indexable whose element 0 is the number of rows and element 1
            the number of columns to shift over.
        stride: Distance between neighbouring cell centres.
        anchors: Base anchors, reshaped to ``[1, number_of_anchors, 4]``.

    Returns:
        A backend tensor of shape ``[rows * cols * number_of_anchors, 4]``
        holding every anchor offset by every cell centre.
    """
    half_cell = K.constant(0.5, dtype=K.floatx())
    centres_x = (K.arange(0, shape[1], dtype=K.floatx()) + half_cell) * stride
    centres_y = (K.arange(0, shape[0], dtype=K.floatx()) + half_cell) * stride

    mesh_x, mesh_y = tf.meshgrid(centres_x, centres_y)
    flat_x = K.reshape(mesh_x, [-1])
    flat_y = K.reshape(mesh_y, [-1])

    # One (x, y, x, y) row per cell centre, matching the 4 anchor coordinates.
    offsets = K.transpose(K.stack([flat_x, flat_y, flat_x, flat_y], axis=0))

    anchor_count = K.shape(anchors)[0]
    point_count = K.shape(offsets)[0]  # rows * cols base points

    # Broadcast [points, 1, 4] against [1, anchors, 4].
    offsets = K.cast(K.reshape(offsets, [point_count, 1, 4]), K.floatx())
    placed = K.reshape(anchors, [1, anchor_count, 4]) + offsets
    return K.reshape(placed, [point_count * anchor_count, 4])
示例#5
0
 def _compute_valid_seed_region(self):
     """Build the mask of positions at least half a block from every edge.

     Positions inside the margin take their value from ``self.ones``, all
     others from ``self.zeros``. The result gets a leading and a trailing
     singleton axis.
     """
     # Row and column index of every position on the height x width plane.
     row_ids = K.tile(K.expand_dims(K.arange(self.height), axis=1),
                      [1, self.width])
     col_ids = K.tile(K.expand_dims(K.arange(self.width), axis=0),
                      [self.height, 1])
     margin = self.block_size // 2
     conditions = [
         row_ids >= margin,
         col_ids >= margin,
         row_ids < self.height - margin,
         col_ids < self.width - margin,
     ]
     inside = K.all(K.stack(conditions, axis=-1), axis=-1)
     region = K.switch(inside, self.ones, self.zeros)
     return K.expand_dims(K.expand_dims(region, axis=0), axis=-1)
示例#6
0
    def call(self, inputs, training=None, **kwargs):
        """Build the content and query attention masks.

        ``inputs`` is an ``(inputs, memory)`` pair. Returns
        ``[content_mask, query_mask]``; in each, the memory part (all ones)
        is concatenated in front of the sequence part and the last two axes
        are swapped.
        """
        inputs, memory = inputs
        input_dims = K.shape(inputs)
        batch_size, seq_len = input_dims[0], input_dims[1]
        mem_mask = K.tile(
            K.ones_like(memory[:, :, :1], dtype=K.floatx()),
            [1, 1, seq_len],
        )

        # Content mask: position i may attend to j when i <= order[j].
        positions = K.tile(
            K.expand_dims(K.arange(0, seq_len), axis=-1), [1, batch_size])
        permuted = random_shuffle(positions) if self.enabled else positions
        if self.directional:
            order = K.in_train_phase(permuted, positions, training)
        elif self.enabled:
            order = K.in_train_phase(permuted, positions + seq_len, training)
        else:
            # Adding seq_len makes every comparison below succeed.
            order = positions + seq_len
        row_positions = K.expand_dims(
            K.permute_dimensions(positions, [1, 0]), axis=-1)
        col_order = K.expand_dims(
            K.permute_dimensions(order, [1, 0]), axis=1)
        content_mask = K.cast(row_positions <= col_order, dtype=K.floatx())

        # Query mask: the content mask with the diagonal removed.
        index = K.arange(0, seq_len)
        diagonal = K.equal(K.expand_dims(index, axis=0),
                           K.expand_dims(index, axis=-1))
        diagonal = K.expand_dims(K.cast(diagonal, dtype=K.floatx()), axis=0)
        query_mask = content_mask * (1.0 - diagonal)

        content_mask = K.concatenate([mem_mask, content_mask], axis=1)
        query_mask = K.concatenate([mem_mask, query_mask], axis=1)
        return [
            K.permute_dimensions(content_mask, [0, 2, 1]),
            K.permute_dimensions(query_mask, [0, 2, 1]),
        ]
示例#7
0
文件: yolo.py 项目: rameez471/yolo-tf
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final predictions into bounding boxes.

    Args:
        feats: output of a YOLO layer; axes 1 and 2 are the grid height and
            width.
        anchors: sequence of anchor pairs for this layer.
        num_classes: number of classes; each anchor carries
            ``num_classes + 5`` values.
        input_shape: indexable giving the network input size; it is reversed
            before dividing the box sizes.
        calc_loss: when truthy, return the intermediate tensors the loss
            needs.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` if ``calc_loss`` is truthy, else
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Anchors broadcast as (batch, height, width, num_anchors, box_params).
    anchor_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # (height, width)
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # Use the dynamic grid_shape here: grid.shape is the static shape and
    # may be undefined when the model accepts variable input sizes.
    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchor_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh

    return box_xy, box_wh, box_confidence, box_class_probs
示例#8
0
    def call(self, inputs, mask=None, **kwargs):
        """Apply self-attention over axis 1 of ``inputs``.

        Returns the attended values, or ``[values, attention]`` when
        ``self.return_attention`` is set.
        """
        input_len = K.shape(inputs)[1]

        if self.attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
            scores = self._call_additive_emission(inputs)
        elif self.attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
            scores = self._call_multiplicative_emission(inputs)

        if self.attention_activation is not None:
            scores = self.attention_activation(scores)

        if self.attention_width is not None:
            # Restrict each position to a local window [lower, upper).
            if self.history_only:
                window_start = K.arange(0, input_len) - (self.attention_width - 1)
            else:
                window_start = K.arange(0, input_len) - self.attention_width // 2
            window_start = K.expand_dims(window_start, axis=-1)
            window_end = window_start + self.attention_width
            columns = K.expand_dims(K.arange(0, input_len), axis=0)
            in_window = (K.cast(window_start <= columns, K.floatx()) *
                         K.cast(columns < window_end, K.floatx()))
            # A large negative bias drives out-of-window weights towards 0.
            scores = scores - 10000.0 * (1.0 - in_window)

        if mask is not None:
            mask = K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
            masked_pairs = (1.0 - mask) * (
                1.0 - K.permute_dimensions(mask, (0, 2, 1)))
            scores = scores - 10000.0 * masked_pairs

        # a_{t} = \text{softmax}(e_t), shifted by the row maximum first
        exp_scores = K.exp(scores - K.max(scores, axis=-1, keepdims=True))
        attention = exp_scores / K.sum(exp_scores, axis=-1, keepdims=True)

        # l_t = \sum_{t'} a_{t, t'} x_{t'}
        attended = K.batch_dot(attention, inputs)
        if self.attention_regularizer_weight > 0.0:
            self.add_loss(self._attention_regularizer(attention))

        if self.return_attention:
            return [attended, attention]
        return attended
示例#9
0
def positional_signal(hidden_size: int, length: int,
                      min_timescale: float = 1.0, max_timescale: float = 1e4):
    """
    Helper function, constructing basic positional encoding.
    The code is partially based on implementation from Tensor2Tensor library
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/layers/common_attention.py

    Args:
        hidden_size: model width; ``hidden_size // 2`` timescales are used.
        length: number of positions to encode.
        min_timescale: smallest timescale of the geometric progression.
        max_timescale: largest timescale of the geometric progression.

    Returns:
        Tensor of shape ``(1, length, 2 * (hidden_size // 2))`` with the
        sine half followed by the cosine half.
    """
    # NOTE: unlike some sibling implementations, this variant does not
    # reject odd ``hidden_size``; the signal then has ``hidden_size - 1``
    # channels.
    position = K.arange(0, length, dtype=tf.float32)
    num_timescales = hidden_size // 2
    # Clamp the divisor to 1 (as Tensor2Tensor does) so that a single
    # timescale does not produce a division by zero and NaN encodings.
    log_timescale_increment = tf.constant(
        (np.log(float(max_timescale) / float(min_timescale)) /
         max(num_timescales - 1, 1)),
        dtype=tf.float32)
    inv_timescales = (
            min_timescale *
            tf.exp(K.arange(num_timescales, dtype=tf.float32) *
                  -log_timescale_increment))
    scaled_time = tf.expand_dims(position, 1) * tf.expand_dims(inv_timescales, 0)
    signal = tf.concat([tf.sin(scaled_time), tf.cos(scaled_time)], axis=1)
    return tf.expand_dims(signal, axis=0)
 def call(self,
          inputs: tensorflow.Tensor,
          mask: Optional[tensorflow.Tensor] = None,
          **kwargs) -> tensorflow.Tensor:
     """Scaled dot-product attention.

     ``inputs`` is either a ``[query, key, value]`` list or a single tensor
     used for all three. When ``mask`` is a list, its second entry is used.
     Returns the attended values, or ``[values, attention]`` when
     ``self.return_attention`` is set.
     """
     if isinstance(inputs, list):
         query, key, value = inputs
     else:
         query = key = value = inputs
     if isinstance(mask, list):
         mask = mask[1]
     feature_dim = K.shape(query)[-1]
     e = K.batch_dot(query, key, axes=2) / K.sqrt(
         K.cast(feature_dim, dtype=K.floatx()))
     # Subtract the row maximum before exponentiating.
     e = K.exp(e - K.max(e, axis=-1, keepdims=True))
     if self.history_only:
         query_len, key_len = K.shape(query)[1], K.shape(key)[1]
         indices = K.tile(K.expand_dims(K.arange(key_len), axis=0),
                          [query_len, 1])
         # One upper bound per *query* position, so the comparison is
         # (query_len, key_len) even when the two lengths differ.
         upper = K.expand_dims(K.arange(query_len), axis=-1)
         e *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0)
     if mask is not None:
         e *= K.cast(K.expand_dims(mask, axis=-2), K.floatx())
     # epsilon guards against rows that were masked out completely.
     a = e / (K.sum(e, axis=-1, keepdims=True) + K.epsilon())
     v = K.batch_dot(a, value)
     if self.return_attention:
         return [v, a]
     return v
示例#11
0
def positional_signal(hidden_size: int, length: int,
                      min_timescale: float = 1.0, max_timescale: float = 1e4):
    """
    Helper function, constructing positional encodings as described in
    "Attention is All You Need" (https://arxiv.org/abs/1706.03762)
    The implementation was taken from https://github.com/kpot/keras-transformer

    Args:
        hidden_size: model width; must be even.
        length: number of positions to encode.
        min_timescale: smallest timescale of the geometric progression.
        max_timescale: largest timescale of the geometric progression.

    Returns:
        Tensor of shape ``(1, length, hidden_size)`` with the sine half
        followed by the cosine half.

    Raises:
        ValueError: if ``hidden_size`` is odd.
    """

    if hidden_size % 2 != 0:
        raise ValueError(
            f"The hidden dimension of the model must be divisible by 2. "
            f"Currently it is {hidden_size}")
    position = K.arange(0, length, dtype=K.floatx())
    num_timescales = hidden_size // 2
    # Clamp the divisor to 1 so that hidden_size == 2 (a single timescale)
    # does not divide by zero and turn the whole signal into NaN.
    log_timescale_increment = K.constant(
        (np.log(float(max_timescale) / float(min_timescale)) /
         max(num_timescales - 1, 1)),
        dtype=K.floatx())
    inv_timescales = (
            min_timescale *
            K.exp(K.arange(num_timescales, dtype=K.floatx()) *
                  -log_timescale_increment))
    scaled_time = K.expand_dims(position, 1) * K.expand_dims(inv_timescales, 0)
    signal = K.concatenate([K.sin(scaled_time), K.cos(scaled_time)], axis=1)
    return K.expand_dims(signal, axis=0)
示例#12
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert a YOLO output layer into box parameters.

    The cell grid comes from the ``meshgrid`` helper applied to the width
    and height index ranges. Returns ``(grid, feats, box_xy, box_wh)`` when
    ``calc_loss == True``, otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_count = len(anchors)
    # Anchors broadcast as batch, height, width, num_anchors, box_params.
    anchor_grid = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    conv_dims = K.shape(feats)[1:3]
    rows, cols = conv_dims[0], conv_dims[1]
    row_ids = K.arange(0, stop=rows)
    col_ids = K.arange(0, stop=cols)

    x_axis, y_axis = meshgrid(col_ids, row_ids)
    grid = K.cast(K.concatenate([x_axis, y_axis]), K.dtype(feats))

    feats = K.reshape(feats,
                      [-1, rows, cols, anchor_count, num_classes + 5])
    out_dtype = K.dtype(feats)

    # Adjust predictions to each spatial grid point and anchor size.
    centre = K.sigmoid(feats[..., :2]) + grid
    box_xy = centre / K.cast(conv_dims[::-1], out_dtype)
    size = K.exp(feats[..., 2:4]) * anchor_grid
    box_wh = size / K.cast(input_shape[::-1], out_dtype)

    if calc_loss == True:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
示例#13
0
def yolo_head(feats, anchors, input_shape, calc_loss=False, att_map=None):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: output of a detection layer; axes 1 and 2 are the grid
            height and width, each anchor carries 5 values.
        anchors: sequence of anchor pairs for this layer.
        input_shape: tensor/array giving the network input size; it is
            reversed along its last axis before dividing the box sizes.
        calc_loss: when ``== True``, return the tensors the loss needs.
        att_map: accepted for backward compatibility. It does not influence
            the result: the attention-weighted confidence
            (``confidence * .8 + seg_map * .2``) was disabled upstream.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` if ``calc_loss == True``, else
        ``(box_xy, box_wh, box_confidence)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(feats,
                      [-1, grid_shape[0], grid_shape[1], num_anchors, 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))
    # Both the att_map and the plain path used the same expression; the
    # tiled attention map was built but never read, so it is no longer
    # computed.
    box_confidence = K.sigmoid(feats[..., 4:5])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence
示例#14
0
    def call(self, inputs):
        """Return normalised (x, y) location maps for every batch element.

        The two spatial axes are read from ``inputs`` according to
        ``self.data_format``. Each coordinate runs from 0 to 1 along its
        axis. The output has 2 channels, placed on axis 1 for
        ``'channels_first'`` and on the last axis otherwise, and is repeated
        along the batch axis.
        """
        input_shape = K.shape(inputs)
        channels_first = self.data_format == 'channels_first'
        if channels_first:
            x = K.arange(0, input_shape[2], dtype=inputs.dtype)
            y = K.arange(0, input_shape[3], dtype=inputs.dtype)
        else:
            x = K.arange(0, input_shape[1], dtype=inputs.dtype)
            y = K.arange(0, input_shape[2], dtype=inputs.dtype)

        def _normalise(coords):
            # For an axis of length 1 the maximum is 0; clamping the
            # divisor to 1 yields 0 instead of the NaN produced by 0 / 0.
            peak = K.max(coords)
            return coords / K.maximum(peak, K.ones_like(peak))

        x = _normalise(x)
        y = _normalise(y)

        loc_x, loc_y = tf.meshgrid(x, y, indexing='ij')

        loc = K.stack([loc_x, loc_y], axis=0 if channels_first else -1)
        location = K.expand_dims(loc, axis=0)

        # Tiling only touches the batch axis, so it works for either layout
        # without permuting to channels-last and back.
        return tf.tile(location, [input_shape[0], 1, 1, 1])
示例#15
0
def cell_offset_table(scale_size, input_size=416):
    """Build the table of per-cell offsets for a square detection grid.

    Args:
        scale_size: number of cells along each side of the grid.
        input_size: side length the grid is scaled to; each cell index is
            multiplied by ``input_size / scale_size``. Defaults to 416,
            the value that used to be hard-coded.

    Returns:
        float32 tensor of shape ``[1, scale_size, scale_size, 1, 2]``
        holding the scaled cell indices.
    """
    # Dynamic implementation of conv dims for fully convolutional model.
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=scale_size)
    conv_width_index = K.arange(0, stop=scale_size)
    # tile repeats the whole range: 0..s-1, 0..s-1, ... (s times).
    conv_height_index = K.tile(conv_height_index, [scale_size])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # Tiling with [n, m] yields a 2-D tensor: [0..s-1], [0..s-1], ...
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [scale_size, 1])

    # After transpose + flatten: 0 (s times), 1 (s times), ...
    conv_width_index = K.flatten(K.transpose(conv_width_index))

    # Pairs: [0, 0], [1, 0], [2, 0], ..., [s-2, s-1], [s-1, s-1]
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))

    # One index pair per cell, laid out as [1, s, s, 1, 2].
    conv_index = K.reshape(conv_index, [1, scale_size, scale_size, 1, 2])

    conv_index = K.cast(conv_index, tf.float32)

    # Size of one cell measured in input_size units.
    diff = (1 / scale_size * input_size)
    conv_index = conv_index * diff

    return conv_index
示例#16
0
    def _encoder(x):
        """Run ``x`` through the transformer stack and a learned-query head.

        A grid of 3 x 10 learned embeddings (3 classes, 10 degrees) serves
        as the query of the final attention over the encoded ``x``.
        """

        def _embedding_axis(size, axis):
            # Fixed ids 0..size-1 looked up in a fresh embedding table; the
            # result gets a singleton axis for broadcasting.
            ids = K.stop_gradient(K.expand_dims(K.arange(size), axis=0))
            table = tf.keras.layers.Embedding(size, d_model)(ids)
            return K.expand_dims(table, axis=axis)

        class_part = _embedding_axis(3, 2)    # (1, 3, 1, d_model)
        degree_part = _embedding_axis(10, 1)  # (1, 1, 10, d_model)

        queries = tf.keras.layers.Reshape((30, d_model))(
            class_part + degree_part)

        for _ in range(n_layers):
            x = transformer_layer(d_model, n_heads, dff, rate)(x)

        attention = multi_head_attention(d_model, n_heads,
                                         perm_and_reshape=False)
        x = attention(queries, x, x)
        x = tf.keras.layers.Dropout(rate)(x)
        x = tf.keras.layers.BatchNormalization()(x)

        if softmax:
            x = tf.keras.layers.Softmax()(x)

        return x
示例#17
0
    def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
        """Convert raw detections into box parameters.

        Example: (batch_size, 13, 13, 255) -> (batch_size, 13, 13, 3, 85).
        Returns ``(grid, feats, box_xy, box_wh)`` when
        ``calc_loss == True``, otherwise
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
        """
        anchor_count = len(anchors)
        anchor_grid = K.reshape(K.constant(anchors),
                                [1, 1, 1, anchor_count, 2])

        grid_hw = K.shape(feats)[1:3]  # feature map height and width
        rows, cols = grid_hw[0], grid_hw[1]
        row_index = K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1])
        col_index = K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1])
        # Coordinates of every grid cell of the feature map, stored as
        # (column index, row index) on the last axis.
        grid = K.concatenate([
            K.tile(col_index, [rows, 1, 1, 1]),
            K.tile(row_index, [1, cols, 1, 1]),
        ])
        grid = K.cast(grid, K.dtype(feats))

        feats = K.reshape(
            feats, [-1, rows, cols, anchor_count, num_classes + 5])
        out_dtype = K.dtype(feats)

        # Cell coordinate plus predicted offset, normalised by grid size.
        box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
            grid_hw[::-1], out_dtype)
        box_wh = K.exp(feats[..., 2:4]) * anchor_grid / K.cast(
            input_shape[::-1], out_dtype)

        if calc_loss == True:
            return grid, feats, box_xy, box_wh

        box_confidence = K.sigmoid(feats[..., 4:5])
        # todo: what is the purpose of applying the activation here
        box_class_probs = K.sigmoid(feats[..., 5:])
        return box_xy, box_wh, box_confidence, box_class_probs
def yolo3_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Turn final layer features into bounding box parameters.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``
    (``feats`` reshaped to one slot per anchor), otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    n_anchors = len(anchors)
    # Laid out as batch, height, width, num_anchors, box_params.
    anchor_wh = K.reshape(K.constant(anchors), [1, 1, 1, n_anchors, 2])

    hw = K.shape(feats)[1:3]  # height, width
    height, width = hw[0], hw[1]

    ys = K.tile(K.reshape(K.arange(0, stop=height), [-1, 1, 1, 1]),
                [1, width, 1, 1])
    xs = K.tile(K.reshape(K.arange(0, stop=width), [1, -1, 1, 1]),
                [height, 1, 1, 1])
    grid = K.cast(K.concatenate([xs, ys]), K.dtype(feats))

    feats = K.reshape(feats,
                      [-1, height, width, n_anchors, num_classes + 5])
    dtype = K.dtype(feats)

    # Offsets are relative to the cell; sizes are relative to the anchors.
    xy_in_grid = K.sigmoid(feats[..., :2]) + grid
    wh_in_pixels = K.exp(feats[..., 2:4]) * anchor_wh
    box_xy = xy_in_grid / K.cast(hw[::-1], dtype)
    box_wh = wh_in_pixels / K.cast(input_shape[::-1], dtype)

    if calc_loss == True:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
 def call(self, inputs, mask=None, **kwargs):
     """Scaled dot-product attention with a residual score connection.

     ``inputs`` is ``[query, key, value, prev]`` or ``[x, prev]``; in the
     second form ``x`` is used as query, key and value. ``prev`` is added
     to the raw scores. ``mask`` is either None or a sequence from which
     the key mask is taken (index 1 in the 4-input form, index 0
     otherwise).

     Returns ``[values, new_prev]`` plus the attention weights when
     ``self.return_attention`` is set. Also stores the masked scores in
     ``self.intensity`` and the weights in ``self.attention``.
     """
     if len(inputs) == 4:
         query, key, value, prev = inputs
         mask_index = 1
     else:
         query = key = value = inputs[0]
         prev = inputs[1]
         mask_index = 0
     # mask defaults to None; only index into it when one was supplied.
     if mask is not None:
         mask = mask[mask_index]
     feature_dim = K.shape(query)[-1]
     e = K.batch_dot(query, key, axes=2) / K.sqrt(
         K.cast(feature_dim, dtype=K.floatx()))
     # The summed scores are handed on before any masking is applied.
     new_prev = e = e + prev
     if self.history_only:
         query_len, key_len = K.shape(query)[1], K.shape(key)[1]
         indices = K.expand_dims(K.arange(0, key_len), axis=0)
         upper = K.expand_dims(K.arange(0, query_len), axis=-1)
         # Large negative bias on keys that lie after the query position.
         e -= 10000.0 * K.expand_dims(K.cast(indices > upper, K.floatx()),
                                      axis=0)
     if mask is not None:
         e -= 10000.0 * (1.0 -
                         K.cast(K.expand_dims(mask, axis=-2), K.floatx()))
     self.intensity = e
     e = K.exp(e - K.max(e, axis=-1, keepdims=True))
     self.attention = e / K.sum(e, axis=-1, keepdims=True)
     v = K.batch_dot(self.attention, value)
     output = [v, new_prev]
     if self.return_attention:
         output.append(self.attention)
     return output
示例#20
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode a YOLO output layer into box parameters.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``,
    otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    anchor_total = len(anchors)
    # Shape [1, 1, 1, num_anchors, 2] so the anchors broadcast per cell.
    anchor_sizes = K.reshape(K.constant(anchors),
                             [1, 1, 1, anchor_total, 2])

    # Build the x/y cell grid, e.g. (13, 13, 1, 2).
    cell_dims = K.shape(feats)[1:3]  # height, width
    n_rows, n_cols = cell_dims[0], cell_dims[1]
    cell_y = K.tile(K.reshape(K.arange(0, stop=n_rows), [-1, 1, 1, 1]),
                    [1, n_cols, 1, 1])
    cell_x = K.tile(K.reshape(K.arange(0, stop=n_cols), [1, -1, 1, 1]),
                    [n_rows, 1, 1, 1])
    grid = K.cast(K.concatenate([cell_x, cell_y]), K.dtype(feats))

    # e.g. (batch_size, 13, 13, 3, 85)
    feats = K.reshape(
        feats, [-1, n_rows, n_cols, anchor_total, num_classes + 5])
    work_dtype = K.dtype(feats)

    # Turn the raw predictions into normalised values:
    # box_xy is the box centre, box_wh the box width and height.
    grid_wh = K.cast(cell_dims[..., ::-1], work_dtype)
    input_wh = K.cast(input_shape[..., ::-1], work_dtype)
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / grid_wh
    box_wh = K.exp(feats[..., 2:4]) * anchor_sizes / input_wh

    # The loss computation needs the intermediate tensors instead.
    if calc_loss == True:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
示例#21
0
文件: layers.py 项目: adowu/bert4one
    def call(self, inputs):
        """Merge sinusoidal position embeddings into ``inputs``.

        If ``self.custom_position_ids`` is set, ``inputs`` is a pair whose
        second element holds the custom position ids; otherwise positions
        0..seq_len-1 are used. The embeddings are added to, multiplied
        with, or concatenated to the inputs depending on
        ``self.merge_mode``.
        """
        if self.custom_position_ids:
            # Unpack first: the sequence length must come from the actual
            # input tensor, not from the [inputs, position_ids] list.
            inputs, position_ids = inputs
            seq_len = K.shape(inputs)[1]
            if 'float' not in K.dtype(position_ids):
                position_ids = K.cast(position_ids, K.floatx())
        else:
            input_shape = K.shape(inputs)
            batch_size, seq_len = input_shape[0], input_shape[1]
            position_ids = K.arange(0, seq_len, dtype=K.floatx())[None]

        indices = K.arange(0, self.output_dim // 2, dtype=K.floatx())
        indices = K.pow(10000.0, -2 * indices / self.output_dim)
        embeddings = tf.einsum('bn,d->bnd', position_ids, indices)
        # Stacking on a new last axis and reshaping interleaves sin and cos.
        embeddings = K.stack([K.sin(embeddings), K.cos(embeddings)], axis=-1)
        embeddings = K.reshape(embeddings, (-1, seq_len, self.output_dim))

        if self.merge_mode == 'add':
            return inputs + embeddings
        elif self.merge_mode == 'mul':
            return inputs * embeddings
        else:
            if not self.custom_position_ids:
                # Default positions have batch size 1; repeat them so the
                # concatenation matches the input batch.
                embeddings = K.tile(embeddings, [batch_size, 1, 1])
            return K.concatenate([inputs, embeddings])
    def call(self, inputs):
        """Scatter ``inputs`` into an upsampled tensor at the argmax positions.

        The output spatial size is the input size multiplied by
        ``self.stride[1]`` and ``self.stride[2]``. Target positions come
        from ``self.pooling_argmax``.
        NOTE(review): relies on the static ``inputs.shape``, so the batch
        dimension presumably has to be known -- confirm with callers.
        """
        in_shape = inputs.shape
        batch = in_shape[0]
        out_rows = in_shape[1] * self.stride[1]
        out_cols = in_shape[2] * self.stride[2]
        channels = in_shape[3]
        output_shape = (batch, out_rows, out_cols, channels)

        argmax = self.pooling_argmax
        ones = K.ones_like(argmax)

        # Batch index of every element, broadcast over the remaining axes.
        batch_ids = K.reshape(
            K.arange(start=0, stop=batch, dtype='int64'),
            shape=[batch, 1, 1, 1])
        b = ones * batch_ids

        # Split the flat argmax into row and column of the output map.
        row_stride = out_cols * channels
        y = argmax // row_stride
        x = argmax % row_stride // channels

        channel_ids = K.arange(start=0, stop=channels, dtype='int64')
        f = ones * channel_ids

        # transpose indices & reshape update values to one dimension
        total = tf.size(inputs)
        stacked = K.reshape(K.stack([b, y, x, f]), [4, total])
        indices = K.transpose(stacked)
        values = K.reshape(inputs, [total])
        return tf.scatter_nd(indices, values, output_shape)
示例#23
0
    def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
        """Convert a YOLO output layer into box parameters.

        With ``calc_loss`` truthy the computation uses the Keras backend and
        returns the tensors the loss needs; otherwise it runs in NumPy.

        :param feats:           (N, 13, 13, 3 * (5+n_class)), ...
        :param anchors:         (3, 2)
        :param num_classes:     15
        :param input_shape:     (416, 416)
        :param calc_loss:       select the backend (loss) path
        :return: ``(grid, feats, box_xy, box_wh)`` if ``calc_loss`` is
            truthy, else
            ``(box_xy, box_wh, box_confidence, box_class_probs)``
        """

        num_anchors = len(anchors)
        # Reshape to batch, height, width, num_anchors, box_params.

        if calc_loss:
            anchors_tensor = K.reshape(K.constant(anchors),
                                       [1, 1, 1, num_anchors, 2])
            grid_shape = K.shape(feats)[1:3]  # height, width
            grid_y = K.tile(
                K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                [1, grid_shape[1], 1, 1])
            grid_x = K.tile(
                K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                [grid_shape[0], 1, 1, 1])
            grid = K.concatenate([grid_x, grid_y])
            grid = K.cast(grid, K.floatx())
            feats = K.reshape(feats, [
                -1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5
            ])
            box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
                grid_shape[::-1], K.dtype(feats))
            box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
                input_shape[::-1], K.dtype(feats))
            return grid, feats, box_xy, box_wh

        anchors_tensor = np.reshape(np.array(anchors),
                                    [1, 1, 1, num_anchors, 2])
        grid_shape = np.asarray(feats.shape[1:3])  # height, width
        grid_y = np.tile(
            np.reshape(np.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
            [1, grid_shape[1], 1, 1])
        grid_x = np.tile(
            np.reshape(np.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
            [grid_shape[0], 1, 1, 1])
        grid = np.concatenate([grid_x, grid_y], axis=-1)
        grid = grid.astype(feats.dtype)

        feats = np.reshape(feats, [
            -1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5
        ])

        # input_shape may be a plain tuple such as (416, 416); convert it
        # so that reversing and astype work for tuples and arrays alike.
        input_wh = np.asarray(input_shape)[::-1].astype(feats.dtype)

        box_xy = (utils.sigmoid(feats[..., :2]) +
                  grid) / grid_shape[..., ::-1].astype(feats.dtype)
        box_wh = np.exp(feats[..., 2:4]) * anchors_tensor / input_wh
        box_confidence = utils.sigmoid(feats[..., 4:5])
        box_class_probs = utils.sigmoid(feats[..., 5:])
        return box_xy, box_wh, box_confidence, box_class_probs
示例#24
0
def yolo4_decode(feats,
                 anchors,
                 num_classes,
                 input_shape,
                 scale_x_y=None,
                 calc_loss=False):
    """Decode final layer features to bounding box parameters.

    Returns ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy
    (used by the loss), otherwise
    ``(box_xy, box_wh, box_confidence, box_class_probs)`` (used for
    prediction).
    """
    anchor_count = len(anchors)
    # Anchors laid out as batch, height, width, num_anchors, box_params.
    anchor_grid = K.reshape(K.constant(anchors), [1, 1, 1, anchor_count, 2])

    # Reference grid of cell indices, e.g. (13, 13, 1, 2).
    grid_hw = K.shape(feats)[1:3]  # height, width
    rows, cols = grid_hw[0], grid_hw[1]
    row_index = K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1])
    col_index = K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1])
    grid = K.concatenate([
        K.tile(col_index, [rows, 1, 1, 1]),
        K.tile(row_index, [1, cols, 1, 1]),
    ])
    grid = K.cast(grid, K.dtype(feats))

    # Reshape to ([batch_size, height, width, num_anchors, (num_classes+5)])
    feats = K.reshape(feats,
                      [-1, rows, cols, anchor_count, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # box_xy is normalised by the grid size and box_wh by input_shape.
    # The input size is the smallest backbone feature map size times the
    # stride.
    cell_offset = K.sigmoid(feats[..., :2])
    if scale_x_y:
        # Eliminate grid sensitivity trick involved in YOLOv4
        #
        # Reference Paper & code:
        #     "YOLOv4: Optimal Speed and Accuracy of Object Detection"
        #     https://arxiv.org/abs/2004.10934
        #     https://github.com/opencv/opencv/issues/17148
        #     https://zhuanlan.zhihu.com/p/139724869
        cell_offset = cell_offset * scale_x_y - (scale_x_y - 1) / 2
    box_xy = (cell_offset + grid) / K.cast(grid_hw[..., ::-1],
                                           K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchor_grid / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    if calc_loss:
        return grid, feats, box_xy, box_wh

    # Sigmoid-decoded objectness scores and class probabilities.
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
示例#25
0
 def _attention_regularizer(self, attention):
     """Penalise the distance of ``attention @ attention^T`` from identity.

     Returns the summed squared difference, scaled by
     ``self.attention_regularizer_weight`` and divided by the batch size.
     """
     attention_dims = K.shape(attention)
     batch_size = K.cast(attention_dims[0], K.floatx())
     positions = K.arange(0, attention_dims[-1])
     # Identity matrix built by comparing row and column indices.
     identity = K.cast(
         K.equal(K.expand_dims(positions, axis=0),
                 K.expand_dims(positions, axis=-1)),
         K.floatx())
     gram = K.batch_dot(attention,
                        K.permute_dimensions(attention, (0, 2, 1)))
     penalty = K.sum(K.square(gram - identity))
     return self.attention_regularizer_weight * penalty / batch_size
示例#26
0
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    The cell-offset grid is built explicitly in (x, y) order and the
    normalisation uses (width, height), so the decoding is valid for
    non-square feature maps as well as square ones.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Probability estimate for whether each box contains any object.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])
    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # (height, width), assuming channels last
    conv_height, conv_width = conv_dims[0], conv_dims[1]

    # Per-cell offsets: entry [0, row, col, 0] is (x=col, y=row).  Building
    # the two planes separately keeps rows and columns from being mixed up
    # when height != width.
    grid_y = K.tile(K.reshape(K.arange(0, stop=conv_height), [-1, 1, 1, 1]),
                    [1, conv_width, 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=conv_width), [1, -1, 1, 1]),
                    [conv_height, 1, 1, 1])
    conv_index = K.expand_dims(K.concatenate([grid_x, grid_y]), 0)
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_height, conv_width, num_anchors, num_classes + 5])
    # Normaliser in (width, height) order so it lines up with (x, y) and
    # (w, h) on the last axis.
    conv_dims_xy = K.cast(
        K.reshape(K.stack([conv_width, conv_height]), [1, 1, 1, 1, 2]),
        K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + conv_index) / conv_dims_xy
    box_wh = box_wh * anchors_tensor / conv_dims_xy

    return box_confidence, box_xy, box_wh, box_class_probs
示例#27
0
def yolo3_decode(feats,
                 anchors,
                 num_classes,
                 input_shape,
                 scale_x_y=None,
                 calc_loss=False):
    """Decode final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Raw head output; reshaped here to
        ``[-1, grid_h, grid_w, num_anchors, num_classes + 5]``.
    anchors : array-like
        Anchor sizes, two values per anchor.
    num_classes : int
        Number of target classes.
    input_shape : tensor
        Network input size; it is reversed with ``[..., ::-1]`` before
        dividing the box sizes.
    scale_x_y : float, optional
        YOLOv4 "eliminate grid sensitivity" factor. When falsy, the plain
        sigmoid offset is used.
    calc_loss : bool
        Selects which tuple is returned.

    Returns
    -------
    tuple
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Clamp the raw predictions before the non-linearities so that exp()
    # cannot overflow on extreme activations.  The clamped values must be
    # the ones fed to sigmoid/exp below, otherwise the guard has no effect.
    raw_xy = K.clip(feats[..., :2], -10.0, 10.0)
    raw_wh = K.clip(feats[..., 2:4], -8.0, 8.0)

    # Adjust predictions to each spatial grid point and anchor size.
    xy_offset = K.sigmoid(raw_xy)
    if scale_x_y:
        # Eliminate grid sensitivity trick involved in YOLOv4
        #
        # Reference Paper & code:
        #     "YOLOv4: Optimal Speed and Accuracy of Object Detection"
        #     https://arxiv.org/abs/2004.10934
        #     https://github.com/opencv/opencv/issues/17148
        #
        xy_offset = xy_offset * scale_x_y - (scale_x_y - 1) / 2
    box_xy = (xy_offset + grid) / K.cast(grid_shape[..., ::-1],
                                         K.dtype(feats))
    box_wh = K.exp(raw_wh) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
示例#28
0
    def build(self, input_shape):
        """Create the ``pos_encoding`` weight and fill it with sin/cos values.

        Reads ``self.d_model`` and ``self.max_len``; sets ``self.position``,
        ``self.div_term`` and ``self.pos_encoding``.

        NOTE(review): this method looks like it cannot run as written; see
        the inline notes below and confirm the intended table shape before
        relying on it.
        """
        # Non-trainable, zero-initialised table with ``input_shape[0]`` rows.
        # NOTE(review): the rows come from input_shape[0] while the positions
        # below run to self.max_len - the two only agree if those are equal.
        self.pos_encoding = self.add_weight(shape=(input_shape[0],self.d_model),
                                       initializer=tf.keras.initializers.Zeros(),
                                       name='pos_encoding',
                                       trainable=False)

        # Column vector of positions 0 .. max_len - 1.
        self.position = K.expand_dims(K.arange(0,self.max_len,dtype=tf.float32),1)
        # One frequency term per even channel index.
        # NOTE(review): the exponent is positive here; the usual sinusoidal
        # encoding uses -log(10000) / d_model - confirm the intended sign.
        self.div_term = K.exp(K.arange(0,self.d_model, 2,dtype='float32') * (np.log(10000.0) / self.d_model))
        # Even columns get sin, odd columns get cos.
        # NOTE(review): slice assignment on the object returned by add_weight
        # is presumably unsupported (TF variables normally need .assign) -
        # verify that these two lines execute.
        self.pos_encoding[:,0::2] = K.sin(self.position * self.div_term)
        self.pos_encoding[:,1::2] = K.cos(self.position * self.div_term)
        # Adds a leading axis, then transposes.
        # NOTE(review): K.transpose on a 3-D tensor presumably reverses every
        # axis rather than swapping the first two - confirm the wanted layout.
        self.pos_encoding = K.transpose(K.expand_dims(self.pos_encoding,0))
示例#29
0
    def call(self, x, **kwargs):
        """Return sinusoidal position codes for the sequence axis of ``x``.

        Only ``K.shape(x)[1]`` is read from ``x``; ``kwargs`` is unused.
        Positions ``1 .. length`` are encoded (the row for position 0 is
        computed and then dropped).  Sine and cosine values are stacked on a
        trailing axis and reshaped to ``(-1, length, self.embedding_size)``,
        which interleaves them along the last axis.
        """
        seq_len = K.shape(x)[1]

        # Positions 0 .. seq_len as a float column vector.
        positions = K.cast(K.arange(start=0, stop=seq_len + 1), 'float32')
        positions = K.expand_dims(positions, axis=-1)

        # Even channel indices 0, 2, 4, ... as a float row vector.
        channel_idx = K.cast(K.arange(self.embedding_size // 2) * 2, 'float32')
        channel_idx = K.expand_dims(channel_idx, axis=0)

        inverse_freq = 1.0 / K.pow(10000.0, channel_idx / self.embedding_size)
        angles = K.dot(positions, inverse_freq)
        angles_copy = tf.identity(angles)

        # Drop the first row (position 0) from both halves.
        sin_part = K.sin(angles)[1:, :]
        cos_part = K.cos(angles_copy)[1:, :]

        interleaved = K.stack([sin_part, cos_part], axis=2)
        return K.reshape(interleaved, (-1, seq_len, self.embedding_size))
def construct_grid(rows, cols):
    """Build a ``(rows, cols, 1, 2)`` grid of cell indices.

    The last axis holds ``(column index, row index)`` for each cell, i.e.
    the x offset first and the y offset second.
    """
    # Row vector of column indices and column vector of row indices, each
    # padded with two trailing singleton axes.
    col_idx = K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1])
    row_idx = K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1])

    # Broadcast each index plane over the other spatial axis.
    x_plane = K.tile(col_idx, [rows, 1, 1, 1])
    y_plane = K.tile(row_idx, [1, cols, 1, 1])

    return K.concatenate([x_plane, y_plane])