Exemplo n.º 1
0
def shift(shape, stride, anchors):
    """Replicate a set of base anchors at the centre of every map cell.

    Args:
        shape: Shape of the map; ``shape[0]`` rows and ``shape[1]`` columns
            are iterated.
        stride: Distance between neighbouring cells, in anchor coordinates.
        anchors: Base anchors, reshaped here to ``(num_anchors, 4)`` rows.

    Returns:
        Tensor of shape ``(rows * cols * num_anchors, 4)``: every base anchor
        offset by ``(index + 0.5) * stride`` for each cell.
    """
    float_type = K.floatx()
    half_cell = K.constant(0.5, dtype=float_type)

    # Cell centres along each axis.
    centres_x = (K.arange(0, shape[1], dtype=float_type) + half_cell) * stride
    centres_y = (K.arange(0, shape[0], dtype=float_type) + half_cell) * stride

    grid_x, grid_y = tf.meshgrid(centres_x, centres_y)
    flat_x = K.reshape(grid_x, [-1])
    flat_y = K.reshape(grid_y, [-1])

    # One (x, y, x, y) row per cell; presumably anchors are (x1, y1, x2, y2),
    # so both corners receive the same translation -- TODO confirm.
    offsets = K.transpose(K.stack([flat_x, flat_y, flat_x, flat_y], axis=0))

    num_anchors = K.shape(anchors)[0]
    num_cells = K.shape(offsets)[0]  # rows * cols

    # Broadcast (num_cells, 1, 4) against (1, num_anchors, 4).
    offsets = K.cast(K.reshape(offsets, [num_cells, 1, 4]), float_type)
    per_cell = K.reshape(anchors, [1, num_anchors, 4]) + offsets
    return K.reshape(per_cell, [num_cells * num_anchors, 4])
Exemplo n.º 2
0
    def call(self, inputs):
        """Build a normalised coordinate grid repeated for every batch item.

        Reads ``self.in_shape`` (indexed as a per-sample shape) and
        ``self.data_format``.

        Args:
            inputs: Tensor whose leading dimension supplies the batch size;
                its values are not read.

        Returns:
            Tensor of shape ``(batch, 2, n_x, n_y)`` when ``data_format`` is
            ``'channels_first'`` and ``(batch, n_x, n_y, 2)`` otherwise. The
            two channels hold the index along each spatial axis scaled to
            ``[0, 1]``.
        """
        channels_first = self.data_format == 'channels_first'
        input_shape = self.in_shape
        if channels_first:
            size_x, size_y = input_shape[1], input_shape[2]
        else:
            size_x, size_y = input_shape[0], input_shape[1]

        x = K.arange(0, size_x, dtype=K.floatx())
        y = K.arange(0, size_y, dtype=K.floatx())

        # Scale indices to [0, 1]. For a length-1 axis the largest index is 0,
        # so the divisor is floored at 1 to avoid 0 / 0 = NaN; longer axes
        # have a maximum >= 1 and are unaffected.
        x = x / K.maximum(K.max(x), 1.0)
        y = y / K.maximum(K.max(y), 1.0)

        loc_x, loc_y = tf.meshgrid(x, y, indexing='ij')
        loc = K.stack([loc_x, loc_y], axis=0 if channels_first else -1)

        location = K.expand_dims(loc, axis=0)
        # Only the batch axis is repeated, so the same tile multiples work
        # for both data formats without permuting axes.
        return tf.tile(location, [K.shape(inputs)[0], 1, 1, 1])
Exemplo n.º 3
0
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: Raw layer output. Axes 1 and 2 are read as grid height and
            width, and the tensor is reshaped to
            ``(batch, grid_h, grid_w, num_anchors, num_classes + 5)``.
        anchors: Sequence of anchor pairs, reshaped to
            ``(1, 1, 1, num_anchors, 2)``; presumably (width, height) in
            input pixels -- TODO confirm.
        num_classes: Number of class scores per anchor.
        input_shape: Network input size; reversed before dividing ``box_wh``,
            so presumably given as (height, width) -- TODO confirm.
        calc_loss: When truthy, return the intermediate tensors needed by a
            loss function instead of the decoded predictions.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    # Last axis is (x, y): column index first, row index second.
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # Shapes are reversed to (width, height) so they line up with (x, y).
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
Exemplo n.º 4
0
def positional_signal(hidden_size: int,
                      length: int,
                      min_timescale: float = 1.0,
                      max_timescale: float = 1e4):
    """Construct a basic sinusoidal positional encoding.

    The code is partially based on the implementation from the Tensor2Tensor
    library:
    https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/layers/common_attention.py

    Args:
        hidden_size: Size of the last axis of the result; must be even.
        length: Number of positions to encode.
        min_timescale: Smallest timescale of the geometric progression.
        max_timescale: Largest timescale of the geometric progression.

    Returns:
        Tensor of shape ``(1, length, hidden_size)``. The first half of the
        last axis holds sines, the second half cosines.

    Raises:
        ValueError: If ``hidden_size`` is odd.
    """
    if hidden_size % 2 != 0:
        raise ValueError(
            "The hidden dimension of the model must be divisible by 2. "
            f"Currently it is {hidden_size}")
    position = K.arange(0, length, dtype=K.floatx())
    num_timescales = hidden_size // 2
    # With a single timescale (hidden_size == 2) the progression has no
    # increments; flooring the divisor at 1 avoids a division by zero that
    # would otherwise turn the whole signal into NaN.
    log_timescale_increment = K.constant(
        (np.log(float(max_timescale) / float(min_timescale)) /
         max(num_timescales - 1, 1)),
        dtype=K.floatx())
    inv_timescales = (min_timescale * K.exp(
        K.arange(num_timescales, dtype=K.floatx()) * -log_timescale_increment))
    # Outer product: (length, 1) * (1, num_timescales).
    scaled_time = K.expand_dims(position, 1) * K.expand_dims(inv_timescales, 0)
    signal = K.concatenate([K.sin(scaled_time), K.cos(scaled_time)], axis=1)
    return K.expand_dims(signal, axis=0)
Exemplo n.º 5
0
    def call(self, inputs, mask=None, training=None):
        """Apply multi-head attention with content and relative-position terms.

        Args:
            inputs: Sequence of five tensors, unpacked as
                ``(inputs, relatives, memories, bias_context, bias_relative)``.
                ``memories`` is concatenated in front of ``inputs`` along
                axis 1 to form the keys/values.
            mask: Optional sequence of masks; only ``mask[0]`` is used, and
                only if it is not None.
            training: Forwarded to the attention dropout layer, if one exists.

        Returns:
            Attended output; per the inline shape notes it has the shape
            ``(batch, seq_len, units)``.
        """
        inputs, relatives, memories, bias_context, bias_relative = inputs
        full = K.concatenate([memories, inputs], axis=1)      # (batch, prev_len + seq_len, units)
        w_q = K.dot(inputs, self.kernel_q)                    # (batch, seq_len, units)
        w_kv = K.dot(full, self.kernel_kv)                    # (batch, prev_len + seq_len, units * 2)
        w_r = K.dot(relatives, self.kernel_r)                 # (batch, prev_len + seq_len, units)
        if self.use_bias:
            w_q = K.bias_add(w_q, self.bias_q)
            w_kv = K.bias_add(w_kv, self.bias_kv)
            w_r = K.bias_add(w_r, self.bias_r)
        if self.activation is not None:
            w_q = self.activation(w_q)
            w_kv = self.activation(w_kv)
            w_r = self.activation(w_r)

        # Keys and values share one projection; split it along the last axis.
        w_k = w_kv[:, :, :self.units]                         # (batch, prev_len + seq_len, units)
        w_v = w_kv[:, :, self.units:]                         # (batch, prev_len + seq_len, units)

        # Content term: (query + bias_context) against the keys.
        w_qc = K.bias_add(w_q, bias_context)
        w_qc = self._reshape_to_batches(w_qc)                 # (batch * n_head, seq_len, units_head)
        w_k = self._reshape_to_batches(w_k)                   # (batch * n_head, prev_len + seq_len, units_head)
        a_context = K.batch_dot(w_qc, w_k, axes=2)            # (batch * n_head, seq_len, prev_len + seq_len)

        # Relative term: (query + bias_relative) against the projected
        # relative encodings, then re-aligned by `_relative_shift`.
        w_qr = K.bias_add(w_q, bias_relative)
        w_qr = self._reshape_to_batches(w_qr)                 # (batch * n_head, seq_len, units_head)
        w_r = self._reshape_to_batches(w_r)                   # (batch * n_head, prev_len + seq_len, units_head)
        a_relative = K.batch_dot(w_qr, w_r, axes=2)           # (batch * n_head, seq_len, prev_len + seq_len)
        a_relative = self._relative_shift(a_relative)         # (batch * n_head, seq_len, prev_len + seq_len)

        # Scale by sqrt(units_head), then exponentiate after subtracting the
        # row maximum. The softmax is completed manually below so that masked
        # positions can be zeroed before normalising.
        att = (a_context + a_relative) / K.sqrt(K.constant(self.units_head, dtype=K.floatx()))
        exp = K.exp(att - K.max(att, axis=-1, keepdims=True))

        # Causal mask: query row i keeps key columns <= (k_len - q_len) + i.
        # NOTE: `w_k` has already been reshaped to batches here; its axis 1 is
        # still prev_len + seq_len according to the shape notes above.
        q_len, k_len = K.shape(w_q)[1], K.shape(w_k)[1]
        indices = K.expand_dims(K.arange(0, k_len), axis=0)
        upper = K.expand_dims(K.arange(k_len - q_len, k_len), axis=-1)
        exp *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0)
        if mask is not None and mask[0] is not None:
            mask = K.cast(mask[0], K.floatx())
            # Memory positions are always treated as valid (mask value 1).
            mask = K.concatenate([K.ones_like(memories[:, :, 0]), mask], axis=1)
            exp *= K.expand_dims(self._reshape_mask(mask), axis=1)

        # Normalise the masked exponentials into attention weights.
        att = exp / K.sum(exp, axis=-1, keepdims=True)
        if self.att_drop_layer is not None:
            att = self.att_drop_layer(att, training=training)
        w_v = self._reshape_to_batches(w_v)                   # (batch * n_head, prev_len + seq_len, units_head)
        w_o = K.batch_dot(att, w_v)                           # (batch * n_head, seq_len, units_head)

        w_o = self._reshape_from_batches(w_o)                 # (batch, seq_len, units)
        w_o = K.dot(w_o, self.kernel_o)                       # (batch, seq_len, units)
        if self.use_bias:
            w_o = K.bias_add(w_o, self.bias_o)
        if self.activation is not None:
            w_o = self.activation(w_o)

        # Add shape information to tensor when using `tf.keras`
        input_shape = K.int_shape(inputs)
        if input_shape[1] is not None:
            w_o = K.reshape(w_o, (-1,) + input_shape[1:])
        return w_o
Exemplo n.º 6
0
 def call(self, inputs, **kwargs):
     """Produce sinusoidal encodings for descending relative positions.

     Args:
         inputs: Pair of tensors. Axis 1 of both is summed to get the total
             length, and axis 0 of the first gives the batch size.

     Returns:
         Tensor of shape ``(batch, total_len, n)`` where the last axis is the
         sines followed by the cosines of the scaled positions.
     """
     first, second = inputs[0], inputs[1]
     batch_size = K.shape(first)[0]
     total_len = K.shape(first)[1] + K.shape(second)[1]

     # Positions count down from total_len - 1 to 0, one copy per batch row.
     descending = K.arange(total_len - 1, -1, -1, dtype=K.floatx())
     positions = K.tile(K.expand_dims(descending, axis=0), [batch_size, 1])
     if self.clamp_len is not None:
         positions = K.clip(positions, min_value=0, max_value=self.clamp_len)
     positions = K.expand_dims(positions, axis=-1)

     # Inverse frequencies 1 / 10000^(2i / output_dim) for even steps 2i.
     dim_as_float = K.cast(self.output_dim, K.floatx())
     even_steps = K.arange(0.0, self.output_dim, 2.0)
     exponents = K.expand_dims(even_steps, axis=0) / dim_as_float
     inv_freq = 1.0 / K.pow(10000.0, exponents)

     angles = positions * inv_freq
     return K.concatenate([K.sin(angles), K.cos(angles)], axis=-1)
Exemplo n.º 7
0
def yolo_parse_output(yolo_output=None,
                      anchors=None,
                      num_classes=7,
                      input_shape=None):
    """Decode a raw YOLO output tensor into box predictions.

    Args:
        yolo_output: Raw network output. Axes 1 and 2 are read as grid height
            and width, and the tensor is reshaped to
            ``(batch, grid_h, grid_w, num_anchors, 5 + num_classes)``.
        anchors: Tensor-like anchor pairs, reshaped to
            ``(1, 1, 1, num_anchors, 2)``.
        num_classes: Number of class scores per anchor.
        input_shape: Divisor applied to the anchor-scaled box sizes. It is
            used as given (not reversed), so it presumably has to be ordered
            like the anchor pairs -- TODO confirm for non-square inputs.

    Returns:
        Tuple ``(box_xy, box_wh, box_confidence, box_classes, box_coord)``.
        ``box_coord`` is the sigmoid of the raw xy concatenated with the raw
        (un-exponentiated) wh.
    """
    num_anchors = tf.shape(anchors)[0]
    out_dtype = K.dtype(yolo_output)
    anchor_reshape = tf.cast(
        tf.reshape(anchors, shape=(1, 1, 1, num_anchors, 2)), dtype=out_dtype)

    output_shape = tf.shape(yolo_output)
    grid_h, grid_w = output_shape[1], output_shape[2]

    # With the default 'xy' indexing, passing the width range first yields two
    # (grid_h, grid_w) tensors: the column (x) index and the row (y) index of
    # every cell. This is identical to the previous result on square grids and
    # also correct when grid_h != grid_w.
    col_index, row_index = tf.meshgrid(K.arange(0, stop=grid_w),
                                       K.arange(0, stop=grid_h))
    conv_index = tf.stack([col_index, row_index], axis=-1)
    conv_index = K.expand_dims(K.expand_dims(conv_index, 0),
                               -2)  # shape will be (1, grid_h, grid_w, 1, 2)
    conv_index = K.cast(conv_index, out_dtype)

    yolo_output = tf.reshape(yolo_output,
                             shape=(-1, grid_h, grid_w, num_anchors,
                                    5 + num_classes))
    box_xy_sig = tf.sigmoid(yolo_output[..., :2])
    box_wh_coord = yolo_output[..., 2:4]

    # Divide (x, y) by (grid_w, grid_h) so each coordinate is normalised by
    # the extent of its own axis.
    grid_wh = tf.cast(tf.stack([grid_w, grid_h]), dtype=out_dtype)
    box_xy = (box_xy_sig + conv_index) / grid_wh
    box_wh = tf.exp(box_wh_coord) * anchor_reshape / tf.cast(
        input_shape, dtype=out_dtype)
    box_confidence = tf.sigmoid(yolo_output[..., 4:5])
    box_classes = tf.sigmoid(yolo_output[..., 5:])

    box_coord = K.concatenate((box_xy_sig, box_wh_coord), axis=-1)
    return box_xy, box_wh, box_confidence, box_classes, box_coord
Exemplo n.º 8
0
 def call(self, x, **kwargs):
     """Compute sinusoidal position encodings and merge them with ``x``.

     ``self.size`` is overwritten with the last dimension of ``x`` when it is
     None or when ``self.mode`` is ``'sum'``.

     Args:
         x: Input tensor indexed as ``x[:, :, 0]``, i.e. at least rank 3.

     Returns:
         ``encoding + x`` for mode ``'sum'``, the encoding concatenated in
         front of ``x`` on axis 2 for mode ``'concat'``, otherwise None.
     """
     if self.size is None or self.mode == 'sum':
         self.size = int(x.shape[-1])
     size = self.size

     exponents = 2 * K.arange(size / 2, dtype='float32') / size
     inv_freq = K.expand_dims(1. / K.pow(10000., exponents), 0)

     # K.arange does not support a variable length, so the positions
     # 0 .. seq_len - 1 are generated as a cumulative sum of ones minus one.
     ones = K.ones_like(x[:, :, 0])
     positions = K.expand_dims(K.cumsum(ones, 1) - 1, 2)

     angles = K.dot(positions, inv_freq)
     encoding = K.concatenate([K.cos(angles), K.sin(angles)], 2)

     if self.mode == 'sum':
         return encoding + x
     if self.mode == 'concat':
         return K.concatenate([encoding, x], 2)
     # Any other mode falls through and returns None.
     return None
0
def soft_min_reg(cv, axis=None, min_disp=None, max_disp=None, labels=None):
    """Reduce the label axis of ``cv`` with a fixed 1x1 convolution.

    The kernel weights are evenly spaced values starting at ``min_disp`` with
    step ``(max_disp - min_disp) / labels``, stopping short of ``max_disp``.

    Args:
        cv: Input volume. With ``axis == 1`` its trailing axis is squeezed
            first and the label axis is treated as channels-first; otherwise
            the backend's default data format is used.
        axis: ``1`` selects the channels-first path; anything else selects
            the default path.
        min_disp: First weight value.
        max_disp: Exclusive upper bound of the weight values.
        labels: Number of input channels of the kernel.

    Returns:
        The convolved tensor. On the ``axis == 1`` path the singleton channel
        axis is moved to the end.
    """
    channels_first = axis == 1
    if channels_first:
        cv = Lambda(lambda t: K.squeeze(t, axis=-1))(cv)

    # The small epsilon keeps `max_disp` itself out of the range; presumably
    # this guarantees exactly `labels` values despite float rounding.
    step = (max_disp - min_disp) / labels
    levels = K.arange(min_disp, max_disp - 0.000001, step, dtype="float32")
    kernel = K.reshape(levels, (1, 1, labels, 1))

    if not channels_first:
        return K.conv2d(cv, kernel, strides=(1, 1), padding='valid')

    weighted = K.conv2d(cv,
                        kernel,
                        strides=(1, 1),
                        padding='valid',
                        data_format="channels_first")
    return K.expand_dims(K.squeeze(weighted, axis=1), axis=-1)
Exemplo n.º 10
0
    def call(self, x, mask=None):
        """Compute sinusoidal position encodings and merge them with ``x``.

        ``self.size`` is overwritten with the last dimension of ``x`` when it
        is None or when ``self.mode`` is ``'sum'``.

        Args:
            x: Input tensor indexed as ``x[:, :, 0]``, i.e. at least rank 3.
            mask: Accepted for interface compatibility; not used.

        Returns:
            For mode ``'sum'``: ``x`` plus the encoding, with the encoding
            multiplied by ``sqrt(self.size)`` first when ``self.scale`` is
            truthy. For mode ``'concat'``: the encoding concatenated in front
            of ``x`` on axis 2. Any other mode returns None.
        """
        if (self.size is None) or (self.mode == 'sum'):
            self.size = int(x.shape[-1])

        # Inverse frequencies 1 / 10000^(2j / size).
        position_j = 1. / \
                     K.pow(10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
        position_j = K.expand_dims(position_j, 0)

        # Positions 0 .. seq_len - 1, built as a cumulative sum of ones so the
        # sequence length can stay dynamic.
        position_i = tf.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
        position_i = K.expand_dims(position_i, 2)
        position_ij = K.dot(position_i, position_j)
        outputs = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)

        if self.mode == 'sum':
            if self.scale:
                outputs = outputs * self.size**0.5
            return x + outputs
        elif self.mode == 'concat':
            return K.concatenate([outputs, x], 2)
Exemplo n.º 11
0
 def _build_weights(self, input_shape):
     """Create the layer's weights and fixed helper tensors.

     Args:
         input_shape: Shape whose last entry is the input feature size.

     Returns:
         ``collections.OrderedDict`` mapping names to the created weights,
         followed by the constant tensors ``"d2"``, ``"i2"`` and ``"range"``.
     """
     units = self.units
     weights = collections.OrderedDict()

     def add_bias(name, width):
         # All bias vectors share the same initializer/regularizer/constraint.
         return self.add_weight(shape=(width, ),
                                name=name,
                                initializer=self.bias_initializer,
                                regularizer=self.bias_regularizer,
                                constraint=self.bias_constraint)

     # Kernels.
     weights["input_kernel"] = self.add_weight(
         shape=(input_shape[-1], units),
         name='input_kernel',
         initializer=self.kernel_initializer,
         regularizer=self.kernel_regularizer,
         constraint=self.kernel_constraint)
     weights["attention_kernel"] = self.add_weight(
         shape=(units, units * 3),
         name='attention_kernel',
         initializer=self.attention_initializer,
         regularizer=self.attention_regularizer,
         constraint=self.attention_constraint)
     weights["mlp_kernel"] = self.add_weight(
         shape=(units, units * 2),
         name='mlp_kernel',
         initializer=self.mlp_initializer,
         regularizer=self.mlp_regularizer,
         constraint=self.mlp_constraint)

     # Biases, created in the same order as their kernels.
     weights["input_bias"] = add_bias('input_bias', units)
     weights["attention_bias"] = add_bias('attention_bias', units * 3)
     weights["mlp_bias"] = add_bias('mlp_bias', units * 2)

     # Layer-norm scale and offset (no regularizer or constraint).
     weights["layer_norm_gamma"] = self.add_weight(
         shape=(1, units * 2),
         name='layer_norm_gamma',
         initializer=self.kernel_initializer)
     weights["layer_norm_beta"] = self.add_weight(
         shape=(units * 2, ),
         name='layer_norm_beta',
         initializer=self.bias_initializer)

     if self.use_relative_position:
         weights["rel_kernel"] = self.add_weight(
             shape=(units, units),
             name='rel_kernel',
             initializer=self.attention_initializer,
             regularizer=self.attention_regularizer,
             constraint=self.attention_constraint)

     # Constant helpers: index parity, and 1e4 ** (even_index / units) with
     # two leading singleton axes.
     index = tf.range(0, units, dtype=tf.float32)
     parity = tf.floormod(index, 2)
     even_index = index - parity
     even_index = K.expand_dims(K.expand_dims(even_index, axis=0), axis=0)

     weights["d2"] = parity
     weights["i2"] = tf.pow(1e+4, even_index / units)
     weights["range"] = K.expand_dims(
         K.arange(0, self.num_memory_slots, dtype=tf.float32), axis=0)
     return weights
Exemplo n.º 12
0
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tf.Tensor
        Final convolutional layer features, assumed channels-last. Axes 1 and
        2 are read as grid height and width.
    anchors : np.array, list
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tf.Tensor
        Probability estimate for whether each box contains any object.
    box_xy : tf.Tensor
        (x, y) box predictions adjusted by spatial location in conv layer.
    box_wh : tf.Tensor
        (w, h) box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tf.Tensor
        Probability distribution estimate for each box over class labels.

    """
    num_anchors = len(anchors)

    # Reshape to batch, height, width, num_anchors, box_params.
    # The anchors never change, so a constant is used rather than a variable.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # (height, width), assuming channels last
    conv_height, conv_width = conv_dims[0], conv_dims[1]

    # Per-cell (x, y) = (column, row) offsets. Building each index directly
    # in its (height, width) layout gives the same values as the former
    # tile/flatten/reshape sequence on square grids and stays correct when
    # height != width.
    col_index = K.tile(
        K.reshape(K.arange(0, stop=conv_width), [1, -1, 1, 1]),
        [conv_height, 1, 1, 1])
    row_index = K.tile(
        K.reshape(K.arange(0, stop=conv_height), [-1, 1, 1, 1]),
        [1, conv_width, 1, 1])
    conv_index = K.concatenate([col_index, row_index])  # (height, width, 1, 2)
    conv_index = K.expand_dims(conv_index, 0)  # (1, height, width, 1, 2)
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_height, conv_width, num_anchors, num_classes + 5])

    # Divisor ordered (width, height) so it lines up with (x, y) and (w, h).
    grid_wh = K.cast(
        K.reshape(K.stack([conv_width, conv_height]), [1, 1, 1, 1, 2]),
        K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + conv_index) / grid_wh
    box_wh = box_wh * anchors_tensor / grid_wh

    return box_confidence, box_xy, box_wh, box_class_probs