def yolo_head(graph, feats, anchors, num_classes):
    """Decode raw detection-layer features into box parameters.

    All ops are created while ``graph`` is the default graph.

    Args:
        graph: Object exposing ``as_default()`` as a context manager.
        feats: Feature tensor; axes 1 and 2 are used as the spatial extents
            and the remaining data is reshaped to
            ``(-1, rows, cols, num_anchors, num_classes + 5)``.
            (presumably laid out as batch, height, width, channels -- confirm
            against the caller.)
        anchors: Sequence of anchor pairs; ``len(anchors)`` gives the anchor
            count and the values are reshaped to ``(1, 1, 1, num_anchors, 2)``.
        num_classes: Number of class scores predicted per anchor.

    Returns:
        Tuple ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    with graph.as_default():
        anchor_count = len(anchors)
        anchor_sizes = K.reshape(
            K.variable(anchors), [1, 1, 1, anchor_count, 2])

        # Spatial extents of the feature map (axes 1 and 2 of feats).
        spatial = K.shape(feats)[1:3]
        n_rows = spatial[0]
        n_cols = spatial[1]
        feat_dtype = K.dtype(feats)

        # Build one (row, col) index pair per spatial cell.
        row_ids = K.tile(K.arange(0, stop=n_rows), [n_cols])
        col_ids = K.tile(
            K.expand_dims(K.arange(0, stop=n_cols), 0), [n_rows, 1])
        col_ids = K.flatten(K.transpose(col_ids))
        cell_offsets = K.transpose(K.stack([row_ids, col_ids]))
        cell_offsets = K.reshape(cell_offsets, [1, n_rows, n_cols, 1, 2])
        cell_offsets = K.cast(cell_offsets, feat_dtype)

        # Split the channel axis into per-anchor predictions.
        feats = K.reshape(
            feats, [-1, n_rows, n_cols, anchor_count, num_classes + 5])
        extent = K.cast(
            K.reshape(spatial, [1, 1, 1, 1, 2]), K.dtype(feats))

        # Slots 0:2 -> centre, 2:4 -> size, 4 -> objectness, 5: -> classes.
        centre = K.sigmoid(feats[..., :2])
        size = K.exp(feats[..., 2:4])
        box_confidence = K.sigmoid(feats[..., 4:5])
        box_class_probs = K.softmax(feats[..., 5:])

        # Offset by cell index and divide by the spatial extents.
        box_xy = (centre + cell_offsets) / extent
        box_wh = size * anchor_sizes / extent

        return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: Feature tensor; axes 1 and 2 are taken as the grid height and
            width, and the data is reshaped to
            ``(-1, height, width, num_anchors, num_classes + 5)``.
        anchors: Sequence of anchor pairs; reshaped to
            ``(1, 1, 1, num_anchors, 2)``.
        num_classes: Number of class scores predicted per anchor.
        input_shape: Two-element shape that is reversed and used to
            normalise the box sizes (presumably the network input
            height and width -- confirm against the caller).
        calc_loss: When truthy, return the intermediate tensors needed by
            the loss instead of the decoded confidences.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    # x comes first so that it lines up with the reversed (width, height)
    # divisor used for box_xy below.
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Plain truthiness test instead of comparing against the True singleton.
    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def _attention_regularizer(self, attention):
    """Return the weighted penalty on ``attention @ attention^T - I``.

    The squared entries of the difference are summed over every axis,
    scaled by ``self.attention_regularizer_weight`` and divided by the size
    of the first axis of ``attention``.

    Args:
        attention: Rank-3 tensor (it is permuted with ``(0, 2, 1)``);
            its last axis length sets the size of the identity matrix.

    Returns:
        Scalar tensor holding the regularization term.
    """
    n_samples = K.cast(K.shape(attention)[0], K.floatx())
    seq_len = K.shape(attention)[-1]

    # Identity matrix built by comparing a row vector of positions with a
    # column vector of the same positions.
    positions = K.arange(0, seq_len)
    as_row = K.expand_dims(positions, axis=0)
    as_column = K.expand_dims(positions, axis=-1)
    identity = K.cast(K.equal(as_row, as_column), K.floatx())

    transposed = K.permute_dimensions(attention, (0, 2, 1))
    gram = K.batch_dot(attention, transposed)
    penalty = K.sum(K.square(gram - identity))

    return self.attention_regularizer_weight * penalty / n_samples
def test_arange(self):
    """Check ``arange`` on the KTF and KTH backends against NumPy and each other."""
    # Single-argument form: values and resulting dtype must agree.
    for stop in (-20, 0, 1, 10):
        reference = np.arange(stop)

        tf_tensor = KTF.arange(stop)
        tf_values = KTF.eval(tf_tensor)
        assert np.array_equal(tf_values, reference)

        th_tensor = KTH.arange(stop)
        th_values = KTH.eval(th_tensor)
        assert np.array_equal(th_values, reference)

        assert np.array_equal(tf_values, th_values)
        assert KTF.dtype(tf_tensor) == KTH.dtype(th_tensor)

    # (start, stop, step) form.
    for bounds in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)):
        reference = np.arange(*bounds)

        tf_values = KTF.eval(KTF.arange(*bounds))
        assert np.array_equal(tf_values, reference)

        th_values = KTH.eval(KTH.arange(*bounds))
        assert np.array_equal(th_values, reference)

        assert np.array_equal(tf_values, th_values)

    # An explicitly requested dtype must be honoured by both backends.
    backends = (KTF, KTH)
    for requested in ('int32', 'int64', 'float32', 'float64'):
        for backend in backends:
            tensor = backend.arange(10, dtype=requested)
            assert backend.dtype(tensor) == requested
def test_arange(self):
    """Check ``arange`` on the KTF and KTH backends against NumPy and each other."""
    # Single-argument form: values and resulting dtype must agree.
    for stop in (-20, 0, 1, 10):
        reference = np.arange(stop)

        tf_tensor = KTF.arange(stop)
        tf_values = KTF.eval(tf_tensor)
        assert np.array_equal(tf_values, reference)

        th_tensor = KTH.arange(stop)
        th_values = KTH.eval(th_tensor)
        assert np.array_equal(th_values, reference)

        assert np.array_equal(tf_values, th_values)
        assert KTF.dtype(tf_tensor) == KTH.dtype(th_tensor)

    # (start, stop, step) form.
    for bounds in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)):
        reference = np.arange(*bounds)

        tf_values = KTF.eval(KTF.arange(*bounds))
        assert np.array_equal(tf_values, reference)

        th_values = KTH.eval(KTH.arange(*bounds))
        assert np.array_equal(th_values, reference)

        assert np.array_equal(tf_values, th_values)

    # An explicitly requested dtype must be honoured by both backends.
    backends = (KTF, KTH)
    for requested in ('int32', 'int64', 'float32', 'float64'):
        for backend in backends:
            tensor = backend.arange(10, dtype=requested)
            assert backend.dtype(tensor) == requested
def call(self, inputs, mask=None, **kwargs):
    """Apply self-attention over axis 1 of ``inputs``.

    Args:
        inputs: Tensor whose axis 1 length is used as the sequence length.
        mask: Optional rank-2 mask (it is expanded on the last axis and then
            permuted with ``(0, 2, 1)``); cast to the backend float type.
        **kwargs: Accepted and ignored by this method.

    Returns:
        The attended tensor, or ``[attended, attention_weights]`` when
        ``self.return_attention`` is truthy.
    """
    seq_len = K.shape(inputs)[1]

    # Raw (unnormalized) attention scores.
    if self.attention_type == Attention.ATTENTION_TYPE_ADD:
        scores = self._call_additive_emission(inputs)
    elif self.attention_type == Attention.ATTENTION_TYPE_MUL:
        scores = self._call_multiplicative_emission(inputs)

    if self.attention_activation is not None:
        scores = self.attention_activation(scores)

    if self.attention_width is not None:
        # Restrict each position to a window of attention_width positions;
        # history_only makes the window end at the position itself.
        positions = K.arange(0, seq_len)
        if self.history_only:
            back_offset = self.attention_width - 1
        else:
            back_offset = self.attention_width // 2
        window_start = K.expand_dims(positions - back_offset, axis=-1)
        window_end = window_start + self.attention_width
        targets = K.expand_dims(positions, axis=0)
        in_window = (K.cast(window_start <= targets, K.floatx())
                     * K.cast(targets < window_end, K.floatx()))
        # A large negative offset drives out-of-window weights towards zero
        # after the softmax below.
        scores -= 10000.0 * (1.0 - in_window)

    if mask is not None:
        # The offset is non-zero only where both positions of a pair have a
        # mask value below one.
        keep = K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
        keep_t = K.permute_dimensions(keep, (0, 2, 1))
        scores -= 10000.0 * ((1.0 - keep) * (1.0 - keep_t))

    # Softmax over the last axis; the row maximum is subtracted before exp.
    exp_scores = K.exp(scores - K.max(scores, axis=-1, keepdims=True))
    weights = exp_scores / K.sum(exp_scores, axis=-1, keepdims=True)

    # Weighted sum of the inputs for every position.
    attended = K.batch_dot(weights, inputs)

    if self.attention_regularizer_weight > 0.0:
        self.add_loss(self._attention_regularizer(weights))

    return [attended, weights] if self.return_attention else attended