Example #1
def _infer_network_outputs(*, sess, restored_model, num_of_anchors, anchors,
                           orig_image_width, orig_image_height,
                           model_image_width, model_image_height, img_np,
                           verbose):
    start = time.time()
    boxes = []
    prob_class = []

    for yolo_head_idx in range(len(restored_model.output)):
        yolo_head = restored_model.output[yolo_head_idx]
        yolo_head_shape = K.shape(yolo_head)
        yolo_head_num_of_cols, yolo_head_num_of_rows = yolo_head_shape[
            2], yolo_head_shape[1]

        curr_yolo_head = K.reshape(yolo_head, [
            -1, yolo_head_num_of_cols, yolo_head_num_of_rows, num_of_anchors,
            NUM_OF_BOX_PARAMS + NUM_OF_CLASSES
        ])

        grid = construct_grid(yolo_head_shape[1], yolo_head_shape[2])
        grid = K.cast(grid, dtype=K.dtype(curr_yolo_head))
        grid_size = K.cast([yolo_head_num_of_cols, yolo_head_num_of_rows],
                           dtype=K.dtype(curr_yolo_head))

        curr_boxes_xy = (K.sigmoid(curr_yolo_head[..., :2]) + grid) / grid_size

        curr_boxes_wh = K.exp(curr_yolo_head[...,
                                             2:4]) * anchors[yolo_head_idx]

        curr_prob_obj = K.sigmoid(curr_yolo_head[..., 4:5])
        curr_prob_class = K.sigmoid(curr_yolo_head[..., 5:])
        curr_prob_detected_class = curr_prob_obj * curr_prob_class

        boxes.append(
            get_corrected_boxes(box_width=curr_boxes_wh[..., 0:1],
                                box_height=curr_boxes_wh[..., 1:2],
                                box_x=curr_boxes_xy[..., 0:1],
                                box_y=curr_boxes_xy[..., 1:2],
                                orig_image_shape=(orig_image_width,
                                                  orig_image_height),
                                model_image_shape=(model_image_width,
                                                   model_image_height)))

        curr_prob_detected_class = K.reshape(curr_prob_detected_class,
                                             [-1, NUM_OF_CLASSES])
        prob_class.append(curr_prob_detected_class)

    prob_class = K.concatenate(prob_class, axis=0)
    boxes = K.concatenate(boxes, axis=0)

    out_tensors = [
        boxes,
        prob_class,
    ]

    if verbose:
        print(f'Took {time.time() - start} seconds to construct network.')

    start = time.time()
    sess_out = sess.run(out_tensors,
                        feed_dict={
                            restored_model.input: img_np,
                            K.learning_phase(): 0
                        })

    if verbose:
        print(
            f'Took {time.time() - start} seconds to infer outputs in session.')
    boxes, out_boxes_classes = sess_out
    return boxes, out_boxes_classes
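
The snippet assumes a construct_grid helper that is not shown. A plausible reconstruction (an assumption on my part, mirroring the cell-index grid built in Example #7 below) would be:

import tensorflow.keras.backend as K

def construct_grid(num_rows, num_cols):
    # Hypothetical helper: builds a (rows, cols, 1, 2) tensor of x,y cell
    # offsets, matching how the result is added to the sigmoid x,y outputs.
    grid_y = K.tile(K.reshape(K.arange(0, stop=num_rows), [-1, 1, 1, 1]),
                    [1, num_cols, 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=num_cols), [1, -1, 1, 1]),
                    [num_rows, 1, 1, 1])
    return K.concatenate([grid_x, grid_y])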
Example #2
def custom_loss(y_true, y_pred):
    # define a grid of offsets
    # [[[ 0.  0.]]
    # [[ 1.  0.]]
    # [[ 0.  1.]]
    # [[ 1.  1.]]]
    grid = np.array([[[float(x), float(y)]] * nb_boxes for y in range(grid_h)
                     for x in range(grid_w)])

    # The first three values are the classes: cat, rat, and none.
    # However, YOLO doesn't predict "none" as a class; "none" is simply
    # everything else and is never predicted, so it isn't used in the loss.
    y_true_class = y_true[..., 0:2]
    y_pred_class = y_pred[..., 0:2]

    # reshape array as a list of grid / grid cells / boxes / of 5 elements
    pred_boxes = K.reshape(y_pred[..., 3:], (-1, grid_w * grid_h, nb_boxes, 5))
    true_boxes = K.reshape(y_true[..., 3:], (-1, grid_w * grid_h, nb_boxes, 5))

    # Sum the box-center coordinates with the cell offsets.
    # As predicted boxes are limited to the 0..1 range, pred x,y + offset can
    # only land inside the corresponding cell.
    y_pred_xy = pred_boxes[..., 0:2] + K.variable(grid)
    # w and h predicted are 0 to 1 with 1 being image size
    y_pred_wh = pred_boxes[..., 2:4]
    # probability that there is something to predict here
    y_pred_conf = pred_boxes[..., 4]

    # Same as the predictions, except no offset is needed: the ground-truth
    # coordinates are already between 0 and the cell count.
    y_true_xy = true_boxes[..., 0:2]
    # width and height
    y_true_wh = true_boxes[..., 2:4]
    # probability that there is something in that cell: 0 or 1 here, since it
    # is known with certainty
    y_true_conf = true_boxes[..., 4]

    clss_loss = K.sum(K.square(y_true_class - y_pred_class), axis=-1)
    xy_loss = K.sum(K.sum(K.square(y_true_xy - y_pred_xy), axis=-1) *
                    y_true_conf,
                    axis=-1)
    wh_loss = K.sum(
        K.sum(K.square(K.sqrt(y_true_wh) - K.sqrt(y_pred_wh)), axis=-1) *
        y_true_conf,
        axis=-1)

    # Adding the confidence term lowers box-prediction quality slightly, but
    # we gain an estimate of each box's quality; it also makes training a bit
    # unstable.

    # compute the intersection of all boxes at once (the IOU)
    intersect_wh = K.maximum(K.zeros_like(y_pred_wh),
                             (y_pred_wh + y_true_wh) / 2 -
                             K.square(y_pred_xy - y_true_xy))
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    true_area = y_true_wh[..., 0] * y_true_wh[..., 1]
    pred_area = y_pred_wh[..., 0] * y_pred_wh[..., 1]
    union_area = pred_area + true_area - intersect_area
    iou = intersect_area / union_area

    conf_loss = K.sum(K.square(y_true_conf * iou - y_pred_conf), axis=-1)

    # final loss function
    d = xy_loss + wh_loss + conf_loss + clss_loss

    if False:  # flip to True to print the loss components while debugging
        d = tf.Print(d, [d], "loss")
        d = tf.Print(d, [xy_loss], "xy_loss")
        d = tf.Print(d, [wh_loss], "wh_loss")
        d = tf.Print(d, [clss_loss], "clss_loss")
        d = tf.Print(d, [conf_loss], "conf_loss")

    return d
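
A minimal sketch of wiring custom_loss into a model, assuming the imports used throughout these examples (numpy as np, the Keras backend as K) and that grid_w, grid_h and nb_boxes are the module-level globals the loss closes over. The toy architecture is made up; only the output shape matters, where the last-axis width 3 + nb_boxes * 5 follows the slicing in the loss:

import tensorflow as tf
from tensorflow.keras import layers, models

grid_w, grid_h, nb_boxes = 4, 4, 1

inp = layers.Input(shape=(64, 64, 3))
h = layers.Conv2D(16, 3, strides=4, activation='relu')(inp)
h = layers.Flatten()(h)
h = layers.Dense(grid_w * grid_h * (3 + nb_boxes * 5), activation='sigmoid')(h)
out = layers.Reshape((grid_w * grid_h, 3 + nb_boxes * 5))(h)

model = models.Model(inp, out)
model.compile(optimizer='adam', loss=custom_loss)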
Example #3
    def call(self, inputs, **kwargs):
        """
        Creates the layer as a Keras graph

        Notes:
            This does not add self loops to the adjacency matrix.

        Args:
            inputs (list): list of inputs; the first two items are used here:
            node features (size B x N x F),
            sparse graph adjacency matrix (size N x N),
            where N is the number of nodes in the graph and
                  F is the dimensionality of node features
        """
        X = inputs[0]  # Node features (1 x N x F)
        A_sparse = inputs[1]  # Adjacency matrix (1 x N x N)

        if not isinstance(A_sparse, tf.SparseTensor):
            raise TypeError("A is not sparse")

        # Get undirected graph edges (E x 2)
        A_indices = A_sparse.indices

        batch_dim, n_nodes, _ = K.int_shape(X)
        if batch_dim != 1:
            raise ValueError(
                "Currently full-batch methods only support a batch dimension of one"
            )
        else:
            # Remove singleton batch dimension
            X = K.squeeze(X, 0)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Create sparse attention vector (All non-zero values of the matrix)
            sparse_attn_self = tf.gather(K.reshape(attn_for_self, [-1]),
                                         A_indices[:, 0],
                                         axis=0)
            sparse_attn_neighs = tf.gather(K.reshape(attn_for_neighs, [-1]),
                                           A_indices[:, 1],
                                           axis=0)
            attn_values = sparse_attn_self + sparse_attn_neighs

            # Add nonlinearity
            attn_values = LeakyReLU(alpha=0.2)(attn_values)

            # Apply dropout to features and attention coefficients
            dropout_feat = Dropout(self.in_dropout_rate)(features)  # (N x F')
            dropout_attn = Dropout(self.attn_dropout_rate)(
                attn_values)  # (N x N)

            # Convert to sparse matrix
            sparse_attn = tf.sparse.SparseTensor(
                A_indices, values=dropout_attn, dense_shape=[n_nodes, n_nodes])

            # Apply softmax to get attention coefficients
            sparse_attn = tf.sparse.softmax(
                sparse_attn)  # (N x N), Eq. 3 of the paper

            # Linear combination with neighbors' features [YT: see Eq. 4]
            node_features = tf.sparse.sparse_dense_matmul(
                sparse_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (N x F')

        output = self.activation(output)

        # Add batch dimension back if we removed it
        if batch_dim == 1:
            output = K.expand_dims(output, 0)
        return output
Example #4
    def __call__(self, shape, dtype=None, partition_info=None):
        w = self.base_initializer(shape=shape, dtype=dtype)
        u = K.random_uniform(shape=tuple([1, shape[-1]]), dtype=dtype)
        w_bar, _u, sigma = spectral_normalization(w, u, self.niter_spectral)
        w_bar = bjorck_normalization(w_bar, self.niter_bjorck)
        return K.reshape(w_bar, shape)
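
For context: a __call__ like this normally lives on a Keras Initializer subclass. A sketch of the surrounding class and its use; the class name, constructor defaults, and the Orthogonal base are assumptions, while spectral_normalization / bjorck_normalization are the helpers the snippet already references:

from tensorflow.keras import initializers, layers

class SpectralBjorckInitializer(initializers.Initializer):
    """Hypothetical wrapper around the __call__ shown above."""

    def __init__(self, niter_spectral=3, niter_bjorck=15):
        self.base_initializer = initializers.Orthogonal()
        self.niter_spectral = niter_spectral
        self.niter_bjorck = niter_bjorck

    # __call__ is exactly the method shown above.

# layer = layers.Dense(64, kernel_initializer=SpectralBjorckInitializer())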
Example #5
	def call(self, x, mask=None):
		a = KB.permute_dimensions(x, (1,0,2))
		a = KB.reshape(a, (x.shape[1] *x.shape[0], x.shape[2]))
		a = ifft(a)
		a = KB.reshape(a, (x.shape[1], x.shape[0], x.shape[2]))
		return KB.permute_dimensions(a, (1,0,2))
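
The permute/reshape sandwich above applies an op along the time axis and then restores the original layout. A small NumPy check of that shape round-trip (values are made up):

import numpy as np

x = np.arange(2 * 3 * 4).reshape(2, 3, 4)           # (batch=2, time=3, feat=4)
a = np.transpose(x, (1, 0, 2)).reshape(3 * 2, 4)    # (time*batch, feat)
a = a.reshape(3, 2, 4)                              # undo the flatten
assert np.array_equal(np.transpose(a, (1, 0, 2)), x)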
Example #6
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: repeat_elements and tf.split don't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
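
A quick shape check for yolo_head, assuming TF2 eager execution and the usual `from tensorflow.keras import backend as K`; all values are dummies:

import numpy as np
import tensorflow as tf

anchors = np.array([[1.0, 2.0], [2.0, 1.0]])   # 2 anchors
num_classes = 3
feats = tf.zeros((1, 13, 13, len(anchors) * (num_classes + 5)))

box_xy, box_wh, box_conf, box_class_probs = yolo_head(feats, anchors, num_classes)
print(box_xy.shape)           # (1, 13, 13, 2, 2)
print(box_class_probs.shape)  # (1, 13, 13, 2, 3)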
Example #7
def gather_nd_reshape(t, indices, final_shape):
    h = tf.gather_nd(t, indices)
    return K.reshape(h, final_shape)
def yolo_loss(inputs, num_anchors):
    ignore_thresh = .5 # Object-presence probability threshold
    num_layers = num_anchors // 3 # Number of grid levels (three anchors per level)
    y_pred = inputs[:num_layers] # Values predicted by the model, taken from the inputs
    y_true = inputs[num_layers:] # Ground-truth values, taken from the inputs
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] # Anchor mask for each grid level

    # Input image size ((13 x 13) * 32 = (416 x 416)), cast to the dtype of y_true[0]
    input_shape = K.cast(K.shape(y_pred[0])[1:3] * 32, K.dtype(y_true[0]))

    # 2-D array with the grid dimensions of each level ((13, 13), (26, 26), (52, 52))
    grid_shapes = [K.cast(K.shape(y_pred[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]

    loss = 0 # Loss value

    # Read the number of elements
    m = K.shape(y_pred[0])[0] # Batch size
    batch_size = K.cast(m, K.dtype(y_pred[0])) # Cast to the dtype of y_pred[0]

    for l in range(num_layers): # Iterate over all three grid levels
        # Mask for the level-l grid based on the object-presence probability
        # (5th entry of the shared parameter list). object_mask holds only the
        # object-presence values.
        object_mask = y_true[l][..., 4:5] # Returns data of the form ([0][0][0][0]...[1]...[0])

        # Analogous slice for the level-l grid with the one-hot class encoding
        # (where the position of our class is stored). true_class holds only
        # the one-hot class representation for this anchor level.
        true_class = y_true[l][..., 5:] # Returns data of the form ([0][0][0][0]...[1]...[0])

        num_sub_anchors = len(anchors[anchor_mask[l]]) # Number of anchors for this grid level (3)

        # Reshape the anchors of this grid level and store them in anchors_tensor
        anchors_tensor = K.reshape(K.constant(anchors[anchor_mask[l]]), [1, 1, 1, num_sub_anchors, 2])

        # Build a 2-D array grid of the form [[[0, 0], [0, 1], [0, 2], ..., [0, k]],
        #                                     [[1, 0], [1, 1], [1, 2], ..., [1, k]],
        #                                     ...
        #                                     [[k, 0], [k, 1], [k, 2], ..., [k, k]]]
        # where k is the grid size. The array holds the grid-cell indices.
        grid_shape = K.shape(y_pred[l])[1:3] # Grid width and height
        grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),[1, grid_shape[1], 1, 1]) # Vertical line
        grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),[grid_shape[0], 1, 1, 1]) # Horizontal line
        grid = K.concatenate([grid_x, grid_y]) # Combine them
        grid = K.cast(grid, K.dtype(y_pred[l])) # Cast to the dtype of y_pred[l]

        # Reshape y_pred[l]
        feats = K.reshape(y_pred[l], [-1, grid_shape[0], grid_shape[1], num_sub_anchors, num_classes + 5])

        # Loss for the object-center coordinates.
        # Recover the object-center coordinates from the predicted values
        pred_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
        # Invert the computation on the original y_true values for the object-center coordinates
        true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid  # Real bounding-box center coordinates
        box_loss_scale = 2 - y_true[l][...,2:3] * y_true[l][...,3:4] # the bigger the box, the smaller the loss
        # binary_crossentropy between the true and predicted values (object_mask keeps only the required entries)
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(true_xy, feats[...,0:2], from_logits=True)

        # Loss for the width and height.
        # Recover the width and height values from the prediction
        pred_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
        # Invert the computation on the original y_true values for the object's width and height
        true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # Keep the width/height values only for the elements where object_mask = 1
        true_wh = K.switch(object_mask, true_wh, K.zeros_like(true_wh))
        # Compute the width/height loss
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(true_wh-feats[...,2:4])

        # Combine the values into a single array
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Loss for detecting any class at all.
        # First we must discard all detected objects whose probability is below ignore_thresh.

        # Array that will hold the entries to be ignored
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool') # Cast object_mask to 'bool'

        # Function that determines which entries should be ignored.
        # Iterate over all elements of the batch (b < m):
        # get the real bounding-box parameters for the current cell,
        # compute the IoU of the real and predicted boxes, and,
        # depending on best_iou < ignore_thresh, mark the prediction as
        # correctly or incorrectly recognized.
        def loop_body(
                b,
                ignore_mask
                ):
            # true_box receives the first 4 parameters (center, width and height) of the elements whose object_mask_bool value is True
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            # Compute the IoU of the predicted bounding box (pred_box) and the original one (true_box)
            iou = calc_iou(pred_box[b], true_box)
            # Find the best bounding box
            best_iou = K.max(iou, axis=-1)
            # Write true or false into ignore_mask depending on (best_iou < ignore_thresh)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask # Increment the counter and return ignore_mask

        # Loop over all elements up to m (m = batch size)
        _, ignore_mask = tf.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack() # Convert ignore_mask to a tensor
        ignore_mask = K.expand_dims(ignore_mask, -1) # Add one more trailing dimension to ignore_mask

        # Confidence loss:
        # component 1 - for values that were predicted correctly,
        # component 2 - for values that were predicted incorrectly
        confidence_loss = (
            object_mask * K.binary_crossentropy(object_mask, feats[...,4:5], from_logits=True) +
            (1-object_mask) * K.binary_crossentropy(object_mask, feats[...,4:5], from_logits=True) * ignore_mask
            )

        # Object-class loss
        class_loss = object_mask * K.binary_crossentropy(true_class, feats[...,5:], from_logits=True)

        # Total loss
        xy_loss = K.sum(xy_loss) / batch_size
        wh_loss = K.sum(wh_loss) / batch_size
        confidence_loss = K.sum(confidence_loss) / batch_size
        class_loss = K.sum(class_loss) / batch_size
        loss += xy_loss + wh_loss + confidence_loss + class_loss

    return loss # Return the loss value
Example #9
def hw_flatten(x):
    # Input shape x: [BATCH, HEIGHT, WIDTH, CHANNELS]
    # flatten the feature volume across the tensor width and height
    shape = K.int_shape(x)
    return K.reshape(
        x, [shape[0], -1, shape[-1]])  # return [BATCH, W*H, CHANNELS]
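
hw_flatten is typically used in SAGAN-style self-attention, where the attention map is computed over all H*W positions. Note that K.int_shape(x)[0] is None when the batch dimension is unknown, in which case -1 would be needed in the reshape instead. A sketch with a known batch size (the names f and g are local assumptions):

import tensorflow as tf
from tensorflow.keras import backend as K

x = tf.zeros((2, 8, 8, 16))                           # [B, H, W, C]
f = hw_flatten(x)                                     # [B, 64, 16]
g = hw_flatten(x)                                     # [B, 64, 16]
attn = K.softmax(tf.matmul(f, g, transpose_b=True))   # [B, 64, 64]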
Example #10
    def call(self, x):
        mat = x[0]
        val = x[1]
        con = val[:, :, :self.keepconst]
        var = val[:, :, self.keepconst:]

        mata = K.constant(self.getmata(self.gs))
        matb = K.constant(self.getmatb(self.gs))

        for i in range(self.iterations):
            valp = K.permute_dimensions(val, (0, 2, 1))

            vA = K.dot(valp, mata)
            vB = K.dot(valp, matb)

            feat = K.permute_dimensions(vA, (0, 2, 1))
            diff = K.permute_dimensions(vB - vA, (0, 2, 1))

            premlp = K.concatenate((feat, diff), axis=-1)

            # postmlp = self.mlp(premlp)  # the intended MLP, currently bypassed by a plain slice
            postmlp = premlp[:, :, :self.param - self.keepconst]

            ppmlp = K.permute_dimensions(postmlp, (0, 2, 1))

            res = K.reshape(
                ppmlp, (-1, self.param - self.keepconst, self.gs, self.gs))

            resp = K.permute_dimensions(res, (1, 0, 2, 3))

            # Elementwise broadcast against mat; the author's original note
            # doubts that this actually works as intended.
            presmat = resp * mat

            resmat = K.permute_dimensions(presmat, (1, 0, 2, 3))
            print("resmat", resmat.shape)

            summ = K.sum(resmat, axis=-1)
            print("summ", summ.shape)

            summ /= self.k

            # The author notes that folding this permute into the resmat
            # permute above was tried but is not differentiable.
            var = K.permute_dimensions(summ, (0, 2, 1))
            print("var", var.shape)

            if self.activate:
                var = self.advrelu(var, self.activation)

            val = K.concatenate((con, var), axis=-1)

        return val
Example #11
def train_lstm(n_symbols, embedding_matrix, config):
    #(batch_size,max_group_nums,max_post_nums,max_seq_len)
    max_group_nums = config.max_group_nums
    max_post_nums = config.max_post_nums
    max_seq_len = config.max_seq_len
    main_input = Input(shape=(max_group_nums, max_post_nums, max_seq_len))
    sub_input = Input(shape=(max_group_nums, social_feature_nums))

    embedding_layer_main = Embedding(input_dim=n_symbols,
                                     output_dim=config.embeddingSize,
                                     weights=[embedding_matrix],
                                     input_length=max_seq_len,
                                     mask_zero=True)(main_input)
    dropout_layer_1 = Dropout(config.dropoutKeepProb)(embedding_layer_main)
    #shape==(batch_size,max_group_nums,max_post_nums,max_seq_len,embeddingSize)
    bid_GRU_layer_1 = Bidirectional(
        GRU(32,
            activation="tanh",
            recurrent_dropout=0.5,
            return_sequences=True),
        merge_mode='concat')(backend.reshape(
            dropout_layer_1, shape=[-1, max_seq_len, config.embeddingSize]))
    #shape==(batch_size*max_group_nums*max_post_nums,max_seq_len,64)
    bn_layer_1 = BatchNormalization()(bid_GRU_layer_1)
    attention_layer_1 = AttentionLayer()(
        bn_layer_1)  #(batch_size*max_group_nums*max_post_nums,64)
    bid_GRU_layer_2 = Bidirectional(GRU(32,
                                        activation='tanh',
                                        dropout=0.5,
                                        recurrent_dropout=0.5,
                                        return_sequences=True),
                                    merge_mode='concat')(backend.reshape(
                                        attention_layer_1,
                                        shape=[-1, max_post_nums, 64]))
    #shape==(batch_size*max_group_nums,max_post_nums,64)
    bn_layer_2 = BatchNormalization()(bid_GRU_layer_2)
    attention_layer_2 = AttentionLayer()(
        bn_layer_2)  #(batch_size*max_group_nums,64)

    bid_GRU_layer_3 = Bidirectional(GRU(32,
                                        activation='tanh',
                                        dropout=0.5,
                                        recurrent_dropout=0.5,
                                        return_sequences=True),
                                    merge_mode='concat')(backend.reshape(
                                        attention_layer_2,
                                        shape=[-1, max_group_nums, 64]))
    bn_layer_3 = BatchNormalization()(concatenate([bid_GRU_layer_3, sub_input],
                                                  axis=2))
    attention_layer_3 = AttentionLayer()(
        bn_layer_3)  #(batch_size,64+social_feature_nums)

    dense_layer_1 = Dense(64, activation="tanh")(
        sub_input)  #(batch_size,max_group_nums,64)
    social_attention_layer = AttentionLayer()(dense_layer_1)  #(batch_size,64)

    merge_layer = concatenate([attention_layer_3, social_attention_layer],
                              axis=1)  #(batch_size,128+social_feature_nums)
    dropout_layer = Dropout(config.dropoutKeepProb)(merge_layer)
    output_layer = Dense(2, activation='softmax')(Dense(
        32, activation="tanh")(dropout_layer))

    model = Model([main_input, sub_input], output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
Example #12
def sinkhorn_loss(y_true, y_pred):
    y_true = K.cast(y_true, y_pred.dtype)
    y_pred = K.reshape(y_pred, (-1,48,1))
    y_true = K.reshape(y_true, (-1,48,1))
    cc = tf.concat([y_true, y_pred], axis=2)
    return K.mean(tf.map_fn(myfunc, cc), axis=-1)
Example #13
def display_yolo(image, yolo_model, score_threshold, iou_threshold,\
                 train_batch_size =16, grid_h =8, grid_w =8, image_h =256, image_w =256, anchors =[0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828],\
                 plot = False):
    '''
    Display predictions from YOLO model.

    Parameters
    ----------
    - image : input image (numpy array, BGR channel order).
    - yolo_model : YOLO model.
    - score_threshold : threshold used for filtering predicted bounding boxes.
    - iou_threshold : threshold used for non max suppression.
    '''

    # load image
    input_image = image[:, :, ::-1]   # BGR -> RGB
    input_image = input_image / 255.
    input_image = np.expand_dims(input_image, 0)

    # prediction
    y_pred = yolo_model.predict_on_batch(input_image)

    # post prediction process
    # grid coords tensor
    coord_x = tf.cast(tf.reshape(tf.tile(tf.range(grid_w), [grid_h]), (1, grid_h, grid_w, 1, 1)), tf.float32)
    coord_y = tf.transpose(coord_x, (0,2,1,3,4))
    coords = tf.tile(tf.concat([coord_x,coord_y], -1), [train_batch_size, 1, 1, 5, 1])
    dims = K.cast_to_floatx(K.int_shape(y_pred)[1:3])
    dims = K.reshape(dims,(1,1,1,1,2))
    # anchors tensor
    anchors = np.array(anchors)
    anchors = anchors.reshape(len(anchors) // 2, 2)
    # pred_xy and pred_wh shape (m, grid_w, grid_h, Anchors, 2)
    pred_xy = K.sigmoid(y_pred[:,:,:,:,0:2])
    pred_xy = (pred_xy + coords)
    pred_xy = pred_xy / dims
    pred_wh = K.exp(y_pred[:,:,:,:,2:4])
    pred_wh = (pred_wh * anchors)
    pred_wh = pred_wh / dims
    # pred_confidence
    box_conf = K.sigmoid(y_pred[:,:,:,:,4:5])  
    # pred_class
    box_class_prob = K.softmax(y_pred[:,:,:,:,5:])

    # Reshape
    pred_xy = pred_xy[0,...]
    pred_wh = pred_wh[0,...]
    box_conf = box_conf[0,...]
    box_class_prob = box_class_prob[0,...]

    # Convert box coords from x,y,w,h to x1,y1,x2,y2
    box_xy1 = pred_xy - 0.5 * pred_wh
    box_xy2 = pred_xy + 0.5 * pred_wh
    boxes = K.concatenate((box_xy1, box_xy2), axis=-1)

    # Filter boxes
    box_scores = box_conf * box_class_prob
    box_classes = K.argmax(box_scores, axis=-1) # best score index
    box_class_scores = K.max(box_scores, axis=-1) # best score
    prediction_mask = box_class_scores >= score_threshold
    boxes = tf.boolean_mask(boxes, prediction_mask)
    scores = tf.boolean_mask(box_class_scores, prediction_mask)
    classes = tf.boolean_mask(box_classes, prediction_mask)

    # Scale boxes back to the image shape (assumes a square image, image_h == image_w)
    boxes = boxes * image_h

    # Non Max Suppression
    selected_idx = tf.image.non_max_suppression(boxes, scores, 50, iou_threshold=iou_threshold)
    boxes = K.gather(boxes, selected_idx)
    scores = K.gather(scores, selected_idx)
    classes = K.gather(classes, selected_idx)
    
    if plot:
        # Draw image
        f, (ax1) = plt.subplots(1, 1, figsize=(10, 10))
        ax1.imshow(image[:,:,::-1])
        count_detected = boxes.shape[0]
        ax1.set_title('Detected objects count : {}'.format(count_detected))
        for i in range(count_detected):
            box = boxes[i,...]
            x = box[0]
            y = box[1]
            w = box[2] - box[0]
            h = box[3] - box[1]
            classe = classes[i].numpy()
            if classe == 0:
                color = (0, 1, 0)
            else:
                color = (1, 0, 0)
            rect = patches.Rectangle((x.numpy(), y.numpy()), w.numpy(), h.numpy(), linewidth = 3, edgecolor=color,facecolor='none')
            ax1.add_patch(rect)
            
    return boxes, scores, classes
Example #14
def testBackEnd():
    x = tf.zeros([3, 4], tf.int32)
    print('x=', x)
    x = tf.zeros((3, 4), tf.int32)
    print('x=', x)
    return  # NOTE: everything below is unreachable scratch code, kept for reference

    a = tf.constant([1, 2, 3, 4, 5, 6, 7, 8], dtype=tf.float32)
    a = K.reshape(a, (2, 4))  # 8 elements reshape to 2 x 4
    print('a=', a)

    a = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    #a = K.abs(-1)
    print('a=', type(a), a)  #a= tf.Tensor(1, shape=(), dtype=int32)
    # a = a.numpy()
    # print('a=',a)

    # a = tf.zeros([0, 3])
    # a = tf.concat([a, [[1, 2, 3], [5, 6, 8]]], axis=0)
    # print('a=',type(a),a)

    b = tf.constant([[1, 8], [2, 3]], dtype=tf.float32)

    c = K.square(a - b)
    print('c=', c)
    d = K.sum(c, axis=0)
    print('d=', d)
    d = K.sum(c, axis=1)
    print('d=', d)

    d = K.sum(c, axis=[0, 1])
    print('d=', d)
    return

    a = K.abs([-1, 0, 9, -10])
    print('a=', a)  #a= tf.Tensor([ 1  0  9 10], shape=(4,), dtype=int32)

    a = K.abs(np.array([-1, 0, 9, -10]))
    print('a=', a)  #a= tf.Tensor([ 1  0  9 10], shape=(4,), dtype=int32)

    a = K.all(np.array([-1, 0, 9, -10]), axis=0)
    print('a=', a)  #a= tf.Tensor(False, shape=(), dtype=bool)

    a = K.all(np.array([[-1, -2, -1], [-1, 0, 9]]), axis=0)  #x axis
    print('a=', a)  #a= tf.Tensor([ True False  True], shape=(3,), dtype=bool)
    a = K.all(np.array([[-1, -2, -1], [-1, 0, 9]]), axis=1)  #y axis
    print('a=', a)  #a= tf.Tensor([ True False], shape=(2,), dtype=bool)

    a = K.arange(1, 100, 10)
    print(
        'a=', a
    )  #a= tf.Tensor([ 1 11 21 31 41 51 61 71 81 91], shape=(10,), dtype=int32)

    a = K.sum(np.array([-1, 0, 9, -10]))
    print('a=', a)  #a= tf.Tensor(-2, shape=(), dtype=int32)

    a = K.square(np.array([-1, 0, 9, -10]))
    print('a=', a)  #a= tf.Tensor([  1   0  81 100], shape=(4,), dtype=int32)

    x = K.placeholder(shape=(2, 3))
    y = K.placeholder(shape=(3, 4))
    xy = K.dot(x, y)
    shape = K.int_shape(xy)
    print('xy=', xy)  #xy= Tensor("MatMul:0", shape=(2, 4), dtype=float32)
    print('xy shape=', shape)  #xy shape= (2, 4)

    kvar = K.eye(3)
    #K.eval(kvar)
    print('kvar=', kvar)
    '''
    array([[1., 0., 0.],
            [0., 1., 0.],
            [0., 0., 1.]], dtype=float32)>
    '''

    a = np.array([[1, 2], [3, 4]])
    a = K.transpose(a)
    print('a=', a)
    '''
    a= tf.Tensor(
            [[1 3]
            [2 4]], shape=(2, 2), dtype=int32)
    '''

    a = K.clip(np.array([-1, 0, 1, 2, 3, 4, 5]), min_value=0, max_value=3)
    print('a=', a)  #a= tf.Tensor([0 0 1 2 3 3 3], shape=(7,), dtype=int32)
Example #15
    def call(self, inputs, **kwargs):
        input_shape = K.int_shape(inputs)
        sequence_length, d_model = input_shape[-2:]
        # output of the "sigmoid halting unit" (not the probability yet)
        halting = K.sigmoid(
            K.reshape(
                K.bias_add(
                    K.dot(K.reshape(inputs, [-1, d_model]),
                          self.halting_kernel),
                    self.halting_biases,
                    data_format='channels_last'),
                [-1, sequence_length]))
        if self.zeros_like_halting is None:
            self.initialize_control_tensors(halting)
        # useful flags
        step_is_active = K.greater(self.halt_budget, 0)
        no_further_steps = K.less_equal(self.halt_budget - halting, 0)
        # halting probability is equal to
        # a. halting output if this isn't the last step (we have some budget)
        # b. to remainder if it is,
        # c. and zero for the steps that shouldn't be executed at all
        #    (out of budget for them)
        halting_prob = K.switch(
            step_is_active,
            K.switch(
                no_further_steps,
                self.remainder,
                halting),
            self.zeros_like_halting)
        self.active_steps += K.switch(
            step_is_active,
            self.ones_like_halting,
            self.zeros_like_halting)
        # We don't know which step is the last, so we keep updating
        # expression for the loss with each call of the layer
        self.ponder_cost = (
            self.time_penalty_t * K.mean(self.remainder + self.active_steps))
        # Updating "the remaining probability" and the halt budget
        self.remainder = K.switch(
            no_further_steps,
            self.remainder,
            self.remainder - halting)
        self.halt_budget -= halting  # OK to become negative

        # If none of the inputs are active at this step, then instead
        # of zeroing them out by multiplying to all-zeroes halting_prob,
        # we can simply use a constant tensor of zeroes, which means that
        # we won't even calculate the output of those steps, saving
        # some real computational time.
        if self.zeros_like_input is None:
            self.zeros_like_input = K.zeros_like(
                inputs, name='zeros_like_input')
        # just because K.any(step_is_active) doesn't work in PlaidML
        any_step_is_active = K.greater(
            K.sum(K.cast(step_is_active, 'int32')), 0)
        step_weighted_output = K.switch(
            any_step_is_active,
            K.expand_dims(halting_prob, -1) * inputs,
            self.zeros_like_input)
        if self.weighted_output is None:
            self.weighted_output = step_weighted_output
        else:
            self.weighted_output += step_weighted_output
        return [inputs, self.weighted_output]
Example #16
def build_model(hidden_dim, max_seq_len, vocabulary_size):
    ## encoder Input and layers
    encoder_in = Input((max_seq_len, ), dtype='int32', name='encoder_in')
    ith_str = Input((1, ), dtype='int32', name='ith_str')
    word = Input((1, ), dtype='int32', name='word')
    OneHot = Lambda(lambda x: K.one_hot(x, vocabulary_size), name='OneHot')

    ## building encoder
    encoder_in_and_word = Concatenate()([ith_str, word, encoder_in])
    encoder_GRU = GRU(hidden_dim, return_state=True, return_sequences=True)
    encoder_out, state = encoder_GRU(OneHot(encoder_in_and_word))
    encoder_out_dup = RepeatVector(max_seq_len)(encoder_out[:, -1])

    ## decoder Input and layers
    decoder_in = Input((max_seq_len, ), dtype='int32', name='decoder_in')
    ith = Input((1, ), dtype='int32', name='ith')
    decoder_GRU = GRU(hidden_dim, return_sequences=True, return_state=True)
    decoder_Dense = Dense(vocabulary_size,
                          activation='softmax',
                          name='decoder_out')

    ## building decoder
    ith_dup = RepeatVector(max_seq_len)(K.cast(ith, 'float'))
    word_dup = K.reshape(RepeatVector(max_seq_len)(word), (-1, max_seq_len))
    x = Concatenate()(
        [ith_dup,
         OneHot(word_dup),
         OneHot(decoder_in), encoder_out_dup])
    x, _ = decoder_GRU(x, initial_state=state)
    decoder_out = decoder_Dense(x)

    ## get the specific word
    gather = K.concatenate(
        [K.reshape(tf.range(K.shape(decoder_out)[0]), (-1, 1)), ith])
    specific_word = tf.gather_nd(decoder_out, gather)
    specific_word = Lambda(tf.identity, name='word_out')(
        specific_word
    )  # Add this layer because the name of tf.gather_nd is too ugly

    model = Model([encoder_in, decoder_in, ith, ith_str, word],
                  [decoder_out, specific_word])

    ## building decoder model given encoder_out and states
    decoder_in_one_word = Input((1, ),
                                dtype='int32',
                                name='decoder_in_one_word')
    decoder_state_in = Input((hidden_dim, ), name='decoder_state_in')
    encoder_out = Input((hidden_dim, ), name='decoder_encoder_out')
    x = Concatenate()([
        K.cast(ith, 'float')[:, tf.newaxis],
        OneHot(word),
        OneHot(decoder_in_one_word), encoder_out[:, tf.newaxis]
    ])
    x, decoder_state = decoder_GRU(x, initial_state=decoder_state_in)
    decoder_out = decoder_Dense(x)
    decoder_model = Model(
        [decoder_in_one_word, encoder_out, decoder_state_in, ith, word],
        [decoder_out, decoder_state])

    encoder_in = Input((None, ), dtype='int32')
    encoder_in_and_word = Concatenate()([ith_str, word, encoder_in])
    encoder_out, state = encoder_GRU(OneHot(encoder_in_and_word))
    encoder_model = Model([encoder_in, ith_str, word], [encoder_out, state])
    return model, encoder_model, decoder_model
Example #17
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
Example #18
def custom_loss(target, output):
    output **= (1 / hp.T)
    output /= K.reshape(K.sum(output, axis=1), (-1, 1))
    return (hp.T)**2 * K.categorical_crossentropy(target, output)
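
This is the temperature-scaled (distillation-style) cross-entropy: raising already-softmaxed outputs to 1/T and renormalizing softens them exactly as dividing the logits by T would, and the T^2 factor restores the gradient scale. A toy check, with a stand-in for the hp hyperparameter module (the real one is not shown here):

import tensorflow as tf
from tensorflow.keras import backend as K

class hp:   # stand-in; the real module defines the temperature T
    T = 4.0

target = tf.constant([[0.0, 1.0]])
output = tf.constant([[0.2, 0.8]])
print(float(K.mean(custom_loss(target, output))))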
Example #19
def echo_sample(inputs, clip=None, d_max=100, batch=100, multiplicative=False, echo_mc = False,
                replace=False, fx_clip=None, plus_sx=True, calc_log=True, return_noise=False, **kwargs):
    # kwargs unused

    # inputs should be specified as list:
    #   [ f(X), s(X) ] with s(X) in log space if calc_log = True 
    # plus_sx =
    #   True if logsigmoid activation for s(X)
    #   False for softplus (equivalent)
    if isinstance(inputs, list):
        fx = inputs[0]
        sx = inputs[-1]
    else:
        fx = inputs  # note: sx is left undefined here; the list form is expected

    # TODO: calc_log currently determines both whether to do log-space
    # calculations AND whether sx is already a log.
    fx_shape = fx.get_shape()
    sx_shape = sx.get_shape()
    z_dim = K.int_shape(fx)[-1]
    batch_size = batch
    batch = K.shape(fx)[0]

    if clip is None:
        # clip is multiplied by s(x) to ensure that the last sampled term
        # satisfies (clip^d_max) * f(x) < machine precision
        max_fx = fx_clip if fx_clip is not None else 1.0
        clip = (2**(-23) / max_fx)**(1.0 / d_max)
    
    # fx_clip can be used to restrict magnitude of f(x), not used in paper
    # defaults to no clipping and M = 1 (e.g. with tanh activation for f(x))
    if fx_clip is not None: 
        fx = K.clip(fx, -fx_clip, fx_clip)
    

    if not calc_log:
        sx = tf.multiply(clip, sx)
        sx = tf.where(tf.abs(sx) < K.epsilon(), K.epsilon() * tf.sign(sx), sx)
        # raise ValueError('calc_log=False is not supported; sx has to be log_sigmoid')
    else:
        # plus_sx based on activation for sx = s(x):
        #   True for log_sigmoid
        #   False for softplus
        sx = tf.log(clip) + (-1 * sx if not plus_sx else sx)
    
    # if echo_mc:
    #     # use mean-centered fx for noise
    #     fx = fx - K.mean(fx, axis=0, keepdims=True)


    if replace:  # replace doesn't set the batch size (permute_neighbor_indices does)
        sx = K.batch_flatten(sx) if len(sx_shape) > 2 else sx
        fx = K.batch_flatten(fx) if len(fx_shape) > 2 else fx
        inds = K.reshape(random_indices(batch, d_max), (-1, 1))
        select_sx = gather_nd_reshape(sx, inds, (-1, d_max, z_dim))
        select_fx = gather_nd_reshape(fx, inds, (-1, d_max, z_dim))

        if len(sx_shape) > 2:
            select_sx = K.expand_dims(K.expand_dims(select_sx, 2), 2)
            sx = K.expand_dims(K.expand_dims(sx, 1), 1)
        if len(fx_shape) > 2:
            select_fx = K.expand_dims(K.expand_dims(select_fx, 2), 2)
            fx = K.expand_dims(K.expand_dims(fx, 1), 1)

    else:
        # batch x batch x z_dim
        # for all i, stack_sx[i, :, :] = sx
        repeat = tf.multiply(tf.ones_like(tf.expand_dims(fx, 0)),
                             tf.ones_like(tf.expand_dims(fx, 1)))
        stack_fx = tf.multiply(fx, repeat)
        stack_sx = tf.multiply(sx, repeat)

        # select a set of d_max examples from the original fx / sx for each batch entry
        inds = indices_without_replacement(batch, d_max)

        # Alternative method; note that permute_neighbor_indices sets the
        # batch_size dimension != None, which necessitates fit_generator,
        # e.g. in training, to avoid 'remainder' batches when data_size % batch > 0:
        # inds = permute_neighbor_indices(batch_size, d_max, replace=replace)

        select_sx = tf.gather_nd(stack_sx, inds)
        select_fx = tf.gather_nd(stack_fx, inds)
      
    if calc_log:
        sx_echoes = tf.cumsum(select_sx, axis=1, exclusive=True)
    else:
        sx_echoes = tf.cumprod(select_sx, axis=1, exclusive=True)

    # calculates S(x0)S(x1)...S(x_l) * f(x_(l+1))
    sx_echoes = tf.exp(sx_echoes) if calc_log else sx_echoes
    fx_sx_echoes = tf.multiply(select_fx, sx_echoes)

    # performs the sum over d_max terms to calculate the noise
    noise = tf.reduce_sum(fx_sx_echoes, axis=1)

    sx = sx if not calc_log else tf.exp(sx)

    if multiplicative:  # log z according to echo
        output = tf.exp(fx + tf.multiply(sx, noise))
    else:
        output = fx + tf.multiply(sx, noise)

    return output if not return_noise else noise
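
echo_sample is typically applied as a Lambda over the encoder's [f(x), log s(x)] heads. A minimal sketch, assuming the TF1-era helpers referenced above (indices_without_replacement, etc.) are available; the layer sizes are made up:

import tensorflow as tf
from tensorflow.keras import layers, models

inp = layers.Input(shape=(64,))
h = layers.Dense(128, activation='relu')(inp)
fx = layers.Dense(32, activation='tanh')(h)                   # f(x), |f(x)| <= 1
log_sx = layers.Dense(32, activation=tf.math.log_sigmoid)(h)  # log s(x)
z = layers.Lambda(echo_sample)([fx, log_sx])
model = models.Model(inp, z)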
Example #20
def cat_acc(y_true, y_pred):
    y_true = K.reshape(y_true, shape=(-1, 7, 37))
    y_pred = K.reshape(y_pred, shape=(-1, 7, 37))
    return K.mean(tf.keras.metrics.categorical_accuracy(y_true, y_pred))
Example #21
def body(X, Kernel, n, f, c, kH, kW, w_out, i, j):
    X_reshape = K.reshape(X[n, c, i:i + kH, j:j + kW], [1, -1])
    a = K.sum(Kernel * X_reshape)

    return a
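
body computes one output element of a valid convolution: the dot product of a kH x kW patch of X (NCHW layout) with the flattened kernel. A NumPy equivalent for a single position, with all values made up:

import numpy as np

X = np.random.rand(1, 1, 5, 5).astype('float32')
Kernel = np.random.rand(1, 9).astype('float32')    # flattened 3x3 kernel
patch = X[0, 0, 1:4, 2:5].reshape(1, -1)           # n=0, c=0, i=1, j=2
a = np.sum(Kernel * patch)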
Example #22
def top_3_k(y_true, y_pred):
    # Reshape into 2-d
    y_true = K.reshape(y_true, (-1, 37))
    y_pred = K.reshape(y_pred, (-1, 37))
    return K.mean(
        tf.keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=3))
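
A usage sketch for the reshaped metrics of Examples #20 and #22. The 7 x 37 layout is an assumption about the model (e.g. 7 characters, each one-hot over a 37-symbol alphabet, as in licence-plate recognition); the model itself is a placeholder:

from tensorflow.keras import layers, models

model = models.Sequential([
    layers.Input(shape=(7 * 37, )),
    layers.Dense(7 * 37, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='mse', metrics=[cat_acc, top_3_k])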
Example #23
    def call(self, obj_vecs, pred_vecs, edges):
        """
    Inputs:
    - obj_vecs: FloatTensor of shape (B, O, D) giving vectors for all objects
    - pred_vecs: FloatTensor of shape (B, T, D) giving vectors for all predicates
    - edges: LongTensor of shape (B, T, 2) where edges[k] = [i, j] indicates the
      presence of a triple [obj_vecs[i], pred_vecs[k], obj_vecs[j]]
    
    Outputs:
    - new_obj_vecs: FloatTensor of shape (B, O, D) giving new vectors for objects
    - new_pred_vecs: FloatTensor of shape (B, T, D) giving new vectors for predicates
    """

        O, T = K.int_shape(obj_vecs)[1], K.int_shape(pred_vecs)[1]
        Din, H, Dout = self.input_dim, self.hidden_dim, self.output_dim

        # Break apart indices for subjects and objects; these have shape (B, T,)
        s_idx, o_idx = tf.split(edges, 2, axis=2)  #shape =(B,T,1)

        s_idx = K.reshape(s_idx, (-1, T))  #shape =(B,T)
        o_idx = K.reshape(o_idx, (-1, T))

        i = tf.meshgrid(tf.range(self.batch_size), indexing="ij")
        i = K.reshape(i, (self.batch_size, 1))
        i = tf.broadcast_to(i, (self.batch_size, T))

        idx_s = tf.stack([i, s_idx], axis=-1)
        idx_o = tf.stack([i, o_idx], axis=-1)

        cur_s_vecs = tf.gather_nd(obj_vecs, idx_s)
        cur_o_vecs = tf.gather_nd(obj_vecs, idx_o)

        # Get current vectors for triples; shape is (B, T, 3 * Din)
        # Pass through net1 to get new triple vecs; shape is (B, T, 2 * H + Dout)
        cur_t_vecs = K.concatenate([cur_s_vecs, pred_vecs, cur_o_vecs], axis=2)
        new_t_vecs = self.net1(cur_t_vecs)

        # Break apart into new s, p, and o vecs; s and o vecs have shape (B, T, H) and
        # p vecs have shape (B, T, Dout)
        new_s_vecs = new_t_vecs[:, :, :H]
        new_p_vecs = new_t_vecs[:, :, H:(H + Dout)]
        new_o_vecs = new_t_vecs[:, :, (H + Dout):(2 * H + Dout)]

        # Allocate space for pooled object vectors of shape (B, O, H)
        pooled_obj_vecs = tf.zeros(shape=(self.batch_size, O, H))
        shape = K.shape(pooled_obj_vecs)

        # Sum vectors for objects that appear in multiple triples; tf.scatter_nd
        # accumulates updates at duplicate indices, so adding the two scattered
        # tensors gives scatter_add semantics. The batch indices are rebuilt
        # here exactly as above.
        s_idx = K.reshape(s_idx, (-1, T))
        o_idx = K.reshape(o_idx, (-1, T))

        i = tf.meshgrid(tf.range(self.batch_size), indexing="ij")
        i = K.reshape(i, (self.batch_size, 1))
        i = tf.broadcast_to(i, (self.batch_size, T))

        idx_s = tf.stack([i, s_idx], axis=-1)
        idx_o = tf.stack([i, o_idx], axis=-1)

        pooled_obj_vecs = (tf.scatter_nd(idx_s, new_s_vecs, shape=shape) +
                           tf.scatter_nd(idx_o, new_o_vecs,
                                         shape=shape))  # shape (B, O, H)

        if self.pooling == 'avg':
            # Figure out how many times each object has appeared, again by
            # summing two scatters.
            ones = tf.ones(shape=(self.batch_size, T, H))
            obj_counts = (tf.scatter_nd(idx_s, ones, shape=shape) +
                          tf.scatter_nd(idx_o, ones, shape=shape))

            # Divide the new object vectors by the number of times they
            # appeared, but first clamp at 1 to avoid dividing by zero;
            # objects that appear in no triples will have output vector 0
            # so this will not affect them.
            obj_counts = K.clip(obj_counts, min_value=1, max_value=None)
            pooled_obj_vecs = pooled_obj_vecs / obj_counts

        # Send pooled object vectors through net2 to get output object vectors
        # of shape (B, O, Dout); this must run for both pooling modes.
        new_obj_vecs = self.net2(pooled_obj_vecs)

        return new_obj_vecs, new_p_vecs
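
tf.scatter_nd accumulates updates that target the same index, which is what the pooled sums above rely on. A tiny check:

import tensorflow as tf

idx = tf.constant([[0], [0], [2]])
upd = tf.constant([1.0, 2.0, 5.0])
print(tf.scatter_nd(idx, upd, shape=[3]))  # [3. 0. 5.]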
Example #24
def styleTransfer(cData, sData, tData):
    print("   Building transfer model.")
    contentTensor = K.variable(cData, dtype=tf.float64)
    styleTensor = K.variable(sData, dtype=tf.float64)
    genFlatten = K.placeholder(shape=(CONTENT_IMG_H * CONTENT_IMG_W * 3, ),
                               dtype=tf.float64)
    genTensor = K.reshape(genFlatten, (1, CONTENT_IMG_H, CONTENT_IMG_W, 3))
    inputTensor = K.concatenate([contentTensor, styleTensor, genTensor],
                                axis=0)
    model = vgg19.VGG19(include_top=False,
                        weights='imagenet',
                        input_tensor=inputTensor)
    outputDict = dict([(layer.name, layer.output) for layer in model.layers])
    print("   VGG19 model loaded.")
    loss = 0.0
    styleLayerNames = [
        "block1_conv1", "block2_conv1", "block3_conv1", "block4_conv1",
        "block5_conv1"
    ]
    contentLayerName = "block5_conv2"
    print("   Calculating content loss.")
    contentLayer = outputDict[contentLayerName]
    contentOutput = contentLayer[0, :, :, :]
    genOutput = contentLayer[2, :, :, :]
    loss += CONTENT_WEIGHT * contentLoss(contentOutput, genOutput)

    print("   Calculating style loss.")

    styleLayerWeight = 1 / len(styleLayerNames)
    for layerName in styleLayerNames:
        styleLayer = outputDict[layerName]
        originalStyleOutput = styleLayer[1, :, :, :]
        genStyleOutput = styleLayer[2, :, :, :]
        loss += STYLE_WEIGHT * styleLayerWeight * styleLoss(
            originalStyleOutput, genStyleOutput)

    loss = totalLoss(loss)

    gradients = K.gradients(loss, genFlatten)[0]
    loss_function = K.function([genFlatten], [loss, gradients])
    gen_image_np = tData.flatten()
    print(gradients)
    print(loss)
    print(gen_image_np.shape)
    print("   Beginning transfer.")
    for i in range(TRANSFER_ROUNDS):
        print("   Step %d." % i)

        gen_image_np, f, d = fmin_l_bfgs_b(func=loss_and_grads,
                                           x0=gen_image_np,
                                           maxiter=100,
                                           maxls=1200,
                                           maxfun=5000000)
        print("      Loss: %f." % f)
        img = deprocessImage(gen_image_np)
        saveFile = "./transfer_" + str(i) + ".jpg"
        imageio.imwrite(saveFile, img)
        print("      Image saved to \"%s\"." % saveFile)
    print("   Transfer complete.")
Example No. 25
    def _process_sample(args):
        _hm, _reg, _wh, _kps, _hm_hp, _hp_offset = args
        _scores, _inds = tf.math.top_k(_hm, k=k, sorted=True)
        _classes = K.cast(_inds % cat, 'float32')
        _inds = K.cast(_inds / cat, 'int32')
        _xs = K.cast(_inds % width, 'float32')
        _ys = K.cast(K.cast(_inds / width, 'int32'), 'float32')
        _wh = K.gather(_wh, _inds)
        _reg = K.gather(_reg, _inds)
        _kps = K.gather(_kps, _inds)

        # shift keypoints by their center
        _kps_x = _kps[:, ::2]
        _kps_y = _kps[:, 1::2]
        _kps_x = _kps_x + K.expand_dims(_xs, -1)  # k x J
        _kps_y = _kps_y + K.expand_dims(_ys, -1)  # k x J
        _kps = K.stack([_kps_x, _kps_y], -1)  # k x J x 2

        _xs = _xs + _reg[..., 0]
        _ys = _ys + _reg[..., 1]

        _x1 = _xs - _wh[..., 0] / 2
        _y1 = _ys - _wh[..., 1] / 2
        _x2 = _xs + _wh[..., 0] / 2
        _y2 = _ys + _wh[..., 1] / 2

        # snap center keypoints to the closest heatmap keypoint
        def _process_channel(args):
            __kps, __hm_hp = args
            thresh = 0.1
            __hm_scores, __hm_inds = tf.math.top_k(__hm_hp, k=k, sorted=True)
            __hm_xs = K.cast(__hm_inds % width, 'float32')
            __hm_ys = K.cast(K.cast(__hm_inds / width, 'int32'), 'float32')
            __hp_offset = K.gather(_hp_offset, __hm_inds)
            __hm_xs = __hm_xs + __hp_offset[..., 0]
            __hm_ys = __hm_ys + __hp_offset[..., 1]
            mask = K.cast(__hm_scores > thresh, 'float32')
            __hm_scores = (1. - mask) * -1. + mask * __hm_scores
            __hm_xs = (1. - mask) * -10000. + mask * __hm_xs
            __hm_ys = (1. - mask) * -10000. + mask * __hm_ys
            __hm_kps = K.stack([__hm_xs, __hm_ys], -1)  # k x 2
            __broadcast_hm_kps = K.expand_dims(__hm_kps, 1)  # k x 1 x 2
            __broadcast_kps = K.expand_dims(__kps, 0)  # 1 x k x 2
            dist = K.sqrt(
                K.sum(K.pow(__broadcast_kps - __broadcast_hm_kps, 2),
                      2))  # k, k
            min_dist = K.min(dist, 0)
            min_ind = K.argmin(dist, 0)
            __hm_scores = K.gather(__hm_scores, min_ind)
            __hm_kps = K.gather(__hm_kps, min_ind)
            mask = (K.cast(__hm_kps[..., 0] < _x1, 'float32') +
                    K.cast(__hm_kps[..., 0] > _x2, 'float32') +
                    K.cast(__hm_kps[..., 1] < _y1, 'float32') +
                    K.cast(__hm_kps[..., 1] > _y2, 'float32') +
                    K.cast(__hm_scores < thresh, 'float32') + K.cast(
                        min_dist > 0.3 *
                        (K.maximum(_wh[..., 0], _wh[..., 1])), 'float32'))
            mask = K.expand_dims(mask, -1)
            mask = K.cast(mask > 0, 'float32')
            __kps = (1. - mask) * __hm_kps + mask * __kps
            return __kps

        _kps = K.permute_dimensions(_kps, (1, 0, 2))  # J x k x 2
        _hm_hp = K.permute_dimensions(_hm_hp, (1, 0))  # J x -1
        _kps = K.map_fn(_process_channel, [_kps, _hm_hp], dtype='float32')
        _kps = K.reshape(K.permute_dimensions(_kps, (1, 2, 0)),
                         (k, -1))  # k x J * 2

        # rescale to image coordinates
        _x1 = output_stride * _x1
        _y1 = output_stride * _y1
        _x2 = output_stride * _x2
        _y2 = output_stride * _y2
        _kps = output_stride * _kps

        _boxes = K.stack([_x1, _y1, _x2, _y2], -1)
        _scores = K.expand_dims(_scores, -1)
        _classes = K.expand_dims(_classes, -1)
        _detection = K.concatenate([_boxes, _scores, _kps, _classes], -1)
        return _detection
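
_process_sample decodes a single image; CenterNet-style decoders usually map
it over the batch. A hedged sketch of the surrounding call, with assumed
per-sample layouts (the tensor names are illustrative, not from this excerpt):

# Hypothetical wrapper, assuming per-sample shapes hm (H*W*cat,), reg (H*W, 2),
# wh (H*W, 2), kps (H*W, 2*J), hm_hp (H*W, J) and hp_offset (H*W, 2).
detections = K.map_fn(
    _process_sample,
    [hm, reg, wh, kps, hm_hp, hp_offset],
    dtype='float32')  # -> (batch, k, 4 + 1 + 2*J + 1)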
Example No. 26
    def call(self, x):
        x = x[0]

        #print("!x",x.shape)

        gs = self.gs
        k = self.k
        param = self.param
        C = self.numericalC

        #print("gs",gs,"k",k,"param",param,"C",C)

        for i in range(10):
            t.print(
                "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )

        t.print("calling onetopk", self.metrik, output_stream=sys.stdout)

        #exit()

        mata = K.constant(self.getmata(gs))
        matb = K.constant(self.getmatb(gs))

        #print("mata",mata.shape)
        #print("matb",matb.shape)

        xp = K.permute_dimensions(x, (0, 2, 1))
        #print("xp",xp.shape)

        xa = K.dot(xp, mata)
        xb = K.dot(xp, matb)
        #print("xa",xa.shape,"xb",xb.shape)
        #exit()

        isval = xa[:, self.flag, :] * xb[:, self.flag, :]

        #return isval,isval
        #print("isval",isval.shape)
        #exit()

        ds = xa - xb
        #print("ds",ds.shape)

        dsp = K.permute_dimensions(ds, (0, 2, 1))
        #print("dsp",dsp.shape)

        dspsq = K.square(dsp)
        #print("dspsq",dspsq.shape)
        #print("self.metrik",self.metrik.shape)

        delt = K.dot(dspsq, self.metrik)
        #print("delt",delt.shape)

        delt = K.reshape(
            delt, (-1, self.gs * self.gs)) + (1 - isval) * self.emptyconst

        d = K.reshape(delt, (-1, gs, gs))
        #print("d",d.shape)

        #return d,d

        #####no self interactions
        if self.self_interaction == False:
            one = K.eye(gs)
            #print("one",one.shape)
            d += self.self_interaction_const * one
        #####end no self interactions

        v, _ = t.math.top_k(-d, k=k)
        #print("v",v.shape)
        #return v,v

        vb = v[:, :, -1]
        #print("vb",vb.shape)

        vbs = K.reshape(vb, (-1, gs, 1))
        #print("vbs",vbs.shape)

        su = d + vbs  #plus since top_k(-d)
        #print("su",su.shape)

        #map anything above 0 to 0 and anything below to 1, also map 0 to 1
        #p(-x)=C*d_C(-x)
        #     =d(-C*x)
        #     =1-r(Cx-1)+r(Cx)
        #experimentally:
        #   r(1-Cx)-r(-Cx)

        rel = K.relu(1 - C * su) - K.relu(-C * su)
        #print("rel",rel.shape)

        rel = K.relu(rel) - K.relu(rel - 1)

        #return rel,rel

        dez1 = K.reshape(rel, (-1, self.gs * self.gs))
        #print("dez1",dez1.shape)
        dez2 = dez1 * isval
        #print("dez2",dez2.shape)
        rel = K.reshape(dez2, (-1, self.gs, self.gs))
        print("rel", rel.shape)

        numnei = K.sum(rel, axis=-1)
        print("numnei", numnei.shape)
        factor = self.k / (numnei + 0.00000000001)

        #return K.concatenate((numnei,factor),axis=-1),factor#,factor#numnei,numnei

        print("factor", factor.shape)
        refactor = K.repeat(factor, self.gs)
        print("refactor", refactor.shape)

        refactor = K.permute_dimensions(refactor, (0, 2, 1))

        #return refactor,refactor

        rel = rel * refactor
        print("rel", rel.shape)

        #exit()

        if self.free == 0: return rel, x
        zero1 = K.zeros_like(x[:, :, 0])
        zero1 = K.reshape(zero1, (-1, x.shape[1], 1))
        #print("!",zero1.shape)
        zerolis = []
        for i in range(self.free):
            zerolis.append(zero1)
        zeros = K.concatenate(zerolis, axis=-1)
        #print(zeros.shape)

        return rel, K.concatenate((x, zeros), axis=-1)
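
The two-ReLU expression above is a soft step function: r(1 - C*x) - r(-C*x)
equals 1 for x <= 0, 0 for x >= 1/C, and interpolates linearly in between, so
a large C approximates a hard neighbour/non-neighbour threshold. A standalone
numeric check:

import numpy as np

def relu(x):
    return np.maximum(x, 0.0)

def soft_step(x, C=1000.0):
    # 1 for x <= 0, 0 for x >= 1/C, linear in between.
    return relu(1 - C * x) - relu(-C * x)

print(soft_step(np.array([-0.5, 0.0, 0.0005, 0.1])))
# -> [1.  1.  0.5 0. ]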
Example No. 27
def sparse_gather(y_pred, target_indices, task_name):
    clf_h = Lambda(lambda x: K.reshape(x, (-1, K.int_shape(x)[-1])),
                   name=task_name + '_flatten')(y_pred)
    return Lambda(lambda x: K.gather(x[0], K.cast(x[1], 'int32')),
                  name=task_name + '_gather')([clf_h, target_indices])
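
A hedged usage note: because the first Lambda flattens (batch, seq_len,
hidden) to (batch * seq_len, hidden), target_indices must address flattened
positions, i.e. example_index * seq_len + token_position. A sketch with
illustrative names:

from keras.layers import Input

# Hypothetical input: one flattened target position per selected token.
target_indices = Input(batch_shape=(None,), dtype='int32', name='demo_targets')
task_output = sparse_gather(y_pred, target_indices, task_name='demo')
# task_output: (num_targets, hidden), one row per gathered position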
Example No. 28
 def call(self, x, **kwargs):
     assert isinstance(x, list), 'SliceLayer input is not a list'
     return x[0] * K.reshape(x[1], (-1, 1))
Example No. 29
 def call(self, inputs, q_mask=None, v_mask=None, a_mask=None):
     """实现多头注意力
     q_mask: 对输入的query序列的mask。
             主要是将输出结果的padding部分置0。
     v_mask: 对输入的value序列的mask。
             主要是防止attention读取到padding信息。
     a_mask: 对attention矩阵的mask。
             不同的attention mask对应不同的应用。
     """
     q, k, v = inputs[:3]
     if a_mask:
         if len(inputs) == 3:
             a_mask = 'history_only'
         else:
             a_mask = inputs[3]
     if q_mask is not None:
         if not hasattr(self, 'q_mask_layer'):
             self.q_mask_layer = search_layer(q, q_mask)
         q_mask = self.q_mask_layer.output_mask
     if v_mask is not None:
         if not hasattr(self, 'v_mask_layer'):
             self.v_mask_layer = search_layer(v, v_mask)
         v_mask = self.v_mask_layer.output_mask
     # Pooling
     if self.pool_size > 1:
         is_self_attention = (q is k is v)
         q_in_len = K.shape(q)[1]
         q = sequence_masking(q, q_mask, 0)
         q = divisible_temporal_padding(q, self.pool_size)
         q = pool1d(q, self.pool_size, self.pool_size, pool_mode='avg')
         if is_self_attention:
             k = v = q
         else:
             k = sequence_masking(k, v_mask, 0)
             k = divisible_temporal_padding(k, self.pool_size)
             k = pool1d(k, self.pool_size, self.pool_size, pool_mode='avg')
             v = sequence_masking(v, v_mask, 0)
             v = divisible_temporal_padding(v, self.pool_size)
             v = pool1d(v, self.pool_size, self.pool_size, pool_mode='avg')
         if v_mask is not None:
             v_mask = v_mask[:, ::self.pool_size]
         if a_mask is not None and not is_string(a_mask):
             a_mask = a_mask[..., ::self.pool_size, ::self.pool_size]
     # Linear transformations
     qw = self.q_dense(q)
     kw = self.k_dense(k)
     vw = self.v_dense(v)
     # Reshape
     qw = K.reshape(qw, (-1, K.shape(q)[1], self.heads, self.key_size))
     kw = K.reshape(kw, (-1, K.shape(k)[1], self.heads, self.key_size))
     vw = K.reshape(vw, (-1, K.shape(v)[1], self.heads, self.head_size))
     # Attention
     a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
     # Relative position encoding
     if self.max_relative_position is not None:
         q_idxs = K.arange(0, K.shape(q)[1], dtype='int32')
         q_idxs = K.expand_dims(q_idxs, 1)
         v_idxs = K.arange(0, K.shape(v)[1], dtype='int32')
         v_idxs = K.expand_dims(v_idxs, 0)
         pos_ids = v_idxs - q_idxs
         pos_ids = K.clip(pos_ids, -self.max_relative_position,
                          self.max_relative_position)
         pos_ids = pos_ids + self.max_relative_position
         pos_embeddings = K.gather(self.relative_embeddings, pos_ids)
         a = a + tf.einsum('bjhd,jkd->bhjk', qw, pos_embeddings)
     # Attention (continued)
     a = a / self.key_size ** 0.5
     a = sequence_masking(a, v_mask, 1, -1)
     if a_mask is not None:
         if is_string(a_mask):
             ones = K.ones_like(a[:1, :1])
             a_mask = (ones - tf.linalg.band_part(ones, -1, 0)) * 1e12
             a = a - a_mask
         else:
             a = a - (1 - a_mask) * 1e12
     a = K.softmax(a)
     # Assemble the output
     o = tf.einsum('bhjk,bkhd->bjhd', a, vw)
     if self.max_relative_position is not None:
         o = o + tf.einsum('bhjk,jkd->bjhd', a, pos_embeddings)
     o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
     o = self.o_dense(o)
     # Restore the original sequence length
     if self.pool_size > 1:
         o = K.repeat_elements(o, self.pool_size, 1)[:, :q_in_len]
     # Return the result
     o = sequence_masking(o, q_mask, 0)
     return o
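
The relative-position branch gathers rows of self.relative_embeddings with
indices clipped to [0, 2 * max_relative_position]. A hedged sketch of how that
weight could be declared in the layer's build() (the initializer choice is an
assumption):

# Hypothetical build()-time weight matching the K.gather above: one
# key_size-dimensional embedding per clipped relative offset.
self.relative_embeddings = self.add_weight(
    name='relative_embeddings',
    shape=(2 * self.max_relative_position + 1, self.key_size),
    initializer='glorot_uniform')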
Example No. 30
    def call(self, x, mask=None):

        assert (len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):

            x = rois[0, roi_idx, 0]
            y = rois[0, roi_idx, 1]
            w = rois[0, roi_idx, 2]
            h = rois[0, roi_idx, 3]

            row_length = w / float(self.pool_size)
            col_length = h / float(self.pool_size)

            num_pool_regions = self.pool_size

            # NOTE: the RoiPooling implementation differs between theano and tensorflow due to the lack of a resize op
            # in theano. The theano implementation is much less efficient and leads to long compile times

            if self.dim_ordering == 'th':
                for jy in range(num_pool_regions):
                    for ix in range(num_pool_regions):
                        x1 = x + ix * row_length
                        x2 = x1 + row_length
                        y1 = y + jy * col_length
                        y2 = y1 + col_length

                        x1 = K.cast(x1, 'int32')
                        x2 = K.cast(x2, 'int32')
                        y1 = K.cast(y1, 'int32')
                        y2 = K.cast(y2, 'int32')

                        x2 = x1 + K.maximum(1, x2 - x1)
                        y2 = y1 + K.maximum(1, y2 - y1)

                        new_shape = [
                            input_shape[0], input_shape[1], y2 - y1, x2 - x1
                        ]

                        x_crop = img[:, :, y1:y2, x1:x2]
                        xm = K.reshape(x_crop, new_shape)
                        pooled_val = K.max(xm, axis=(2, 3))
                        outputs.append(pooled_val)

            elif self.dim_ordering == 'tf':
                x = K.cast(x, 'int32')
                y = K.cast(y, 'int32')
                w = K.cast(w, 'int32')
                h = K.cast(h, 'int32')

                rs = tf.image.resize_images(img[:, y:y + h, x:x + w, :],
                                            (self.pool_size, self.pool_size))
                outputs.append(rs)

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(final_output,
                                 (1, self.num_rois, self.pool_size,
                                  self.pool_size, self.nb_channels))

        if self.dim_ordering == 'th':
            # Move channels ahead of the spatial dims; the 'tf' ordering is
            # already (1, num_rois, pool_size, pool_size, nb_channels).
            final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))

        return final_output
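
A hedged usage sketch for this layer in 'tf' dim ordering; the constructor
arguments are assumptions read off the attributes the code uses:

from keras.layers import Input

# Hypothetical wiring, assuming a constructor RoiPooling(pool_size, num_rois).
num_rois = 32
feature_map = Input(shape=(None, None, 512))  # backbone output, channels last
rois = Input(shape=(num_rois, 4))             # (x, y, w, h) per RoI
pooled = RoiPooling(pool_size=7, num_rois=num_rois)([feature_map, rois])
# pooled: (1, num_rois, 7, 7, 512)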