Example #1
def ssd_model_fn(features, labels, mode, params):
    """model_fn for SSD to be used with our Estimator."""
    shape = labels['shape']
    loc_targets = labels['loc_targets']  # (8,8732,4)
    cls_targets = labels['cls_targets']  # (8,8732)
    match_scores = labels['match_scores']  # (8,8732)

    global global_anchor_info
    decode_fn = global_anchor_info['decode_fn']
    num_anchors_per_layer = global_anchor_info[
        'num_anchors_per_layer']  # 5776+2166+600+150+36+4=8732
    all_num_anchors_depth = global_anchor_info['all_num_anchors_depth']

    # bboxes_pred = decode_fn(loc_targets[0])
    # bboxes_pred = [tf.reshape(preds, [-1, 4]) for preds in bboxes_pred]
    # bboxes_pred = tf.concat(bboxes_pred, axis=0)
    # save_image_op = tf.py_func(save_image_with_bbox,
    #                         [ssd_preprocessing.unwhiten_image(features[0]),
    #                         tf.clip_by_value(cls_targets[0], 0, tf.int64.max),
    #                         match_scores[0],
    #                         bboxes_pred],
    #                         tf.int64, stateful=True)
    # with tf.control_dependencies([save_image_op]):

    # print(all_num_anchors_depth)
    with tf.variable_scope(
            params['model_scope'],
            default_name=None,
            values=[features],
            reuse=tf.AUTO_REUSE):  # params['model_scope']:'ssd300'
        backbone = ssd_net.VGG16Backbone(params['data_format'])
        feature_layers = backbone.forward(
            features, training=(mode == tf.estimator.ModeKeys.TRAIN))
        # print(feature_layers)
        location_pred, cls_pred = ssd_net.multibox_head(
            feature_layers,
            params['num_classes'],
            all_num_anchors_depth,
            data_format=params['data_format']
        )  # (8,16,38,38), (8,84,38,38)  21*4=84,21*6=126

        if params['data_format'] == 'channels_first':
            cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
            location_pred = [
                tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
            ]

        cls_pred = [
            tf.reshape(pred,
                       [tf.shape(features)[0], -1, params['num_classes']])
            for pred in cls_pred
        ]  # rows = boxes predicted from this feature map (e.g. 5776), cols = num_classes (21)
        location_pred = [
            tf.reshape(pred, [tf.shape(features)[0], -1, 4])
            for pred in location_pred
        ]

        cls_pred = tf.concat(cls_pred, axis=1)  # concatenate tensors along the given axis
        location_pred = tf.concat(location_pred, axis=1)

        cls_pred = tf.reshape(
            cls_pred,
            [-1, params['num_classes']])  # rows = batch size times default boxes per image
        location_pred = tf.reshape(location_pred, [-1, 4])

    with tf.device('/cpu:0'):
        # tf.control_dependencies enforces execution order:
        # cls_pred and location_pred are computed before the ops inside this block
        with tf.control_dependencies([cls_pred, location_pred]):
            with tf.name_scope('post_forward'):
                # bboxes_pred = decode_fn(location_pred)
                bboxes_pred = tf.map_fn(
                    lambda _preds: decode_fn(_preds),
                    tf.reshape(location_pred, [tf.shape(features)[0], -1, 4]),
                    dtype=[tf.float32] * len(num_anchors_per_layer),
                    back_prop=False)  # decode every localization prediction into box coordinates
                # cls_targets = tf.Print(cls_targets, [tf.shape(bboxes_pred[0]),tf.shape(bboxes_pred[1]),tf.shape(bboxes_pred[2]),tf.shape(bboxes_pred[3])])
                bboxes_pred = [
                    tf.reshape(preds, [-1, 4]) for preds in bboxes_pred
                ]
                bboxes_pred = tf.concat(bboxes_pred, axis=0)

                flaten_cls_targets = tf.reshape(cls_targets, [-1])
                flaten_match_scores = tf.reshape(match_scores, [-1])
                flaten_loc_targets = tf.reshape(loc_targets, [-1, 4])

                # each positive example has one label
                positive_mask = flaten_cls_targets > 0  # class 0 is background
                n_positives = tf.count_nonzero(positive_mask)  # count the non-zero entries

                batch_n_positives = tf.count_nonzero(cls_targets, -1)

                # (8,8732) tf.logical_and(tf.equal(cls_targets, 0), match_scores > 0.)
                batch_negtive_mask = tf.equal(cls_targets, 0)

                # (8,) number of background boxes for each image in the batch
                batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1)

                # negative_ratio=3
                batch_n_neg_select = tf.cast(
                    params['negative_ratio'] *
                    tf.cast(batch_n_positives, tf.float32), tf.int32)
                batch_n_neg_select = tf.minimum(
                    batch_n_neg_select, tf.cast(batch_n_negtives, tf.int32))

                # hard negative mining for classification
                # (8,8732), predicted probability of background for every box
                predictions_for_bg = tf.nn.softmax(
                    tf.reshape(
                        cls_pred,
                        [tf.shape(features)[0], -1, params['num_classes']
                         ]))[:, :, 0]
                prob_for_negtives = tf.where(
                    batch_negtive_mask,
                    0. - predictions_for_bg,
                    # ignore all the positives
                    0. - tf.ones_like(predictions_for_bg))

                # tf.nn.top_k returns the k largest values in sorted order
                topk_prob_for_bg, _ = tf.nn.top_k(
                    prob_for_negtives, k=tf.shape(prob_for_negtives)[1])
                score_at_k = tf.gather_nd(
                    topk_prob_for_bg,
                    tf.stack([
                        tf.range(tf.shape(features)[0]), batch_n_neg_select - 1
                    ],
                             axis=-1))

                # select the top-k hardest background boxes
                selected_neg_mask = prob_for_negtives >= tf.expand_dims(
                    score_at_k, axis=-1)

                # include both the selected negatives and all positive examples
                final_mask = tf.stop_gradient(
                    tf.logical_or(
                        tf.reshape(
                            tf.logical_and(batch_negtive_mask,
                                           selected_neg_mask), [-1]),
                        positive_mask))
                total_examples = tf.count_nonzero(final_mask)

                # classification gradients only consider positives plus the top-k hard negatives
                cls_pred = tf.boolean_mask(cls_pred, final_mask)

                # do not regress bounding boxes for background anchors
                location_pred = tf.boolean_mask(
                    location_pred, tf.stop_gradient(positive_mask))
                flaten_cls_targets = tf.boolean_mask(
                    tf.clip_by_value(flaten_cls_targets, 0,
                                     params['num_classes']), final_mask)
                flaten_loc_targets = tf.stop_gradient(
                    tf.boolean_mask(flaten_loc_targets, positive_mask))

                # predictions include: the class, its probability, and the bounding box locations
                predictions = {
                    'classes':
                    tf.argmax(cls_pred, axis=-1),
                    'probabilities':
                    tf.reduce_max(tf.nn.softmax(cls_pred,
                                                name='softmax_tensor'),
                                  axis=-1),
                    'loc_predict':
                    bboxes_pred
                }

                cls_accuracy = tf.metrics.accuracy(flaten_cls_targets,
                                                   predictions['classes'])
                metrics = {'cls_accuracy': cls_accuracy}

                # Create a tensor named train_accuracy for logging purposes.
                tf.identity(cls_accuracy[1],
                            name='cls_accuracy')  # creates a named op so it can be logged
                tf.summary.scalar('cls_accuracy', cls_accuracy[1])

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    # cross_entropy = tf.cond(n_positives > 0, lambda: tf.losses.sparse_softmax_cross_entropy(labels=flaten_cls_targets, logits=cls_pred), lambda: 0.)# * (params['negative_ratio'] + 1.)
    # flaten_cls_targets=tf.Print(flaten_cls_targets, [flaten_loc_targets],summarize=50000)
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=flaten_cls_targets, logits=cls_pred) * \
                    (params['negative_ratio'] + 1.)

    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)

    # loc_loss = tf.cond(n_positives > 0, lambda: modified_smooth_l1(location_pred, tf.stop_gradient(flaten_loc_targets), sigma=1.),
    # lambda: tf.zeros_like(location_pred))

    # localization loss
    loc_loss = modified_smooth_l1(location_pred, flaten_loc_targets, sigma=1.)
    # loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets))
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1),
                              name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)

    l2_loss_vars = []
    for trainable_var in tf.trainable_variables():
        if '_bn' not in trainable_var.name:
            if 'conv4_3_scale' not in trainable_var.name:
                # every variable except the conv4_3 L2-normalization scale gets the full L2 penalty
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var))
            else:
                # the conv4_3 scale variable gets a down-weighted (x0.1) L2 penalty
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var) * 0.1)
    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy. The total loss has three
    # parts: classification cross-entropy, localization loss, and L2 regularization.
    total_loss = tf.add(cross_entropy + loc_loss,
                        tf.multiply(params['weight_decay'],
                                    tf.add_n(l2_loss_vars),
                                    name='l2_loss'),
                        name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()

        # lr_decay_factors=0.1,1,0.1,0.01
        lr_values = [
            params['learning_rate'] * decay
            for decay in params['lr_decay_factors']
        ]

        # use a different constant learning rate within each global-step interval
        learning_rate = tf.train.piecewise_constant(
            tf.cast(global_step, tf.int32),
            [int(_) for _ in params['decay_boundaries']],
            # decay_boundaries = 500, 80000, 100000
            lr_values)
        truncated_learning_rate = tf.maximum(learning_rate,
                                             tf.constant(
                                                 params['end_learning_rate'],
                                                 dtype=learning_rate.dtype),
                                             name='learning_rate')
        # Create a tensor named learning_rate for logging purposes.
        tf.summary.scalar('learning_rate', truncated_learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=truncated_learning_rate, momentum=params['momentum'])

        # wraps the optimizer for multi-GPU (tower) training
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss, global_step)
    else:
        train_op = None

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=metrics,
        scaffold=tf.train.Scaffold(
            init_fn=get_init_fn()))  # tf.train.Scaffold handles model initialization
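modified_smooth_l1 is called in the localization loss above but is not part of this listing. For reference, a minimal sketch of the Fast R-CNN smooth-L1 loss that the name and the sigma argument suggest; only the signature is taken from the call sites, the body is an assumption:

import tensorflow as tf

def modified_smooth_l1(loc_pred, loc_targets, sigma=1.):
    # element-wise smooth L1 (x = prediction - target):
    #   0.5 * (sigma * x)**2   where |x| < 1 / sigma**2
    #   |x| - 0.5 / sigma**2   elsewhere
    sigma2 = sigma * sigma
    diff = loc_pred - loc_targets
    abs_diff = tf.abs(diff)
    return tf.where(abs_diff < 1.0 / sigma2,
                    0.5 * sigma2 * tf.square(diff),
                    abs_diff - 0.5 / sigma2)

The caller then sums the four per-coordinate losses of each box and averages over boxes via tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1)), matching the usage above.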
Example #2
def ssd_model_fn(features, labels, mode, params):
    """model_fn for SSD to be used with our Estimator."""
    filename = features['filename']
    shape = features['shape']
    loc_targets = features['loc_targets']
    cls_targets = features['cls_targets']
    match_scores = features['match_scores']
    features = features['image']

    global global_anchor_info
    decode_fn = global_anchor_info['decode_fn']
    num_anchors_per_layer = global_anchor_info['num_anchors_per_layer']
    all_num_anchors_depth = global_anchor_info['all_num_anchors_depth']

    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[features],
                           reuse=tf.AUTO_REUSE):
        backbone = ssd_net.VGG16Backbone(params['data_format'])
        feature_layers = backbone.forward(
            features, training=(mode == tf.estimator.ModeKeys.TRAIN))
        #print(feature_layers)
        location_pred, cls_pred = ssd_net.multibox_head(
            feature_layers,
            params['num_classes'],
            all_num_anchors_depth,
            data_format=params['data_format'])
        if params['data_format'] == 'channels_first':
            cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
            location_pred = [
                tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
            ]

        cls_pred = [
            tf.reshape(pred,
                       [tf.shape(features)[0], -1, params['num_classes']])
            for pred in cls_pred
        ]
        location_pred = [
            tf.reshape(pred, [tf.shape(features)[0], -1, 4])
            for pred in location_pred
        ]

        cls_pred = tf.concat(cls_pred, axis=1)
        location_pred = tf.concat(location_pred, axis=1)

        cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
        location_pred = tf.reshape(location_pred, [-1, 4])

    with tf.device('/cpu:0'):
        bboxes_pred = decode_fn(location_pred)
        bboxes_pred = tf.concat(bboxes_pred, axis=0)
        selected_bboxes, selected_scores = parse_by_class(
            cls_pred, bboxes_pred, params['num_classes'],
            params['select_threshold'], params['min_size'],
            params['keep_topk'], params['nms_topk'], params['nms_threshold'])

    predictions = {'filename': filename, 'shape': shape}
    for class_ind in range(1, params['num_classes']):
        predictions['scores_{}'.format(class_ind)] = tf.expand_dims(
            selected_scores[class_ind], axis=0)
        predictions['bboxes_{}'.format(class_ind)] = tf.expand_dims(
            selected_bboxes[class_ind], axis=0)

    flaten_cls_targets = tf.reshape(cls_targets, [-1])
    flaten_match_scores = tf.reshape(match_scores, [-1])
    flaten_loc_targets = tf.reshape(loc_targets, [-1, 4])

    # each positive example has one label
    positive_mask = flaten_cls_targets > 0
    n_positives = tf.count_nonzero(positive_mask)

    batch_n_positives = tf.count_nonzero(cls_targets, -1)

    # alternative: tf.logical_and(tf.equal(cls_targets, 0), match_scores > 0.)
    batch_negtive_mask = tf.equal(cls_targets, 0)
    batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1)

    batch_n_neg_select = tf.cast(
        params['negative_ratio'] * tf.cast(batch_n_positives, tf.float32),
        tf.int32)
    batch_n_neg_select = tf.minimum(batch_n_neg_select,
                                    tf.cast(batch_n_negtives, tf.int32))

    # hard negative mining for classification
    predictions_for_bg = tf.nn.softmax(
        tf.reshape(cls_pred,
                   [tf.shape(features)[0], -1,
                    params['num_classes']]))[:, :, 0]
    prob_for_negtives = tf.where(
        batch_negtive_mask,
        0. - predictions_for_bg,
        # ignore all the positives
        0. - tf.ones_like(predictions_for_bg))
    topk_prob_for_bg, _ = tf.nn.top_k(prob_for_negtives,
                                      k=tf.shape(prob_for_negtives)[1])
    score_at_k = tf.gather_nd(
        topk_prob_for_bg,
        tf.stack([tf.range(tf.shape(features)[0]), batch_n_neg_select - 1],
                 axis=-1))

    selected_neg_mask = prob_for_negtives >= tf.expand_dims(score_at_k,
                                                            axis=-1)

    # include both the selected negatives and all positive examples
    final_mask = tf.stop_gradient(
        tf.logical_or(
            tf.reshape(tf.logical_and(batch_negtive_mask, selected_neg_mask),
                       [-1]), positive_mask))
    total_examples = tf.count_nonzero(final_mask)

    cls_pred = tf.boolean_mask(cls_pred, final_mask)
    location_pred = tf.boolean_mask(location_pred,
                                    tf.stop_gradient(positive_mask))
    flaten_cls_targets = tf.boolean_mask(
        tf.clip_by_value(flaten_cls_targets, 0, params['num_classes']),
        final_mask)
    flaten_loc_targets = tf.stop_gradient(
        tf.boolean_mask(flaten_loc_targets, positive_mask))

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    #cross_entropy = (params['negative_ratio'] + 1.) * tf.cond(n_positives > 0, lambda: tf.losses.sparse_softmax_cross_entropy(labels=glabels, logits=cls_pred), lambda: 0.)
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=flaten_cls_targets,
        logits=cls_pred) * (params['negative_ratio'] + 1.)
    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)

    #loc_loss = tf.cond(n_positives > 0, lambda: modified_smooth_l1(location_pred, tf.stop_gradient(flaten_loc_targets), sigma=1.), lambda: tf.zeros_like(location_pred))
    loc_loss = modified_smooth_l1(location_pred, flaten_loc_targets, sigma=1.)
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1),
                              name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)

    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    total_loss = tf.add(cross_entropy, loc_loss, name='total_loss')

    cls_accuracy = tf.metrics.accuracy(flaten_cls_targets,
                                       tf.argmax(cls_pred, axis=-1))

    # Create a tensor named train_accuracy for logging purposes.
    tf.identity(cls_accuracy[1], name='cls_accuracy')
    tf.summary.scalar('cls_accuracy', cls_accuracy[1])

    summary_hook = tf.train.SummarySaverHook(
        save_steps=params['save_summary_steps'],
        output_dir=params['summary_dir'],
        summary_op=tf.summary.merge_all())
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          predictions=predictions,
                                          prediction_hooks=[summary_hook],
                                          loss=None,
                                          train_op=None)
    else:
        raise ValueError('This script only supports "PREDICT" mode!')
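parse_by_class is used here but defined elsewhere in the repository. Below is a plausible sketch of what it computes, built on the stock tf.image.non_max_suppression: per-class score thresholding, top-k pre-selection, then class-wise NMS. The whole body is an assumption (for instance, the min_size filter is omitted):

def parse_by_class(cls_pred, bboxes_pred, num_classes, select_threshold,
                   min_size, keep_topk, nms_topk, nms_threshold):
    # hypothetical body; min_size filtering is skipped for brevity
    selected_bboxes, selected_scores = {}, {}
    probs = tf.nn.softmax(cls_pred)  # (num_anchors, num_classes)
    for class_ind in range(1, num_classes):  # class 0 is background
        scores = probs[:, class_ind]
        scores = tf.where(scores > select_threshold, scores,
                          tf.zeros_like(scores))
        scores, keep_idx = tf.nn.top_k(scores, k=keep_topk)
        bboxes = tf.gather(bboxes_pred, keep_idx)
        nms_idx = tf.image.non_max_suppression(bboxes, scores, nms_topk,
                                               nms_threshold)
        selected_scores[class_ind] = tf.gather(scores, nms_idx)
        selected_bboxes[class_ind] = tf.gather(bboxes, nms_idx)
    return selected_bboxes, selected_scores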
Example #3
def ssd_model_fn(features, labels, mode, params):
    """model_fn for SSD to be used with our Estimator."""
    shape = labels['shape']
    loc_targets = labels['loc_targets']
    cls_targets = labels['cls_targets']
    match_scores = labels['match_scores']

    global global_anchor_info
    decode_fn = global_anchor_info['decode_fn']
    num_anchors_per_layer = global_anchor_info['num_anchors_per_layer']
    all_num_anchors_depth = global_anchor_info['all_num_anchors_depth']

    # bboxes_pred = decode_fn(loc_targets[0])
    # bboxes_pred = [tf.reshape(preds, [-1, 4]) for preds in bboxes_pred]
    # bboxes_pred = tf.concat(bboxes_pred, axis=0)
    # save_image_op = tf.py_func(save_image_with_bbox,
    #                         [ssd_preprocessing.unwhiten_image(features[0]),
    #                         tf.clip_by_value(cls_targets[0], 0, tf.int64.max),
    #                         match_scores[0],
    #                         bboxes_pred],
    #                         tf.int64, stateful=True)
    # with tf.control_dependencies([save_image_op]):

    #print(all_num_anchors_depth)
    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[features],
                           reuse=tf.AUTO_REUSE):
        backbone = ssd_net.VGG16Backbone(params['data_format'])
        feature_layers = backbone.forward(
            features, training=(mode == tf.estimator.ModeKeys.TRAIN))
        #print(feature_layers)
        location_pred, cls_pred = ssd_net.multibox_head(
            feature_layers,
            params['num_classes'],
            all_num_anchors_depth,
            data_format=params['data_format'])

        if params['data_format'] == 'channels_first':
            cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
            location_pred = [
                tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
            ]

        cls_pred = [
            tf.reshape(pred,
                       [tf.shape(features)[0], -1, params['num_classes']])
            for pred in cls_pred
        ]
        location_pred = [
            tf.reshape(pred, [tf.shape(features)[0], -1, 4])
            for pred in location_pred
        ]

        cls_pred = tf.concat(cls_pred, axis=1)
        location_pred = tf.concat(location_pred, axis=1)

        cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
        location_pred = tf.reshape(location_pred, [-1, 4])

    with tf.device('/cpu:0'):
        with tf.control_dependencies([cls_pred, location_pred]):
            with tf.name_scope('post_forward'):
                #bboxes_pred = decode_fn(location_pred)
                bboxes_pred = tf.map_fn(
                    lambda _preds: decode_fn(_preds),
                    tf.reshape(location_pred, [tf.shape(features)[0], -1, 4]),
                    dtype=[tf.float32] * len(num_anchors_per_layer),
                    back_prop=False)
                #cls_targets = tf.Print(cls_targets, [tf.shape(bboxes_pred[0]),tf.shape(bboxes_pred[1]),tf.shape(bboxes_pred[2]),tf.shape(bboxes_pred[3])])
                bboxes_pred = [
                    tf.reshape(preds, [-1, 4]) for preds in bboxes_pred
                ]
                bboxes_pred = tf.concat(bboxes_pred, axis=0)

                flaten_cls_targets = tf.reshape(cls_targets, [-1])
                flaten_match_scores = tf.reshape(match_scores, [-1])
                flaten_loc_targets = tf.reshape(loc_targets, [-1, 4])

                # each positive example has one label
                positive_mask = flaten_cls_targets > 0
                n_positives = tf.count_nonzero(positive_mask)

                batch_n_positives = tf.count_nonzero(cls_targets, -1)

                # alternative: tf.logical_and(tf.equal(cls_targets, 0), match_scores > 0.)
                batch_negtive_mask = tf.equal(cls_targets, 0)
                batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1)

                batch_n_neg_select = tf.cast(
                    params['negative_ratio'] *
                    tf.cast(batch_n_positives, tf.float32), tf.int32)
                batch_n_neg_select = tf.minimum(
                    batch_n_neg_select, tf.cast(batch_n_negtives, tf.int32))

                # hard negative mining for classification
                predictions_for_bg = tf.nn.softmax(
                    tf.reshape(
                        cls_pred,
                        [tf.shape(features)[0], -1, params['num_classes']
                         ]))[:, :, 0]
                prob_for_negtives = tf.where(
                    batch_negtive_mask,
                    0. - predictions_for_bg,
                    # ignore all the positives
                    0. - tf.ones_like(predictions_for_bg))
                topk_prob_for_bg, _ = tf.nn.top_k(
                    prob_for_negtives, k=tf.shape(prob_for_negtives)[1])
                score_at_k = tf.gather_nd(
                    topk_prob_for_bg,
                    tf.stack([
                        tf.range(tf.shape(features)[0]), batch_n_neg_select - 1
                    ],
                             axis=-1))

                selected_neg_mask = prob_for_negtives >= tf.expand_dims(
                    score_at_k, axis=-1)

                # include both the selected negatives and all positive examples
                final_mask = tf.stop_gradient(
                    tf.logical_or(
                        tf.reshape(
                            tf.logical_and(batch_negtive_mask,
                                           selected_neg_mask), [-1]),
                        positive_mask))
                total_examples = tf.count_nonzero(final_mask)

                cls_pred = tf.boolean_mask(cls_pred, final_mask)
                location_pred = tf.boolean_mask(
                    location_pred, tf.stop_gradient(positive_mask))
                flaten_cls_targets = tf.boolean_mask(
                    tf.clip_by_value(flaten_cls_targets, 0,
                                     params['num_classes']), final_mask)
                flaten_loc_targets = tf.stop_gradient(
                    tf.boolean_mask(flaten_loc_targets, positive_mask))

                predictions = {
                    'classes':
                    tf.argmax(cls_pred, axis=-1),
                    'probabilities':
                    tf.reduce_max(tf.nn.softmax(cls_pred,
                                                name='softmax_tensor'),
                                  axis=-1),
                    'loc_predict':
                    bboxes_pred
                }

                cls_accuracy = tf.metrics.accuracy(flaten_cls_targets,
                                                   predictions['classes'])
                metrics = {'cls_accuracy': cls_accuracy}

                # Create a tensor named train_accuracy for logging purposes.
                tf.identity(cls_accuracy[1], name='cls_accuracy')
                tf.summary.scalar('cls_accuracy', cls_accuracy[1])

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    #cross_entropy = tf.cond(n_positives > 0, lambda: tf.losses.sparse_softmax_cross_entropy(labels=flaten_cls_targets, logits=cls_pred), lambda: 0.)# * (params['negative_ratio'] + 1.)
    #flaten_cls_targets=tf.Print(flaten_cls_targets, [flaten_loc_targets],summarize=50000)
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=flaten_cls_targets,
        logits=cls_pred) * (params['negative_ratio'] + 1.)
    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)

    #loc_loss = tf.cond(n_positives > 0, lambda: modified_smooth_l1(location_pred, tf.stop_gradient(flaten_loc_targets), sigma=1.), lambda: tf.zeros_like(location_pred))
    loc_loss = modified_smooth_l1(location_pred, flaten_loc_targets, sigma=1.)
    #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets))
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1),
                              name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)

    l2_loss_vars = []
    for trainable_var in tf.trainable_variables():
        if '_bn' not in trainable_var.name:
            if 'conv4_3_scale' not in trainable_var.name:
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var))
            else:
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var) * 0.1)
    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    total_loss = tf.add(cross_entropy + loc_loss,
                        tf.multiply(params['weight_decay'],
                                    tf.add_n(l2_loss_vars),
                                    name='l2_loss'),
                        name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()

        lr_values = [
            params['learning_rate'] * decay
            for decay in params['lr_decay_factors']
        ]
        learning_rate = tf.train.piecewise_constant(
            tf.cast(global_step, tf.int32),
            [int(_) for _ in params['decay_boundaries']], lr_values)
        truncated_learning_rate = tf.maximum(learning_rate,
                                             tf.constant(
                                                 params['end_learning_rate'],
                                                 dtype=learning_rate.dtype),
                                             name='learning_rate')
        # Create a tensor named learning_rate for logging purposes.
        tf.summary.scalar('learning_rate', truncated_learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=truncated_learning_rate, momentum=params['momentum'])
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss, global_step)
    else:
        train_op = None

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=metrics,
        scaffold=tf.train.Scaffold(init_fn=get_init_fn()))
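The hard-negative-mining block shared by Examples #1 and #3 is the densest part of this model_fn, so here is the same per-image selection written as a self-contained NumPy illustration (not code from the repository): negate the background probability so that misclassified negatives score highest, mask positives out with -1, and keep everything at or above the k-th largest score.

import numpy as np

def select_hard_negatives(bg_prob, negative_mask, n_positives, negative_ratio=3):
    # bg_prob: (num_anchors,) predicted background probability for one image
    # negative_mask: (num_anchors,) True where the target class is background
    n_neg = min(int(negative_ratio * n_positives), int(negative_mask.sum()))
    if n_neg == 0:
        return np.zeros_like(negative_mask)
    # a low background probability on a negative box means it was
    # misclassified as an object, i.e. it is a "hard" negative
    scores = np.where(negative_mask, -bg_prob, -np.ones_like(bg_prob))
    score_at_k = np.sort(scores)[::-1][n_neg - 1]  # k-th largest score
    return negative_mask & (scores >= score_at_k)

bg_prob = np.array([0.1, 0.9, 0.2, 0.95, 0.3, 0.8, 0.05, 0.6])
negative_mask = np.array([True, True, True, True, True, True, False, False])
# one positive, ratio 3 -> the three negatives with the lowest bg probability
print(select_hard_negatives(bg_prob, negative_mask, n_positives=1))
# -> [ True False  True False  True False False False]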
Example #4
def main(_):
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        with tf.name_scope('define_input'):
            image_input = tf.placeholder(tf.uint8,
                                         shape=(None, None, 3),
                                         name='image_input')

        features = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                           (1., 2., .5), (1., 2., .5)],
            #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
            #(2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors(
        )

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth,
                all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            with tf.variable_scope('cls_pred'):
                cls_pred = tf.concat(cls_pred, axis=0)
            with tf.variable_scope('location_pred'):
                location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        '''
        config = tf.ConfigProto(allow_soft_placement=True, inter_op_parallelism_threads=1, intra_op_parallelism_threads=1)
        config.mlu_options.data_parallelism = 1
        config.mlu_options.model_parallelism = 1
        config.mlu_options.core_num = 1
        config.mlu_options.core_version = 'MLU270'
        config.mlu_options.precision = 'float'
        with tf.Session(config = config) as sess:
        '''
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            np_image = imread('demo/test.jpg')
            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={image_input: np_image})
            #print('labels_', labels_, type(labels_), labels_.shape)
            #print('scores_', scores_, type(scores_), scores_.shape)
            #print('bboxes_', bboxes_, type(bboxes_), bboxes_.shape, bboxes_.shape[0])

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            imsave('demo/test_out.jpg', img_to_draw)
            saver.save(sess, 'model/ssd300_vgg16/ssd300_vgg16', global_step=0)
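decode_fn above delegates to ext_decode_all_anchors, which is also not shown in this listing. For orientation, this is the standard SSD center-offset decode implied by prior_scaling=[0.1, 0.1, 0.2, 0.2]; the helper below is hypothetical and skips the real function's per-layer bookkeeping:

def decode_ssd_offsets(pred, anchor_cy, anchor_cx, anchor_h, anchor_w,
                       prior_scaling=(0.1, 0.1, 0.2, 0.2)):
    # pred: (num_anchors, 4) regression outputs ordered (dy, dx, dh, dw)
    cy = pred[:, 0] * prior_scaling[0] * anchor_h + anchor_cy
    cx = pred[:, 1] * prior_scaling[1] * anchor_w + anchor_cx
    h = anchor_h * tf.exp(pred[:, 2] * prior_scaling[2])
    w = anchor_w * tf.exp(pred[:, 3] * prior_scaling[3])
    # corners (ymin, xmin, ymax, xmax) in normalized image coordinates
    return tf.stack([cy - h / 2., cx - w / 2., cy + h / 2., cx + w / 2.],
                    axis=-1)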
Example #5
def main(_):
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(2., .5),
                           (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
                           (2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors(
        )

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        decode_fn = lambda pred: anchor_encoder_decoder.ext_decode_all_anchors(
            pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            np_image = imread('./demo/test.jpg')
            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={
                    image_input: np_image,
                    shape_input: np_image.shape[:-1]
                })

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            imsave('./demo/test_out.jpg', img_to_draw)
Example #6
def ssd(path):
# def ssd_res(img_path):
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2,))

        features = ssd_preprocessing.preprocess_for_eval(image_input, out_shape, data_format=FLAGS.data_format, output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(out_shape,
                                                    layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
                                                    anchor_scales = [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
                                                    extra_anchor_scales = [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
                                                    anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
                                                    #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)],
                                                    layer_steps = [8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(allowed_borders = [1.0] * 6,
                                                            positive_threshold = None,
                                                            ignore_threshold = None,
                                                            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        decode_fn = lambda pred : anchor_encoder_decoder.ext_decode_all_anchors(pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

        with tf.variable_scope(FLAGS.model_scope, default_name=None, values=[features], reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(feature_layers, FLAGS.num_classes, all_num_anchors_depth, data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
                location_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred]

            cls_pred = [tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred]
            location_pred = [tf.reshape(pred, [-1, 4]) for pred in location_pred]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)

            selected_bboxes, selected_scores = parse_by_class(cls_pred, bboxes_pred,
                                                            FLAGS.num_classes, FLAGS.select_threshold, FLAGS.min_size,
                                                            FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, get_checkpoint())

            np_image = imread(path)
            im = Image.open(path)
            print(np_image.shape)

            labels_, scores_, bboxes_ = sess.run([all_labels, all_scores, all_bboxes], feed_dict = {image_input : np_image, shape_input : np_image.shape[:-1]})

            all_bboxes = sess.run([bboxes_pred], feed_dict = {image_input : np_image, shape_input : np_image.shape[:-1]})

            shape = np_image.shape
            for j in range(len(all_bboxes[0])):
                all_box = all_bboxes[0][j]
                p1 = (int(all_box[0] * shape[0]), int(all_box[1] * shape[1]))
                p2 = (int(all_box[2] * shape[0]), int(all_box[3] * shape[1]))
                if (p2[0] - p1[0] < 1) or (p2[1] - p1[1] < 1):
                    continue
                x1 = p1[1]
                y1 = p1[0]
                x2 = p2[1]
                y2 = p2[0]

                obj = im.crop((x1, y1, x2, y2))

                num_str = str(j)
                num_str = num_str.zfill(5)
                obj.save('./res/img/{}.jpg'.format(num_str))

                cor = str(x1) + ',' + str(y1) + ',' + str(x2) + ',' + str(y2)
                # append this crop's coordinates; 'with' closes the file each time
                with open('./res/cor.txt', 'a') as f2:
                    f2.write(cor + '\n')

                # every crop starts with a placeholder label of 0
                with open('./res/label.txt', 'a') as f:
                    f.write(num_str + ',' + str(0) + '\n')

            num1 = 0
            for i in range(bboxes_.shape[0]):
                bbox = bboxes_[i]
                p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
                p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
                num1 = num1 + 1

                if (p2[0] - p1[0] < 1) or (p2[1] - p1[1] < 1):
                    continue
                x1 = p1[1]
                y1 = p1[0]
                x2 = p2[1]
                y2 = p2[0]

                cor1 = str(x1) + ',' + str(y1) + ',' + str(x2) + ',' + str(y2)

                num = 0
                with open('./res/cor.txt', 'r') as f11, open('./res/label.txt', 'r+') as f22:
                    for line in f11:
                        num = num + 1
                        if cor1 in line:
                            num11 = str(num)
                            print(num11 + '\n')

                            num11 = num11.zfill(5)
                            ber = num11 + ',' + str(0)
                            aft = num11 + ',' + str(labels_[i])

                            t = f22.read()
                            t = t.replace(ber, aft)
                            f22.seek(0, 0)
                            f22.write(t)
            print(num1)

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image, labels_, scores_, bboxes_, thickness=2)
            imsave('./demo/out.jpg', img_to_draw)
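A quick sanity check on the anchor configuration used above: with aspect ratios that include 1.0, each cell presumably gets len(anchor_ratios) + 1 boxes (the extra scale contributing one more ratio-1 box), which reproduces the 8732 total quoted in Example #1:

# (feature map side, anchors per cell) for the six SSD300 layers
layers = [(38, 4), (19, 6), (10, 6), (5, 6), (3, 4), (1, 4)]
print(sum(side * side * depth for side, depth in layers))
# 5776 + 2166 + 600 + 150 + 36 + 4 = 8732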
Example #7
def ssd_model_fn(features, labels, mode, params):
    """model_fn for SSD to be used with our Estimator."""
    filename = features['filename']
    filename = tf.identity(filename, name='filename')
    shape = features['shape']
    output_shape = features['output_shape']
    features = features['image']

    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(positive_threshold=None, ignore_threshold=None, prior_scaling=[0.1, 0.1, 0.2, 0.2])
    all_anchor_scales = [(30.,), (60.,), (112.5,), (165.,), (217.5,), (270.,)]
    all_extra_scales = [(42.43,), (82.17,), (136.23,), (189.45,), (242.34,), (295.08,)]
    all_anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)]
    #all_anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)]

    with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE):
        backbone = ssd_net.VGG16Backbone(params['data_format'])
        # forward features
        feature_layers = backbone.forward(features, training=(mode == tf.estimator.ModeKeys.TRAIN))
        # generate anchors according to the feature map size
        with tf.device('/cpu:0'):
            if params['data_format'] == 'channels_first':
                all_layer_shapes = [tf.shape(feat)[2:] for feat in feature_layers]
            else:
                all_layer_shapes = [tf.shape(feat)[1:3] for feat in feature_layers]
            all_layer_strides = [8, 16, 32, 64, 100, 300]
            total_layers = len(all_layer_shapes)
            anchors_height = list()
            anchors_width = list()
            anchors_depth = list()
            for ind in range(total_layers):
                _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(all_anchor_scales[ind], all_extra_scales[ind], all_anchor_ratios[ind], name='get_anchors_width_height{}'.format(ind))
                anchors_height.append(_anchors_height)
                anchors_width.append(_anchors_width)
                anchors_depth.append(_anchor_depth)
            anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, _ = anchor_encoder_decoder.get_all_anchors(tf.squeeze(output_shape, axis=0),
                                                                            anchors_height, anchors_width, anchors_depth,
                                                                            [0.5] * total_layers, all_layer_shapes, all_layer_strides,
                                                                            [0.] * total_layers, [False] * total_layers)
        # generate predictions based on anchors
        location_pred, cls_pred = ssd_net.multibox_head(feature_layers, params['num_classes'], anchors_depth, data_format=params['data_format'])
        if params['data_format'] == 'channels_first':
            cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
            location_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred]

        cls_pred = [tf.reshape(pred, [tf.shape(features)[0], -1, params['num_classes']]) for pred in cls_pred]
        location_pred = [tf.reshape(pred, [tf.shape(features)[0], -1, 4]) for pred in location_pred]

        cls_pred = tf.concat(cls_pred, axis=1)
        location_pred = tf.concat(location_pred, axis=1)

        cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
        location_pred = tf.reshape(location_pred, [-1, 4])
    # decode predictions
    with tf.device('/cpu:0'):
        bboxes_pred = anchor_encoder_decoder.decode_anchors(location_pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax)
        selected_bboxes, selected_scores = bbox_util.parse_by_class(tf.squeeze(output_shape, axis=0), cls_pred, bboxes_pred,
                                                        params['num_classes'], params['select_threshold'], params['min_size'],
                                                        params['keep_topk'], params['nms_topk'], params['nms_threshold'])

    labels_list = []
    scores_list = []
    bboxes_list = []
    for k, v in selected_scores.items():
        labels_list.append(tf.ones_like(v, tf.int32) * k)
        scores_list.append(v)
        bboxes_list.append(selected_bboxes[k])
    all_labels = tf.concat(labels_list, axis=0)
    all_scores = tf.concat(scores_list, axis=0)
    all_bboxes = tf.concat(bboxes_list, axis=0)
    save_image_op = tf.py_func(save_image_with_bbox,
                        [ssd_preprocessing.unwhiten_image(tf.squeeze(features, axis=0), output_rgb=False),
                        all_labels * tf.to_int32(all_scores > 0.3),
                        all_scores,
                        all_bboxes],
                        tf.int64, stateful=True)
    tf.identity(save_image_op, name='save_image_op')
    predictions = {'filename': filename, 'shape': shape, 'output_shape': output_shape }
    for class_ind in range(1, params['num_classes']):
        predictions['scores_{}'.format(class_ind)] = tf.expand_dims(selected_scores[class_ind], axis=0)
        predictions['bboxes_{}'.format(class_ind)] = tf.expand_dims(selected_bboxes[class_ind], axis=0)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
              mode=mode,
              predictions=predictions,
              prediction_hooks=None, loss=None, train_op=None)
    else:
        raise ValueError('This script only supports "PREDICT" mode!')
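Since this model_fn only supports PREDICT, it would presumably be driven through tf.estimator.Estimator.predict. A minimal wiring sketch; the model_dir, the input_fn, and the params values below are placeholders, not values taken from the repository:

ssd_detector = tf.estimator.Estimator(
    model_fn=ssd_model_fn,
    model_dir='./logs',  # placeholder checkpoint directory
    params={
        'model_scope': 'ssd300',
        'data_format': 'channels_last',
        'num_classes': 21,
        'select_threshold': 0.2,
        'min_size': 0.03,
        'keep_topk': 200,
        'nms_topk': 20,
        'nms_threshold': 0.45,
    })
# the input_fn must return the dict this model_fn unpacks:
# {'filename', 'shape', 'output_shape', 'image'}
for pred in ssd_detector.predict(input_fn=my_input_fn):  # my_input_fn is a placeholder
    print(pred['filename'], pred['shape'])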
Example #8
def main(_):
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2
        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                           (1., 2., .5), (1., 2., .5)],
            #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
            #(2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors(
        )

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        def decode_fn(pred):
            return anchor_encoder_decoder.ext_decode_all_anchors(
                pred, all_anchors, all_num_anchors_depth,
                all_num_anchors_spatial)

        with tf.name_scope('define_input'):
            image_input = tf.placeholder(tf.float32,
                                         shape=(1, 300, 300, 3),
                                         name='image_input')
        print('image_input', image_input)
        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[image_input],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(image_input, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            with tf.variable_scope('cls_pred'):
                cls_pred = tf.concat(cls_pred, axis=0)
            with tf.variable_scope('location_pred'):
                location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        '''
        config = tf.ConfigProto(allow_soft_placement=True, inter_op_parallelism_threads=1, intra_op_parallelism_threads=1)
        config.mlu_options.data_parallelism = 1
        config.mlu_options.model_parallelism = 1
        config.mlu_options.core_num = 1
        config.mlu_options.core_version = 'MLU270'
        config.mlu_options.precision = 'float'
        with tf.Session(config = config) as sess:
        '''
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            _R_MEAN = 123.68
            _G_MEAN = 116.78
            _B_MEAN = 103.94
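            # cv2.imread returns BGR, so the channel means are subtracted in BGR order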
            means = [
                _B_MEAN,
                _G_MEAN,
                _R_MEAN,
            ]
            np_image = cv2.imread('demo/test.jpg')
            image = cv2.resize(
                np_image, (FLAGS.train_image_size, FLAGS.train_image_size))
            image = (image - means)  # / 255.0
            image = np.expand_dims(image, axis=0)
            print('image', type(image), image.shape)
            '''
            image = tf.to_float(np_image)
            image = tf.image.resize_images(image, out_shape,
                                           method=tf.image.ResizeMethod.BILINEAR, align_corners=False)
            image.set_shape(out_shape + [3])
            num_channels = image.get_shape().as_list()[-1]
            channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
            for i in range(num_channels):
                channels[i] -= means[i]
            image = tf.concat(axis=2, values=channels)
            image_channels = tf.unstack(image, axis=-1, name='split_rgb')
            image = tf.stack([image_channels[2], image_channels[1], image_channels[0]], axis=-1, name='merge_bgr')
            '''

            labels_, scores_, bboxes_ = sess.run(
                [all_labels, all_scores, all_bboxes],
                feed_dict={image_input: image})
            #print('labels_', labels_, type(labels_), labels_.shape)
            #print('scores_', scores_, type(scores_), scores_.shape)
            #print('bboxes_', bboxes_, type(bboxes_), bboxes_.shape, bboxes_.shape[0])

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            cv2.imwrite('demo/test_out.jpg', img_to_draw)
            saver.save(sess,
                       'model/ssd300_vgg16/ssd300_vgg16_short',
                       global_step=0)
Example #9
def main(_):
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features, output_shape = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)
        output_shape = tf.expand_dims(output_shape, axis=0)
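        # output_shape records the post-preprocessing image size; it is used after inference to rescale boxes back to the original image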

        all_anchor_scales = [(30., ), (60., ), (112.5, ), (165., ), (217.5, ),
                             (270., )]
        all_extra_scales = [(42.43, ), (82.17, ), (136.23, ), (189.45, ),
                            (242.34, ), (295.08, )]
        all_anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333),
                             (1., 2., 3., .5, 0.3333), (1., 2., .5),
                             (1., 2., .5)]
        # all_anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)]
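        # these scales and ratios are the standard SSD300 defaults; the anchor layout must match the one used to train the checkpoint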

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            with tf.device('/cpu:0'):
                anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
                    positive_threshold=None,
                    ignore_threshold=None,
                    prior_scaling=[0.1, 0.1, 0.2, 0.2])

                if FLAGS.data_format == 'channels_first':
                    all_layer_shapes = [
                        tf.shape(feat)[2:] for feat in feature_layers
                    ]
                else:
                    all_layer_shapes = [
                        tf.shape(feat)[1:3] for feat in feature_layers
                    ]
                all_layer_strides = [8, 16, 32, 64, 100, 300]
                total_layers = len(all_layer_shapes)
                anchors_height = list()
                anchors_width = list()
                anchors_depth = list()
                for ind in range(total_layers):
                    _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(
                        all_anchor_scales[ind],
                        all_extra_scales[ind],
                        all_anchor_ratios[ind],
                        name='get_anchors_width_height{}'.format(ind))
                    anchors_height.append(_anchors_height)
                    anchors_width.append(_anchors_width)
                    anchors_depth.append(_anchor_depth)
                anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, _ = anchor_encoder_decoder.get_all_anchors(
                    tf.squeeze(output_shape, axis=0), anchors_height,
                    anchors_width, anchors_depth, [0.5] * total_layers,
                    all_layer_shapes, all_layer_strides, [0.] * total_layers,
                    [False] * total_layers)
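                # the corner coordinates (ymin, xmin, ymax, xmax) now cover every default box across the six feature layers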
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = anchor_encoder_decoder.decode_anchors(
                location_pred, anchors_ymin, anchors_xmin, anchors_ymax,
                anchors_xmax)
            selected_bboxes, selected_scores = bbox_util.parse_by_class(
                tf.squeeze(output_shape, axis=0), cls_pred, bboxes_pred,
                FLAGS.num_classes, FLAGS.select_threshold, FLAGS.min_size,
                FLAGS.keep_topk, FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

            np_image = imread('./demo/test.jpg')
            labels_, scores_, bboxes_, output_shape_ = sess.run(
                [all_labels, all_scores, all_bboxes, output_shape],
                feed_dict={
                    image_input: np_image,
                    shape_input: np_image.shape[:-1]
                })
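            # rescale boxes from the preprocessed output shape back to the original image resolution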
            bboxes_[:, 0] = bboxes_[:, 0] * np_image.shape[0] / output_shape_[0, 0]
            bboxes_[:, 1] = bboxes_[:, 1] * np_image.shape[1] / output_shape_[0, 1]
            bboxes_[:, 2] = bboxes_[:, 2] * np_image.shape[0] / output_shape_[0, 0]
            bboxes_[:, 3] = bboxes_[:, 3] * np_image.shape[1] / output_shape_[0, 1]

            img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                          labels_,
                                                          scores_,
                                                          bboxes_,
                                                          thickness=2)
            imsave('./demo/test_out.jpg', img_to_draw)
Example #10
def main(_):
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2

        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
        shape_input = tf.placeholder(tf.int32, shape=(2, ))

        features = ssd_preprocessing.preprocess_for_eval(
            image_input,
            out_shape,
            data_format=FLAGS.data_format,
            output_rgb=False)
        features = tf.expand_dims(features, axis=0)

        anchor_creator = anchor_manipulator.AnchorCreator(
            out_shape,
            layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3),
                           (1, 1)],
            anchor_scales=[(0.1, ), (0.2, ), (0.375, ), (0.55, ), (0.725, ),
                           (0.9, )],
            extra_anchor_scales=[(0.1414, ), (0.2739, ), (0.4541, ),
                                 (0.6315, ), (0.8078, ), (0.9836, )],
            anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333),
                           (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                           (1., 2., .5), (1., 2., .5)],
            #anchor_ratios = [(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333), (2., .5), (2., .5)],
            layer_steps=[8, 16, 32, 64, 100, 300])
        all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors(
        )

        anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
            allowed_borders=[1.0] * 6,
            positive_threshold=None,
            ignore_threshold=None,
            prior_scaling=[0.1, 0.1, 0.2, 0.2])

        decode_fn = lambda pred: anchor_encoder_decoder.ext_decode_all_anchors(
            pred, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
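        # decode_fn maps the predicted per-layer offsets back to absolute box coordinates using the precomputed anchors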

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            backbone = ssd_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)
            location_pred, cls_pred = ssd_net.multibox_head(
                feature_layers,
                FLAGS.num_classes,
                all_num_anchors_depth,
                data_format=FLAGS.data_format)
            if FLAGS.data_format == 'channels_first':
                cls_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred
                ]
                location_pred = [
                    tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
                ]

            cls_pred = [
                tf.reshape(pred, [-1, FLAGS.num_classes]) for pred in cls_pred
            ]
            location_pred = [
                tf.reshape(pred, [-1, 4]) for pred in location_pred
            ]

            cls_pred = tf.concat(cls_pred, axis=0)
            location_pred = tf.concat(location_pred, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred = decode_fn(location_pred)
            bboxes_pred = tf.concat(bboxes_pred, axis=0)
            selected_bboxes, selected_scores = parse_by_class(
                cls_pred, bboxes_pred, FLAGS.num_classes,
                FLAGS.select_threshold, FLAGS.min_size, FLAGS.keep_topk,
                FLAGS.nms_topk, FLAGS.nms_threshold)

            labels_list = []
            scores_list = []
            bboxes_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            saver.restore(sess, get_checkpoint())

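            # vid, video_frame_cnt, and videoWriter are assumed to be opened earlier in this example (e.g. via cv2.VideoCapture/VideoWriter)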
            for i in range(video_frame_cnt):
                ret, img_ori = vid.read()

                # height_ori, width_ori = img_ori.shape[:2]
                # img = cv2.resize(img_ori, tuple(args.new_size))
                img = cv2.cvtColor(img_ori, cv2.COLOR_BGR2RGB)
                np_image = np.asarray(img, np.float32)

                start_time = time.time()
                labels_, scores_, bboxes_ = sess.run(
                    [all_labels, all_scores, all_bboxes],
                    feed_dict={
                        image_input: np_image,
                        shape_input: np_image.shape[:-1]
                    })
                end_time = time.time()

                img_to_draw = draw_toolbox.bboxes_draw_on_img(np_image,
                                                              labels_,
                                                              scores_,
                                                              bboxes_,
                                                              thickness=2)
                cv2.putText(img_to_draw,
                            '{:.2f}ms'.format((end_time - start_time) * 1000),
                            (40, 40),
                            0,
                            fontScale=1,
                            color=(0, 255, 0),
                            thickness=2)

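                # write the annotated RGB frame to disk, then re-read it with cv2 (BGR) so imshow and the video writer see the right channel order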
                imsave('./test_out.jpg', img_to_draw)

                new_img = cv2.imread('./test_out.jpg')
                cv2.imshow('image', new_img)

                videoWriter.write(new_img)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            vid.release()
            videoWriter.release()
Example #11
def ssd_model_fn(features, labels, mode, params):
    # Core of the SSD model.
    shape = labels['shape']  # original image shapes
    loc_targets = labels['loc_targets']  # ground-truth location offsets
    cls_targets = labels['cls_targets']  # ground-truth class labels
    match_scores = labels['match_scores']  # matching confidence scores
    global global_anchor_info  # all precomputed anchor information
    decode_fn = global_anchor_info['decode_fn']  # anchor decoding function
    num_anchors_per_layer = global_anchor_info[
        'num_anchors_per_layer']  # number of anchors in each layer
    all_num_anchors_depth = global_anchor_info['all_num_anchors_depth']
    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[features],
                           reuse=tf.AUTO_REUSE):
        backbone = ssd_net.VGG16Backbone(params['data_format'])
        feature_layers = backbone.forward(
            features, training=(mode == tf.estimator.ModeKeys.TRAIN))
        location_pred, cls_pred = ssd_net.multibox_head(
            feature_layers,
            params['num_classes'],
            all_num_anchors_depth,
            data_format=params['data_format'])  # prediction heads
        if params['data_format'] == 'channels_first':
            # transpose from the GPU-friendly NCHW layout to NHWC
            cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
            location_pred = [
                tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
            ]
        cls_pred = [
            tf.reshape(pred,
                       [tf.shape(features)[0], -1, params['num_classes']])
            for pred in cls_pred
        ]
        location_pred = [
            tf.reshape(pred, [tf.shape(features)[0], -1, 4])
            for pred in location_pred
        ]
        cls_pred = tf.concat(cls_pred, axis=1)
        location_pred = tf.concat(location_pred, axis=1)
        cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
        location_pred = tf.reshape(location_pred, [-1, 4])
    with tf.device('/cpu:0'):
        with tf.control_dependencies([cls_pred, location_pred]):
            with tf.name_scope('post_forward'):
                # decoding step
                bboxes_pred = tf.map_fn(
                    lambda _preds: decode_fn(_preds),
                    tf.reshape(location_pred, [tf.shape(features)[0], -1, 4]),
                    dtype=[tf.float32] * len(num_anchors_per_layer),
                    back_prop=False)
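                # each image's offsets are decoded independently; back_prop=False since decoding needs no gradients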
                bboxes_pred = [
                    tf.reshape(preds, [-1, 4]) for preds in bboxes_pred
                ]
                bboxes_pred = tf.concat(bboxes_pred, axis=0)
                flaten_cls_targets = tf.reshape(cls_targets, [-1])
                flaten_match_scores = tf.reshape(match_scores, [-1])
                flaten_loc_targets = tf.reshape(loc_targets, [-1, 4])
                # every example carries exactly one label
                positive_mask = flaten_cls_targets > 0
                n_positives = tf.count_nonzero(positive_mask)
                batch_n_positives = tf.count_nonzero(cls_targets, -1)
                batch_negtive_mask = tf.equal(cls_targets, 0)
                batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1)
                batch_n_neg_select = tf.cast(
                    params['negative_ratio'] *
                    tf.cast(batch_n_positives, tf.float32), tf.int32)
                batch_n_neg_select = tf.minimum(
                    batch_n_neg_select, tf.cast(batch_n_negtives, tf.int32))
                # hard negative mining: select the hardest negative examples
                predictions_for_bg = tf.nn.softmax(
                    tf.reshape(
                        cls_pred,
                        [tf.shape(features)[0], -1, params['num_classes']
                         ]))[:, :, 0]
                prob_for_negtives = tf.where(
                    batch_negtive_mask,
                    0. - predictions_for_bg,
                    # ignore all the positives
                    0. - tf.ones_like(predictions_for_bg))
                topk_prob_for_bg, _ = tf.nn.top_k(
                    prob_for_negtives, k=tf.shape(prob_for_negtives)[1])
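                # after sorting, the hardest negatives (lowest background confidence) come first; score_at_k below reads each image's cutoff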
                score_at_k = tf.gather_nd(
                    topk_prob_for_bg,
                    tf.stack([
                        tf.range(tf.shape(features)[0]), batch_n_neg_select - 1
                    ],
                             axis=-1))
                selected_neg_mask = prob_for_negtives >= tf.expand_dims(
                    score_at_k, axis=-1)

                final_mask = tf.stop_gradient(
                    tf.logical_or(
                        tf.reshape(
                            tf.logical_and(batch_negtive_mask,
                                           selected_neg_mask), [-1]),
                        positive_mask))  # the selected positive and negative examples
                total_examples = tf.count_nonzero(final_mask)  # total number of selected examples
                cls_pred = tf.boolean_mask(cls_pred, final_mask)  # predictions for the selected examples
                location_pred = tf.boolean_mask(
                    location_pred, tf.stop_gradient(positive_mask))
                flaten_cls_targets = tf.boolean_mask(
                    tf.clip_by_value(flaten_cls_targets, 0,
                                     params['num_classes']), final_mask)
                flaten_loc_targets = tf.stop_gradient(
                    tf.boolean_mask(flaten_loc_targets, positive_mask))
                predictions = {
                    'classes':
                    tf.argmax(cls_pred, axis=-1),
                    'probabilities':
                    tf.reduce_max(tf.nn.softmax(cls_pred,
                                                name='softmax_tensor'),
                                  axis=-1),
                    'loc_predict':
                    bboxes_pred
                }
                cls_accuracy = tf.metrics.accuracy(flaten_cls_targets,
                                                   predictions['classes'])
                metrics = {'cls_accuracy': cls_accuracy}
                tf.identity(cls_accuracy[1], name='cls_accuracy')
                tf.summary.scalar('cls_accuracy', cls_accuracy[1])

    if mode == tf.estimator.ModeKeys.PREDICT:
        # prediction mode
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
    # Compute the loss: softmax cross-entropy plus L2 regularization.
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=flaten_cls_targets,
        logits=cls_pred) * (params['negative_ratio'] + 1.)
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)
    loc_loss = modified_smooth_l1(location_pred, flaten_loc_targets, sigma=1.)
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1),
                              name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)
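    # L2-regularize all trainable weights, skipping batch-norm parameters and down-weighting the conv4_3 scale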
    l2_loss_vars = []
    for trainable_var in tf.trainable_variables():
        if '_bn' not in trainable_var.name:
            if 'conv4_3_scale' not in trainable_var.name:
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var))
            else:
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var) * 0.1)
    # Add the weight decay term to the loss.
    total_loss = tf.add(cross_entropy + loc_loss,
                        tf.multiply(params['weight_decay'],
                                    tf.add_n(l2_loss_vars),
                                    name='l2_loss'),
                        name='total_loss')
    if mode == tf.estimator.ModeKeys.TRAIN:
        # training mode
        global_step = tf.train.get_or_create_global_step()
        lr_values = [
            params['learning_rate'] * decay
            for decay in params['lr_decay_factors']
        ]
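        # piecewise-constant step decay over global_step, floored at end_learning_rate just below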
        learning_rate = tf.train.piecewise_constant(
            tf.cast(global_step, tf.int32),
            [int(_) for _ in params['decay_boundaries']], lr_values)
        truncated_learning_rate = tf.maximum(learning_rate,
                                             tf.constant(
                                                 params['end_learning_rate'],
                                                 dtype=learning_rate.dtype),
                                             name='learning_rate')
        tf.summary.scalar('learning_rate', truncated_learning_rate)
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=truncated_learning_rate, momentum=params['momentum'])
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss, global_step)
    else:
        train_op = None
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=metrics,
        scaffold=tf.train.Scaffold(init_fn=get_init_fn()))
Example #12
def ssd_model_fn(features, labels, mode, params):
    """model_fn for SSD to be used with our Estimator."""
    shape = labels['shape']
    loc_targets = labels['loc_targets']
    cls_targets = labels['cls_targets']
    match_scores = labels['match_scores']

    global global_anchor_info
    decode_fn = global_anchor_info['decode_fn']
    num_anchors_per_layer = global_anchor_info['num_anchors_per_layer']
    all_num_anchors_depth = global_anchor_info['all_num_anchors_depth']

    # bboxes_pred = decode_fn(loc_targets[0])
    # bboxes_pred = [tf.reshape(preds, [-1, 4]) for preds in bboxes_pred]
    # bboxes_pred = tf.concat(bboxes_pred, axis=0)
    # save_image_op = tf.py_func(save_image_with_bbox,
    #                         [ssd_preprocessing.unwhiten_image(features[0]),
    #                         tf.clip_by_value(cls_targets[0], 0, tf.int64.max),
    #                         match_scores[0],
    #                         bboxes_pred],
    #                         tf.int64, stateful=True)
    # with tf.control_dependencies([save_image_op]):

    #print(all_num_anchors_depth)
    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[features],
                           reuse=tf.AUTO_REUSE):
        backbone = ssd_net.VGG16Backbone(params['data_format'])
        feature_layers = backbone.forward(
            features, training=(mode == tf.estimator.ModeKeys.TRAIN))
        #print(feature_layers)

        # location_pred: [[batch_size, 4*4, 38, 38], ...]
        # cls_pred: [[batch_size, 4*num_classes, 38, 38], ...,
        #           10*10*6*num_classes, 5*5*6*num_classes, 3*3*4*num_classes, 1*1*4*num_classes]
        location_pred, cls_pred = ssd_net.multibox_head(
            feature_layers,
            params['num_classes'],
            all_num_anchors_depth,
            data_format=params['data_format'])

        if params['data_format'] == 'channels_first':
            cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
            location_pred = [
                tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
            ]
        # cls_pred: [[batch_size, 38, 38, 4*num_classes], ...]
        # location_pred: [[batch_size, 38, 38, 4*4], ...]

        cls_pred = [
            tf.reshape(pred,
                       [tf.shape(features)[0], -1, params['num_classes']])
            for pred in cls_pred
        ]
        location_pred = [
            tf.reshape(pred, [tf.shape(features)[0], -1, 4])
            for pred in location_pred
        ]
        # cls_pred: [[batch_size, 38*38*4, num_classes], ...]
        # location_pred: [[batch_size, 38*38*4, 4], ...]

        cls_pred = tf.concat(cls_pred, axis=1)
        location_pred = tf.concat(location_pred, axis=1)

        cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
        location_pred = tf.reshape(location_pred, [-1, 4])
        # cls_pred: [batch_size*(38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4), num_classes]
        # location_pred: [batch_size*(38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4), 4]
    with tf.device('/cpu:0'):
        with tf.control_dependencies([cls_pred, location_pred]):
            with tf.name_scope('post_forward'):
                # location_pred: [batch_size, 8732, 4], holding the predicted offsets for every prior_bbox
                # decode_fn: given the 8732 prior_bboxes' own coordinates and their predicted offsets, recovers the actual positions of the 8732 predicted boxes
                bboxes_pred = decode_fn(
                    tf.reshape(location_pred, [tf.shape(features)[0], -1, 4]))
                bboxes_pred = tf.reshape(bboxes_pred, [-1, 4])
                # bboxes_pred: [batch_size*8732, 4]; the 4 values are the box's [ymin, xmin, ymax, xmax]

                # cls_targets: [batch_size, 8732]
                flaten_cls_targets = tf.reshape(cls_targets,
                                                [-1])  # [batch_size*8732]
                flaten_match_scores = tf.reshape(match_scores, [-1])
                flaten_loc_targets = tf.reshape(loc_targets,
                                                [-1, 4])  # [batch_size*8732, 4]

                # each positive example has one label
                positive_mask = flaten_cls_targets > 0
                n_positives = tf.count_nonzero(positive_mask)

                # batch_n_positives: [batch_size]; the i-th entry x means there are x positive prior_bboxes in image i
                batch_n_positives = tf.count_nonzero(cls_targets > 0, -1)

                # batch_negtive_mask: [batch_size, 8732]
                batch_negtive_mask = tf.equal(cls_targets, 0)
                # batch_n_negtives: [batch_size]; the i-th entry x means there are x negative prior_bboxes in image i
                batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1)

                # negative_ratio: 3, i.e. the number of negatives is three times the number of positives
                batch_n_neg_select = tf.to_int32(
                    params['negative_ratio'] * tf.to_float(batch_n_positives))
                batch_n_neg_select = tf.minimum(batch_n_neg_select,
                                                tf.to_int32(batch_n_negtives))
                # batch_n_neg_select: [batch_size] -> the i-th entry x means image i selects x negative prior_bboxes

                # hard negative mining for classification
                # predictions_for_bg:[batch_size, 8732]
                predictions_for_bg = tf.nn.softmax(
                    tf.reshape(
                        cls_pred,
                        [tf.shape(features)[0], -1, params['num_classes']
                         ]))[:, :, 0]
                prob_for_negtives = tf.where(
                    batch_negtive_mask,
                    0. - predictions_for_bg,
                    # ignore all the positives
                    0. - tf.ones_like(predictions_for_bg))
                # prob_for_negtives: [batch_size, 8732]; where a prior_bbox's label_cls is 0, the (negated) background prediction is filled in, otherwise -1

                # topk_prob_for_bg: [batch_size, 8732], sorted in descending order along the second dimension
                topk_prob_for_bg, _ = tf.nn.top_k(
                    prob_for_negtives, k=tf.shape(prob_for_negtives)[1])

                # score_at_k: [batch_size]; the i-th entry x means: image i selects m negative prior_bboxes, and among those m boxes the highest background score is -x.
                # In other words, the lowest score marks a badly mistaken prediction: the box is plainly background, yet its background score (-x) is very low. (The minus sign was added in the tf.where above to make the sorting convenient.)
                score_at_k = tf.gather_nd(
                    topk_prob_for_bg,
                    tf.stack([
                        tf.range(tf.shape(features)[0]), batch_n_neg_select - 1
                    ],
                             axis=-1))

                # selected_neg_mask: [batch_size, 8732]; True at the positions of the selected negatives, False elsewhere
                selected_neg_mask = prob_for_negtives >= tf.expand_dims(
                    score_at_k, axis=-1)

                # include both the selected negatives and all positive examples
                # final_mask: [batch_size*8732]; True for the selected positives and negatives, False elsewhere
                final_mask = tf.stop_gradient(
                    tf.logical_or(
                        tf.reshape(
                            tf.logical_and(batch_negtive_mask,
                                           selected_neg_mask), [-1]),
                        positive_mask))
                total_examples = tf.count_nonzero(final_mask)

                # suppose the whole batch contains m positives and n selected negatives
                # cls_pred: [m+n, num_classes]
                cls_pred = tf.boolean_mask(cls_pred, final_mask)
                # location_pred: [m, 4]
                location_pred = tf.boolean_mask(
                    location_pred, tf.stop_gradient(positive_mask))
                flaten_cls_targets = tf.boolean_mask(
                    tf.clip_by_value(flaten_cls_targets, 0,
                                     params['num_classes']), final_mask)
                flaten_loc_targets = tf.stop_gradient(
                    tf.boolean_mask(flaten_loc_targets, positive_mask))

                predictions = {
                    'classes':
                    tf.argmax(cls_pred, axis=-1),
                    'probabilities':
                    tf.reduce_max(tf.nn.softmax(cls_pred,
                                                name='softmax_tensor'),
                                  axis=-1),
                    'loc_predict':
                    bboxes_pred
                }

                cls_accuracy = tf.metrics.accuracy(flaten_cls_targets,
                                                   predictions['classes'])
                metrics = {'cls_accuracy': cls_accuracy}

                # Create a tensor named train_accuracy for logging purposes.
                tf.identity(cls_accuracy[1], name='cls_accuracy')
                tf.summary.scalar('cls_accuracy', cls_accuracy[1])

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    #cross_entropy = tf.cond(n_positives > 0, lambda: tf.losses.sparse_softmax_cross_entropy(labels=flaten_cls_targets, logits=cls_pred), lambda: 0.)# * (params['negative_ratio'] + 1.)
    #flaten_cls_targets=tf.Print(flaten_cls_targets, [flaten_loc_targets],summarize=50000)
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=flaten_cls_targets,
        logits=cls_pred) * (params['negative_ratio'] + 1.)
    # Create a tensor named cross_entropy for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)

    #loc_loss = tf.cond(n_positives > 0, lambda: modified_smooth_l1(location_pred, tf.stop_gradient(flaten_loc_targets), sigma=1.), lambda: tf.zeros_like(location_pred))
    loc_loss = modified_smooth_l1(location_pred, flaten_loc_targets, sigma=1.)
    #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets))
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1),
                              name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)

    l2_loss_vars = []
    for trainable_var in tf.trainable_variables():
        if '_bn' not in trainable_var.name:
            if 'conv4_3_scale' not in trainable_var.name:
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var))
            else:
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var) * 0.1)
    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    total_loss = tf.add(cross_entropy + loc_loss,
                        tf.multiply(params['weight_decay'],
                                    tf.add_n(l2_loss_vars),
                                    name='l2_loss'),
                        name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()

        lr_values = [
            params['learning_rate'] * decay
            for decay in params['lr_decay_factors']
        ]
        learning_rate = tf.train.piecewise_constant(
            tf.cast(global_step, tf.int32),
            [int(_) for _ in params['decay_boundaries']], lr_values)
        truncated_learning_rate = tf.maximum(learning_rate,
                                             tf.constant(
                                                 params['end_learning_rate'],
                                                 dtype=learning_rate.dtype),
                                             name='learning_rate')
        # Create a tensor named learning_rate for logging purposes.
        tf.summary.scalar('learning_rate', truncated_learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=truncated_learning_rate, momentum=params['momentum'])
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss, global_step)
    else:
        train_op = None

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=metrics,
        scaffold=tf.train.Scaffold(init_fn=get_init_fn()))