Пример #1
0
    def vgg_retinanet(num_classes, backbone='vgg16', inputs=None, modifier=None, *args, **kwargs):
        """ Builds a retinanet model on top of a VGG feature extractor.

        Args
            num_classes: Number of classes to predict.
            backbone: Name of the VGG variant, either 'vgg16' or 'vgg19'.
            inputs: Input tensor of the network; when None, a keras Input of shape (None, None, 3) is created.
            modifier: Optional callable that receives the VGG model and returns the model to use
                (for example to freeze backbone layers).
            *args: Accepted for call compatibility; not used by this function.
            **kwargs: Forwarded unchanged to retinanet.retinanet.

        Returns
            The result of retinanet.retinanet built on the selected VGG backbone.

        Raises
            ValueError: If backbone is neither 'vgg16' nor 'vgg19'.
        """
        # fall back to an image input with unconstrained spatial size
        inputs = keras.layers.Input(shape=(None, None, 3)) if inputs is None else inputs

        # reject unknown names first, then pick the matching keras application
        if backbone not in ('vgg16', 'vgg19'):
            raise ValueError("Backbone '{}' not recognized.".format(backbone))
        vgg_factory = keras.applications.VGG16 if backbone == 'vgg16' else keras.applications.VGG19
        vgg = vgg_factory(input_tensor=inputs, include_top=False, weights='imagenet')

        if modifier:
            vgg = modifier(vgg)

        # two leading None placeholders, followed by the pooling outputs of blocks 3 to 5
        backbone_layers = [None, None]
        for layer_name in ("block3_pool", "block4_pool", "block5_pool"):
            backbone_layers.append(vgg.get_layer(layer_name).output)

        return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
Пример #2
0
    def efficientnet_retinanet(num_classes, backbone='efficientnet-b0', inputs=None, modifier=None, **kwargs):
        """ Builds a retinanet model on top of an EfficientNet backbone.

        Args
            num_classes: Number of classes to predict.
            backbone: Which backbone to use, one of 'efficientnet-b0' through 'efficientnet-b7'.
            inputs: Input tensor of the network; when None, a keras Input of shape (None, None, 3) is created.
            modifier: Optional callable that receives the backbone object and returns the one to use
                (for example to freeze backbone layers).
            **kwargs: Forwarded unchanged to retinanet.retinanet.

        Returns
            The result of retinanet.retinanet built on the selected EfficientNet backbone.

        Raises
            ValueError: If backbone is not one of the eight supported names.
        """
        # choose default input
        if inputs is None:
            inputs = keras.layers.Input(shape=(None, None, 3))

        # name -> zero-argument builder; the lambdas keep the lookup lazy so only
        # the selected EfficientNetB*_ helper is ever touched
        builders = (
            ('efficientnet-b0', lambda: EfficientNetB0_(input_tensor=inputs)),
            ('efficientnet-b1', lambda: EfficientNetB1_(input_tensor=inputs)),
            ('efficientnet-b2', lambda: EfficientNetB2_(input_tensor=inputs)),
            ('efficientnet-b3', lambda: EfficientNetB3_(input_tensor=inputs)),
            ('efficientnet-b4', lambda: EfficientNetB4_(input_tensor=inputs)),
            ('efficientnet-b5', lambda: EfficientNetB5_(input_tensor=inputs)),
            ('efficientnet-b6', lambda: EfficientNetB6_(input_tensor=inputs)),
            ('efficientnet-b7', lambda: EfficientNetB7_(input_tensor=inputs)),
        )
        for variant, build in builders:
            if backbone == variant:
                efficientnet = build()
                break
        else:
            raise ValueError("Backbone '{}' not recognized.".format(backbone))

        if modifier:
            efficientnet = modifier(efficientnet)

        # the helper's return value is handed over as backbone_layers as-is
        # (presumably it already is the list of feature layers -- TODO confirm)
        return retinanet.retinanet(
            inputs=inputs,
            num_classes=num_classes,
            backbone_layers=efficientnet,
            **kwargs
        )
Пример #3
0
    def resnet_retinanet(self, num_classes, backbone='resnet50', inputs=None, modifier=None, **kwargs):
        """ Builds a retinanet model on top of a ResNet backbone and passes it to model_with_weights.

        Args
            num_classes: Number of classes to predict.
            backbone: Which backbone to use, one of 'resnet50', 'resnet101' or 'resnet152'.
            inputs: Input tensor of the network; when None, a keras Input is created whose shape is
                (3, None, None) for 'channels_first' and (None, None, 3) otherwise.
            modifier: Optional callable that receives the ResNet model and returns the model to use
                (for example to freeze backbone layers).
            **kwargs: Forwarded unchanged to retinanet.retinanet; the 'weights' entry (if any) is
                additionally passed to self.model_with_weights.

        Returns
            The result of self.model_with_weights applied to the constructed retinanet model.

        Raises
            ValueError: If backbone is not one of the three supported names.
        """
        # choose default input according to the backend's channel ordering
        if inputs is None:
            channels_first = keras.backend.image_data_format() == 'channels_first'
            default_shape = (3, None, None) if channels_first else (None, None, 3)
            inputs = keras.layers.Input(shape=default_shape)

        # validate the name, then select the matching keras_resnet constructor
        if backbone not in ('resnet50', 'resnet101', 'resnet152'):
            raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone))
        if backbone == 'resnet50':
            resnet_factory = keras_resnet.models.ResNet50
        elif backbone == 'resnet101':
            resnet_factory = keras_resnet.models.ResNet101
        else:
            resnet_factory = keras_resnet.models.ResNet152
        model = resnet_factory(inputs, include_top=False, freeze_bn=True)

        # invoke modifier if given
        if modifier:
            model = modifier(model)

        # the first backbone output is dropped; two None placeholders are prepended
        backbone_layers = [None, None]
        backbone_layers.extend(model.outputs[1:])

        # NOTE(review): a 'weights' entry in kwargs also reaches retinanet.retinanet
        # through **kwargs -- confirm that function accepts it.
        attach_weights = self.model_with_weights
        detector = retinanet.retinanet(
            inputs=inputs,
            num_classes=num_classes,
            backbone_layers=backbone_layers,
            **kwargs
        )
        return attach_weights(detector, weights=kwargs.get("weights"), skip_mismatch=True)
Пример #4
0
 def predict(self, inputs):
     """
     Run the network on a batched input tensor and attach the anchors.

     The anchor generator is created on the first call and stored on the
     instance, so anchors are available before post-processing or the loss
     function is called.

     Args:
         inputs: a [batch_size, height, width, channels] image tensor
     Returns:
         prediction_dict: the dict returned by `retinanet`, extended with:
             inputs: the image tensor that was passed in
             anchors: the `boxes` of the anchor generator
         The dict must contain `feature_map_list`, which is read to derive
         the feature map shapes. According to the original documentation it
         also carries (produced by `retinanet`, not visible here):
             box_pred: [batch_size, num_anchors, 4] predicted boxes
             cls_pred: [batch_size, num_anchors, num_classes+1] class predictions
     """
     params = self._params
     num_anchors_per_loc = params.get("num_scales") * len(
         params.get("aspect_ratios"))
     prediction_dict = retinanet(inputs,
                                 self._num_classes,
                                 num_anchors_per_loc,
                                 is_training=self._is_training)
     # generate anchors
     feature_map_shape_list = self._get_feature_map_shape(
         prediction_dict["feature_map_list"])
     image_shape = shape_utils.combined_static_and_dynamic_shape(inputs)
     # initialize anchor generator only once; later calls reuse it
     # NOTE(review): a later call with a different input size therefore keeps
     # the anchors computed for the first call -- confirm this is intended.
     if self._anchor_generator is None:
         self._anchor_generator = Anchor(
             feature_map_shape_list=feature_map_shape_list,
             img_size=(image_shape[1], image_shape[2]),
             anchor_scale=params.get("anchor_scale"),
             aspect_ratios=params.get("aspect_ratios"),
             scales_per_octave=params.get("num_scales"))
     self._anchors = self._anchor_generator.boxes
     prediction_dict["inputs"] = inputs
     prediction_dict["anchors"] = self._anchors
     # fixed: the original returned the misspelled name `prqediction_dict`,
     # which raised NameError on every call
     return prediction_dict
Пример #5
0
    def inception_retinanet(num_classes, backbone='inceptionV3', inputs=None, modifier=None, **kwargs):
        """ Builds a retinanet model on top of an InceptionV3 backbone.

        Args
            num_classes: Number of classes to predict.
            backbone: Which backbone to use; only 'inceptionV3' is accepted.
            inputs: Input tensor of the network; when None, a keras Input of shape (None, None, 3) is created.
            modifier: Optional callable that receives the Inception model and returns the model to use
                (for example to freeze backbone layers).
            **kwargs: Forwarded unchanged to retinanet.retinanet.

        Returns
            The result of retinanet.retinanet built on the InceptionV3 backbone.

        Raises
            ValueError: If backbone is not 'inceptionV3'.
        """
        # choose default input
        if inputs is None:
            inputs = keras.layers.Input(shape=(None, None, 3))

        # only a single variant is supported, so anything else is rejected up front
        if backbone != 'inceptionV3':
            raise ValueError("Backbone '{}' not recognized.".format(backbone))

        inception_model = keras.applications.InceptionV3(
            input_tensor=inputs,
            include_top=False,
            weights='imagenet',
        )

        if modifier:
            inception_model = modifier(inception_model)

        # feature taps: the original author's notes listed mixed0 .. mixed9_1 as
        # candidates; these three are the ones actually used
        backbone_layers = [None, None]
        for layer_name in ("mixed2", "mixed6", "mixed7"):
            backbone_layers.append(inception_model.get_layer(layer_name).output)

        return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone_layers, **kwargs)
Пример #6
0
    # Training hyper-parameters and file locations, all read from config.TRAIN.
    # The "1"/"2" suffixes suggest two training stages -- TODO confirm against
    # the part of this function that is not visible here.
    lr2 = config.TRAIN.LR2
    batch_size1 = config.TRAIN.BATCH_SIZE1
    batch_size2 = config.TRAIN.BATCH_SIZE2
    epoch1 = config.TRAIN.EPOCH1
    epoch2 = config.TRAIN.EPOCH2
    train_txt = config.TRAIN.TRAIN_TXT
    test_txt = config.TRAIN.TEST_TXT
    class_txt = config.TRAIN.CLASS_TXT
    input_shape = config.TRAIN.INPUT_SHAPE
    save_interval = config.TRAIN.SAVE_INTERVAL
    save_path = config.TRAIN.SAVE_PATH
    diary_path = config.TRAIN.DIARY_PATH

    # Build the detector on a fixed 416x416 RGB input.
    # NOTE(review): the input size and num_classes are hard-coded here even
    # though input_shape and class_txt are read from the config -- confirm
    # they agree.
    inputs = keras.Input(shape=(416, 416, 3))
    retina_model = retinanet(inputs,
                             out_channels=256,
                             num_classes=6,
                             num_anchors=9)
    # retina_model.summary()
    # Load the checkpoint with by_name / skip_mismatch enabled (presumably so
    # that layers which differ from the checkpoint are left untouched -- see
    # the Keras load_weights documentation).
    retina_model.load_weights('./datas/resnet50_coco_best_v2.1.0.h5',
                              by_name=True,
                              skip_mismatch=True)
    print('load weights successfully!!')

    # Read the annotation and class files line by line; the counts below are
    # the number of lines in the train and test files.
    with open(train_txt) as f:
        train_anno = f.readlines()
    num_train = len(train_anno)
    with open(test_txt) as f:
        test_anno = f.readlines()
    num_test = len(test_anno)
    with open(class_txt) as f:
        classes = f.readlines()
Пример #7
0
# Spatial size shared by the input placeholder and the anchor generator.
image_shape = (224, 224)

# Detection head and anchor configuration.
num_classes = 2
num_scales = 2
aspect_ratios = (1.0, 2.0, 0.5)
anchor_scale = 4.0
num_anchors_per_loc = len(aspect_ratios) * num_scales

# One slot more than num_classes is requested for the one-hot labels
# (presumably a background class -- TODO confirm).
test_gt_labels_list = create_one_hot_vector_gt_labels(test_gt_labels_list, 1 + num_classes)

# Single-image batch with three channels at the resolution defined above.
inputs = tf.placeholder(tf.float32, shape=(1,) + image_shape + (3,))
prediction_dict = retinanet(inputs, num_classes, num_anchors_per_loc, is_training=True)

# Anchors are derived from the shapes of the feature maps the network returned.
feature_map_shape_list = _get_feature_map_shape(prediction_dict["feature_map_list"])
anchor_generator = Anchor(
    feature_map_shape_list=feature_map_shape_list,
    img_size=image_shape,
    anchor_scale=anchor_scale,
    aspect_ratios=aspect_ratios,
    scales_per_octave=num_scales,
)
anchors = anchor_generator.boxes

print("anchors shape:{}".format(anchors.shape))

# unmatched_class_label = tf.constant((num_classes + 1) * [0], tf.float32)
Пример #8
0
def pred_clear():
    """Reset the module-level per-detection dictionaries x, y, w and h.

    Each of the four names is rebound to a fresh empty dict. The original
    version assigned to local variables only, so calling it had no effect.

    Returns
        None
    """
    # without this declaration the assignments below would only create locals
    global x, y, w, h
    x, y, w, h = {}, {}, {}, {}

total = len(img_list)
print(('testing images = %d' % total))
remain = total
start_time = time.time()

# Class ids whose boxes are written out (the ids named by the original condition).
wanted_classes = (2, 5, 7)

for i, img_name in enumerate(img_list):
    img_path = os.path.join(test_dir, img_name)
    boxes, score, predicted_class = retinanet(model, img_path, 0.3)

    number = len(predicted_class)
    kept = []
    # `j` instead of `i`: the original inner loop reused `i` and clobbered the
    # image index of the outer loop
    for j in range(number):
        # boxes are read as (x1, y1, x2, y2) and stored as x, y, width, height
        x[j] = boxes[j][0]
        y[j] = boxes[j][1]
        w[j] = boxes[j][2] - boxes[j][0]
        h[j] = boxes[j][3] - boxes[j][1]
        # fixed: `predicted_class[j] == 2 or 5 or 7` was always truthy, so
        # every detection was exported regardless of its class
        if predicted_class[j] in wanted_classes:
            kept.append('_'.join(str(v) for v in (x[j], y[j], w[j], h[j])))
            #print(score[j])
    # one row per image: "x_y_w_h" entries separated by ';' (empty if none kept)
    coordinate = ';'.join(kept)
    writer.writerow([img_name, coordinate])
    pred_clear()

    remain -= 1