Example #1
    def annotate_image(self,
                       path_to_image,
                       num_gpus=1,
                       min_image_dimension=800,
                       max_image_dimension=1024,
                       steps_per_epoch=100,
                       validation_steps=70):

        labels_to_names = self._load_labels()

        model = models.load_model(self._path_to_model,
                                  backbone_name='resnet50')

        # load image
        image = read_image_bgr(path_to_image)

        # copy to draw on
        draw = image.copy()
        draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

        # preprocess image for network
        image = preprocess_image(image)
        image, scale = resize_image(image)

        # process image
        start = time.time()
        outputs = model.predict_on_batch(np.expand_dims(image, axis=0))
        print("processing time: ", time.time() - start)

        boxes = outputs[-4][0]
        scores = outputs[-3][0]
        labels = outputs[-2][0]
        masks = outputs[-1][0]

        # correct for image scale
        boxes /= scale

        # visualize detections
        # detections come back sorted by descending score, so we can stop
        # at the first one below the threshold
        for box, score, label, mask in zip(boxes, scores, labels, masks):
            if score < 0.5:
                break

            color = label_color(label)

            b = box.astype(int)
            draw_box(draw, b, color=color)

            mask = mask[:, :, label]
            draw_mask(draw, b, mask, color=label_color(label))

            caption = "{} {:.3f}".format(labels_to_names[label], score)
            draw_caption(draw, b, caption)

        plt.figure(figsize=(15, 15))
        plt.axis('off')
        plt.imshow(draw)
        plt.show()
        """config = ImageMonkeyConfig(len(labels), num_gpus, min_image_dimension, max_image_dimension, steps_per_epoch, validation_steps) 
Example #2
    def retinamask(self):
        backbone = models.backbone('resnet50')
        batch_size = 1
        train_generator, validation_generator = Retinamask.create_generators(
            batch_size, self.annotations, self.classes)
        freeze_backbone = True  # freeze the backbone layers during training
        weights = self.Input_weights_path
        print('Creating model, this may take a second...')
        model, training_model, prediction_model = Retinamask.create_models(
            backbone_retinanet=backbone.maskrcnn,
            num_classes=train_generator.num_classes(),
            weights=weights,
            freeze_backbone=freeze_backbone)
        #print(model.summary())
        training_model.fit_generator(generator=train_generator,
                                     steps_per_epoch=1000,
                                     epochs=self.epoch,
                                     verbose=1,
                                     max_queue_size=1)
        training_model.save(self.trained_weights_path + 'retinamask.h5')

        #Testing
        model_path = self.trained_weights_path + 'retinamask.h5'
        model = models.load_model(model_path, backbone_name='resnet50')
        labels_to_names = {0: 'ship'}
        # load image
        # test_image_path is assumed to be an instance attribute like the other paths
        image = read_image_bgr(self.test_image_path)
        # copy to draw on
        draw = image.copy()
        draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)
        # preprocess image for network
        image = preprocess_image(image)
        image, scale = resize_image(image)
        # process image
        start = time.time()
        outputs = model.predict_on_batch(np.expand_dims(image, axis=0))
        print("processing time: ", time.time() - start)
        boxes = outputs[-4][0]
        scores = outputs[-3][0]
        labels = outputs[-2][0]
        masks = outputs[-1][0]
        # correct for image scale
        boxes /= scale
        # visualize detections
        for box, score, label, mask in zip(boxes, scores, labels, masks):
            if score < 0.5:
                break
            color = label_color(label)
            b = box.astype(int)
            draw_box(draw, b, color=color)
            mask = mask[:, :, label]
            draw_mask(draw, b, mask, color=label_color(label))
            caption = "{} {:.3f}".format(labels_to_names[label], score)
            draw_caption(draw, b, caption)
        # save the annotated image once every detection has been drawn
        plt.imsave(self.output_image_path + 'output.jpg', draw)
Example #3
def main(args=None):
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # make sure keras is the minimum required version
    check_keras_version()

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # optionally choose specific GPU
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    train_generator, validation_generator = create_generators(args)

    # create the model
    if args.snapshot is not None:
        print('Loading model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        prediction_model = model
    else:
        weights = args.weights
        # default to imagenet if nothing else is specified
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        print('Creating model, this may take a second...')
        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.maskrcnn,
            num_classes=train_generator.num_classes(),
            weights=weights,
            freeze_backbone=args.freeze_backbone,
            class_specific_filter=args.class_specific_filter)

    print(model.summary())

    callbacks = create_callbacks(model, training_model, prediction_model,
                                 validation_generator, args)

    training_model.fit_generator(generator=train_generator,
                                 steps_per_epoch=args.steps,
                                 epochs=args.epochs,
                                 verbose=1,
                                 callbacks=callbacks,
                                 max_queue_size=1)
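Because main() accepts an argument list, it can also be driven programmatically; a hedged sketch, where the flag names are inferred from the attributes read above (args.backbone, args.steps, args.epochs) and any positional arguments depend on parse_args:

if __name__ == '__main__':
    # roughly equivalent to: python train.py --backbone resnet50 --steps 1000 --epochs 50
    main(['--backbone', 'resnet50', '--steps', '1000', '--epochs', '50'])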
Example #4
def loadModel(model_type='default', model_path=None):
	from maskrcnn_modanet.processimages import get_session, apply_mask
	import re
	import os

	# import keras
	import keras

	# set tf backend to allow memory to grow, instead of claiming everything
	import tensorflow as tf

	# use this environment flag to change which GPU to use
	#os.environ["CUDA_VISIBLE_DEVICES"] = "1"

	# set the modified tf session as backend in keras
	keras.backend.tensorflow_backend.set_session(get_session())

	# load label to names mapping for visualization purposes
	labels_to_names = {0: 'bag', 1: 'belt', 2: 'boots', 3: 'footwear', 4: 'outer', 5: 'dress', 6: 'sunglasses', 7: 'pants', 8: 'top', 9: 'shorts', 10: 'skirt', 11: 'headwear', 12: 'scarf/tie'}


	# adjust this to point to your trained model
	if model_type == 'trained':
		# get all models names in the results folder
		modelnames = [f for f in os.listdir(snp_path) if os.path.isfile(os.path.join(snp_path, f))]
		
		def extract_number(f):
			s = re.findall(r"\d+$", f)
			return (int(s[0]) if s else -1, f)
		# pick the snapshot with the highest epoch number
		latest = max(modelnames, key=extract_number)
		print(latest)
		model_path = os.path.join(snp_path, latest)
	elif model_type == 'default' and not model_path:
		model_path = path + 'results/resnet50_modanet.h5'
	elif model_type == 'coco':
		model_path = path + 'results/resnet50_coco_v0.2.0.h5'
		labels_to_names = {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
	elif model_path:
		pass
	else:
		raise ValueError('The type must be either trained, coco, or default. '
			'Alternatively, you can pass a custom model path')

	# load retinanet model
	from keras_maskrcnn import models
	print(model_path)
	model = models.load_model(model_path, backbone_name='resnet50')

	return model, labels_to_names
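A hedged usage sketch for loadModel (path and snp_path are module-level globals of maskrcnn_modanet, so the package is assumed to be configured):

# load the COCO-trained weights together with the matching label map
model, labels_to_names = loadModel(model_type='coco')
print(len(labels_to_names), 'classes')  # 80 entries in the COCO map above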
Example #5
def loadModel():
    import os

    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # set the modified tf session as backend in keras
    keras.backend.tensorflow_backend.set_session(get_session())

    # load label to names mapping for visualization purposes
    labels_to_names = {0: 'bag', 1: 'belt', 2: 'boots', 3: 'footwear', 4: 'outer', 5: 'dress', 6: 'sunglasses',
                       7: 'pants', 8: 'top', 9: 'shorts', 10: 'skirt', 11: 'headwear', 12: 'scarf/tie'}

    model_path = path + 'resnet50_modanet.h5'

    model = models.load_model(model_path, backbone_name='resnet50')

    return model, labels_to_names
Example #6
def get_maskrcnn_predictions(model_path, backbone, image_files, classes_description, output_csv, show_debug_image):
    model = models.load_model(model_path, backbone_name=backbone)
    classes = get_class_arr(classes_description, type='name')
    classes_google = get_class_arr(classes_description, type='google_name')
    print('Image files to process: {}'.format(len(image_files)))

    out = open(output_csv, 'w')
    out.write('ImageID,ImageWidth,ImageHeight,PredictionString\n')
    for i in range(len(image_files)):
        inp_file = image_files[i]
        image_id = os.path.basename(inp_file)  # avoid shadowing the built-in id()
        img = read_single_image(inp_file)
        if img is None:
            print('Problem reading image: {}'.format(inp_file))
            continue
        boxes, scores, labels, masks = get_maskrcnn_single_predictions(model, img, classes, show_debug_image)
        s1 = get_preds_as_string(image_id, img, boxes, scores, labels, masks, classes_google)
        out.write(s1)

    out.close()
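Since the CSV written above starts with the header ImageID,ImageWidth,ImageHeight,PredictionString, it can be read back with the standard csv module; a minimal sketch (the file name is a placeholder):

import csv

with open('predictions.csv') as f:
    for row in csv.DictReader(f):
        print(row['ImageID'], row['PredictionString'][:60])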
Example #7
def evaluateModel(model_path):

    import json
    import os

    with open(
            os.path.expanduser('~') + '/.maskrcnn-modanet/' +
            'savedvars.json') as f:
        savedvars = json.load(f)
    path = savedvars['datapath']

    ann_path = path + "datasets/coco/annotations/"
    ann_orig_path = path + 'datasets/modanet/annotations/'

    coco_path = path + "datasets/coco/"

    from keras_maskrcnn import models

    model = models.load_model(model_path, backbone_name='resnet50')

    from keras_retinanet.utils.transform import random_transform_generator

    transform_generator = random_transform_generator(flip_x_chance=0.5)

    from maskrcnn_modanet.train.coco import CocoGenerator

    validation_generator = CocoGenerator(coco_path,
                                         'val',
                                         batch_size=1,
                                         config=None,
                                         image_min_side=800,
                                         image_max_side=1333)

    from keras_maskrcnn.utils.coco_eval import evaluate_coco

    evaluate_coco(validation_generator, model)
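evaluateModel reads only the datapath key from ~/.maskrcnn-modanet/savedvars.json; a minimal sketch that writes such a file (the trailing slash matters, because the paths above are built by plain string concatenation):

import json
import os

config_dir = os.path.expanduser('~/.maskrcnn-modanet')
os.makedirs(config_dir, exist_ok=True)
with open(os.path.join(config_dir, 'savedvars.json'), 'w') as f:
    json.dump({'datapath': '/home/user/data/'}, f)  # placeholder path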
Example #8
        boxes /= scale
        for box, score, label in zip(boxes, scores, labels):
            if score < thresh:
                continue
            boxes_final.append(box)
            score_final.append(score)
            label_final.append(label)
        result = {"boxes": [], "scores": [], "labels": []}
        for box, score, label in zip(boxes_final,score_final,label_final):
            tmp = {}
            # scale the boxes to a 400x600 output canvas
            Rx = 400 / ori_image.shape[1]
            Ry = 600 / ori_image.shape[0]
            box_tmp = box.flatten().tolist()
            tmp["width"] = Rx*(box_tmp[2]-box_tmp[0])
            tmp["height"] = Ry*(box_tmp[3] - box_tmp[1])
            tmp["x"] = Rx*box_tmp[0]
            tmp["y"] = Ry*box_tmp[1]
            result["boxes"].append(tmp)
            result["scores"].append(str(score))
            result["labels"].append(labels_to_names[label])
    return json.dumps(result)
if __name__ == "__main__":
    model_path = "../../local/resnet50_modanet.h5"
    labels_to_names = {0: 'bag', 1: 'belt', 2: 'boots', 3: 'footwear', 4: 'outer', 5: 'dress', 6: 'sunglasses', 7: 'pants', 8: 'top', 9: 'shorts', 10: 'skirt', 11: 'headwear', 12: 'scarf/tie'}
    sess = get_session()
    sess.run(tf.global_variables_initializer())
    graph = tf.get_default_graph()
    keras.backend.tensorflow_backend.set_session(sess)
    model = models.load_model(model_path, backbone_name='resnet50')
    app.run(host='0.0.0.0', port=8081, threaded=True)
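On the client side, the JSON built above unpacks straightforwardly; a hedged sketch, assuming response_text holds the string returned by the handler:

import json

result = json.loads(response_text)
for box, score, label in zip(result['boxes'], result['scores'], result['labels']):
    # box coordinates were already rescaled to the 400x600 canvas
    print(label, score, box['x'], box['y'], box['width'], box['height'])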
Example #9
def main(proc_img_path=None,
         proc_img_url=None,
         all_set=True,
         save_path=None,
         model_path=None,
         segments=False,
         annotations=False,
         threshold_score=0.5,
         limit=None,
         model=None,
         labels_to_names=None):
    # import keras
    import keras

    # import keras_retinanet
    from keras_maskrcnn import models
    from keras_maskrcnn.utils.visualization import draw_mask
    from keras_retinanet.utils.visualization import draw_box, draw_caption, draw_annotations
    from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image
    from keras_retinanet.utils.colors import label_color

    # import miscellaneous modules
    import matplotlib.pyplot as plt
    import cv2
    import numpy as np
    import time

    # set tf backend to allow memory to grow, instead of claiming everything
    import tensorflow as tf

    # use this environment flag to change which GPU to use
    #os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    with open(
            os.path.expanduser('~') + '/.maskrcnn-modanet/' +
            'savedvars.json') as f:
        savedvars = json.load(f)
    path = savedvars['datapath']

    img_path = path + "datasets/coco/images/"

    if not model:
        # set the modified tf session as backend in keras
        keras.backend.tensorflow_backend.set_session(get_session())

        # adjust this to point to your trained model

        # get all models names in the results folder
        modelnames = [
            f for f in os.listdir(snp_path)
            if os.path.isfile(os.path.join(snp_path, f))
        ]
        import re

        def extract_number(f):
            s = re.findall(r"\d+$", f)
            return (int(s[0]) if s else -1, f)

        # pick the snapshot with the highest epoch number
        latest = max(modelnames, key=extract_number)
        print(latest)
        model_path = os.path.join(snp_path, latest)

        # load retinanet model

        model = models.load_model(model_path, backbone_name='resnet50')
    if not labels_to_names:
        # load label to names mapping for visualization purposes
        labels_to_names = {
            0: 'bag',
            1: 'belt',
            2: 'boots',
            3: 'footwear',
            4: 'outer',
            5: 'dress',
            6: 'sunglasses',
            7: 'pants',
            8: 'top',
            9: 'shorts',
            10: 'skirt',
            11: 'headwear',
            12: 'scarf/tie'
        }

    default_save_path = False
    if save_path == 'default':
        # set path to default, e.g. results/processedimages/images/1.jpg
        save_path = path + 'results/processedimages/'
        if not annotations:
            save_path += 'images/'
        else:
            save_path += 'annotations/'
        default_save_path = True
    SAVE_PATH = save_path  # used for multiple images

    if annotations:
        # if save_path: save_path = path + 'results/processedimages/annotations/1.json'
        annotations = [{
            'bbox': None,
            'score': None,
            'category': None,
            'part': None
        }]

    if all_set:
        # load images
        with open(ann_path + 'instances_val.json') as f:
            instances = json.load(f)
        images = instances['images']
        for img in images:
            img['file_name'] = img_path + img['file_name']

    elif proc_img_path:
        # just draw the selected image
        images = [{
            'file_name': proc_img_path if os.path.isabs(proc_img_path)
            else img_path + proc_img_path
        }]
    elif proc_img_url:
        # just draw the image selected
        images = [{'file_name': proc_img_url}]

    try:
        # for each image in the dataset
        for i, img in enumerate(images):
            print(i, end=' ')
            if limit and i >= limit:
                break

            if all_set:
                image = read_image_bgr(img['file_name'])
            elif proc_img_path:
                image = read_image_bgr(img['file_name'])
            elif proc_img_url:
                import requests
                from io import BytesIO
                r = requests.get(img['file_name'], allow_redirects=True)
                image = read_image_bgr(BytesIO(r.content))

            if save_path:
                if proc_img_path or all_set:
                    img_file_name = img['file_name'].split("/")[-1]

                elif proc_img_url:
                    img_file_name = 'urlimg.jpg'
                if not annotations:
                    save_path += img_file_name
                else:
                    save_path += img_file_name.split('.')[0] + '.json'
            if save_path and segments and not annotations:
                #remove the extension
                save_path = save_path.split('.')[0]

            # copy to draw on
            draw = image.copy()
            draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)

            # preprocess image for network
            image = preprocess_image(image)
            image, scale = resize_image(image)

            # process image
            start = time.time()
            outputs = model.predict_on_batch(np.expand_dims(image, axis=0))
            print("processing time: ",
                  time.time() - start, "\t(Ctrl+c and close image to exit)")

            boxes = outputs[-4][0]
            scores = outputs[-3][0]
            labels = outputs[-2][0]
            masks = outputs[-1][0]

            # correct for image scale
            boxes /= scale

            if annotations:
                annotations = [{
                    'bbox': None,
                    'score': None,
                    'category': None,
                    'part': None
                } for i in range(
                    len([
                        score for score in scores if score >= threshold_score
                    ]))]

            segment_id = 0
            # visualize detections
            # detections are sorted by descending score, so we can stop
            # at the first one below the threshold
            for box, score, label, mask in zip(boxes, scores, labels, masks):
                if score < threshold_score:
                    break
                color = label_color(label)

                if not segments:
                    b = box.astype(int)
                    draw_box(draw, b, color=color)

                    mask = mask[:, :, label]
                    draw_mask(draw, b, mask, color=label_color(label))

                    caption = "{} {:.3f}".format(labels_to_names[label], score)
                    draw_caption(draw, b, caption)
                elif segments:
                    drawclone = np.copy(draw)

                    b = box.astype(int)
                    # draw_box(drawclone, b, color=color)

                    mask = mask[:, :, label]
                    draw_mask_only(drawclone,
                                   b,
                                   mask,
                                   color=label_color(label))

                    if not annotations:
                        caption = "{} {:.3f}".format(labels_to_names[label],
                                                     score)
                        draw_caption(drawclone, b, caption)
                        plt.figure(figsize=(15, 15))
                        plt.axis('off')
                        plt.imshow(drawclone)
                        if not save_path:
                            plt.show()
                        else:
                            # segment_id is an int, so stringify it for the file name
                            segment_path = '_segment_' + str(segment_id) + '.jpg'
                            save_path_segment = save_path + segment_path
                            print(save_path_segment)
                            plt.savefig(save_path_segment)
                            plt.close()

                    else:
                        # cast numpy types so the annotations can be json.dump'ed below
                        annotations[segment_id]['bbox'] = b.tolist()
                        annotations[segment_id]['score'] = float(score)
                        annotations[segment_id]['category'] = int(label)
                        # only the object inside the mask is shown, the rest is black
                        annotations[segment_id]['part'] = drawclone.tolist()
                segment_id += 1

            if not segments:

                if not save_path:
                    plt.figure(figsize=(15, 15))
                    plt.axis('off')
                    plt.imshow(draw)
                    if not proc_img_url:
                        print(img['file_name'])
                    plt.show()
                else:
                    processed_image = Image.fromarray(draw, 'RGB')
                    processed_image.save(save_path)
                    del processed_image
                    print(save_path)
                    # plt.savefig(save_path)
                    # plt.close()
            elif segments:
                if annotations:
                    if save_path:
                        print(save_path)
                        with open(save_path, 'w') as outfile:
                            json.dump(annotations, outfile)
                    else:
                        return annotations
            save_path = SAVE_PATH  # restore path for next image

    except KeyboardInterrupt:
        pass
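A hedged usage sketch for this main(), using only parameters from its signature (the image path is a placeholder):

# process one local image instead of the whole validation set and save
# the annotated result under the default results folder
main(proc_img_path='myphoto.jpg', all_set=False, save_path='default',
     threshold_score=0.5)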
Example #10
def get_model(model_path):
    model = models.load_model(model_path, backbone_name='resnet50')
    return model
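Combined with the preprocessing calls used throughout these examples, get_model covers the whole inference path; a minimal sketch (file names are placeholders):

import numpy as np
from keras_retinanet.utils.image import read_image_bgr, preprocess_image, resize_image

model = get_model('resnet50_modanet.h5')
image = preprocess_image(read_image_bgr('test.jpg'))
image, scale = resize_image(image)
outputs = model.predict_on_batch(np.expand_dims(image, axis=0))
boxes, scores, labels, masks = [o[0] for o in outputs[-4:]]
boxes /= scale  # map the boxes back to the original image coordinates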
Example #11
def main(args=None):
    import json
    with open(
            os.path.expanduser('~') + '/.maskrcnn-modanet/' +
            'savedvars.json') as f:
        savedvars = json.load(f)

    # parse arguments
    if args is None:
        print(
            '\n\n\nExample usage: maskrcnn-modanet train --epochs 15 --workers 0 --batch-size 1 coco\n\n\n'
        )
        args = ['-h']
    args = parse_args(args, savedvars)

    # make sure keras is the minimum required version
    check_keras_version()

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # optionally choose specific GPU
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # create the generators
    train_generator, validation_generator = create_generators(args)

    # create the model
    if args.snapshot is not None:
        print('Loading model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        prediction_model = model
    else:
        weights = args.weights
        # default to imagenet if nothing else is specified
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        anchor_params = None
        if args.config and 'anchor_parameters' in args.config:
            anchor_params = parse_anchor_parameters(args.config)

        print('Creating model, this may take a second...')
        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.maskrcnn,
            num_classes=train_generator.num_classes(),
            weights=weights,
            freeze_backbone=args.freeze_backbone,
            class_specific_filter=args.class_specific_filter,
            anchor_params=anchor_params)

    # print model summary
    print(model.summary())

    # create the callbacks
    callbacks = create_callbacks(
        model,
        training_model,
        prediction_model,
        validation_generator,
        args,
    )

    # use multiprocessing if workers > 0
    use_multiprocessing = args.workers > 0

    # start training
    training_model.fit_generator(generator=train_generator,
                                 steps_per_epoch=args.steps,
                                 epochs=args.epochs,
                                 verbose=1,
                                 callbacks=callbacks,
                                 workers=args.workers,
                                 use_multiprocessing=use_multiprocessing,
                                 max_queue_size=args.max_queue_size)
Example #12
def segmentation(imgpath, score_threshold=0.5, binarize_threshold=0.5):
    # adjust this to point to your downloaded/trained model
    model_path = "resnet50_weights/resnet50_coco_v0.2.0.h5"
    # load retinanet model
    model = models.load_model(model_path, backbone_name='resnet50')
    # load label to names mapping for visualization purposes
    labels_to_names = {
        0: 'person',
        1: 'bicycle',
        2: 'car',
        3: 'motorcycle',
        4: 'airplane',
        5: 'bus',
        6: 'train',
        7: 'truck',
        8: 'boat',
        9: 'traffic light',
        10: 'fire hydrant',
        11: 'stop sign',
        12: 'parking meter',
        13: 'bench',
        14: 'bird',
        15: 'cat',
        16: 'dog',
        17: 'horse',
        18: 'sheep',
        19: 'cow',
        20: 'elephant',
        21: 'bear',
        22: 'zebra',
        23: 'giraffe',
        24: 'backpack',
        25: 'umbrella',
        26: 'handbag',
        27: 'tie',
        28: 'suitcase',
        29: 'frisbee',
        30: 'skis',
        31: 'snowboard',
        32: 'sports ball',
        33: 'kite',
        34: 'baseball bat',
        35: 'baseball glove',
        36: 'skateboard',
        37: 'surfboard',
        38: 'tennis racket',
        39: 'bottle',
        40: 'wine glass',
        41: 'cup',
        42: 'fork',
        43: 'knife',
        44: 'spoon',
        45: 'bowl',
        46: 'banana',
        47: 'apple',
        48: 'sandwich',
        49: 'orange',
        50: 'broccoli',
        51: 'carrot',
        52: 'hot dog',
        53: 'pizza',
        54: 'donut',
        55: 'cake',
        56: 'chair',
        57: 'couch',
        58: 'potted plant',
        59: 'bed',
        60: 'dining table',
        61: 'toilet',
        62: 'tv',
        63: 'laptop',
        64: 'mouse',
        65: 'remote',
        66: 'keyboard',
        67: 'cell phone',
        68: 'microwave',
        69: 'oven',
        70: 'toaster',
        71: 'sink',
        72: 'refrigerator',
        73: 'book',
        74: 'clock',
        75: 'vase',
        76: 'scissors',
        77: 'teddy bear',
        78: 'hair drier',
        79: 'toothbrush'
    }
    # load image
    image = read_image_bgr(imgpath)
    # copy to draw on
    draw = image.copy()
    draw = cv2.cvtColor(draw, cv2.COLOR_BGR2RGB)
    # preprocess image for network
    image = preprocess_image(image)
    image, scale = resize_image(image)
    # process image
    start = time.time()
    outputs = model.predict_on_batch(np.expand_dims(image, axis=0))
    print("Segmentation took", round(time.time() - start, 2), "seconds.")
    boxes = outputs[-4][0]
    scores = outputs[-3][0]
    labels = outputs[-2][0]
    masks = outputs[-1][0]

    # correct for image scale
    boxes /= scale
    mask_list = []
    box_list = []
    for box, score, label, mask in zip(boxes, scores, labels, masks):
        if score < score_threshold:
            break
        # save box coordinates in list
        box = box.astype(np.int16)
        box_list.append(box[:])
        # resize to fit the box
        mask = mask.astype(np.float32)
        mask = cv2.resize(mask, (box[2] - box[0], box[3] - box[1]))

        # binarize the mask
        mask = (mask > binarize_threshold).astype(np.uint8)
        mask = cv2.normalize(src=mask,
                             dst=None,
                             alpha=0,
                             beta=255,
                             norm_type=cv2.NORM_MINMAX,
                             dtype=cv2.CV_8U)
        mask_list.append(mask[:, :, label])

    # visualize detections
    for box, score, label, mask in zip(boxes, scores, labels, masks):
        if score < score_threshold:
            break

        color = label_color(label)

        b = box.astype(int)
        draw_box(draw, b, color=color)

        mask = mask[:, :, label]
        draw_mask(draw, b, mask, color=label_color(label))

        caption = "{} {:.3f}".format(labels_to_names[label], score)
        draw_caption(draw, b, caption)
    return mask_list, box_list, draw
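A hedged usage sketch for segmentation(), matching its return order (the image path is a placeholder):

masks, boxes, annotated = segmentation('street.jpg', score_threshold=0.4)
print(len(masks), 'objects above threshold')
for b, m in zip(boxes, masks):
    print('box:', b.tolist(), 'mask shape:', m.shape)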
Example #13
def main(args=None):
    from keras import backend as K

    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # make sure keras is the minimum required version
    check_keras_version()

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # optionally choose specific GPU
    if args.gpu:
        print('Use GPU: {}'.format(args.gpu))
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # create the generators
    train_generator, validation_generator = create_generators(args)

    # create the model
    if args.snapshot is not None:
        print('Loading model {}, this may take a second...'.format(
            args.snapshot))
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        prediction_model = model
    else:
        weights = args.weights
        # default to imagenet if nothing else is specified
        if weights is None and args.imagenet_weights:
            weights = backbone.download_imagenet()

        anchor_params = None

        print('Creating model, this may take a second...')
        model, training_model, prediction_model = create_models(
            backbone_retinanet=backbone.maskrcnn,
            num_classes=train_generator.num_classes(),
            weights=weights,
            args=args,
            freeze_backbone=args.freeze_backbone,
            class_specific_filter=args.class_specific_filter,
            anchor_params=anchor_params)

    # print model summary
    print(model.summary())

    print('Learning rate: {}'.format(K.get_value(model.optimizer.lr)))
    if args.lr > 0.0:
        K.set_value(model.optimizer.lr, args.lr)
        print('Updated learning rate: {}'.format(
            K.get_value(model.optimizer.lr)))

    # create the callbacks
    callbacks = create_callbacks(
        model,
        training_model,
        prediction_model,
        validation_generator,
        args,
    )

    initial_epoch = 0
    if args.snapshot is not None:
        initial_epoch = int((args.snapshot.split('_')[-1]).split('.')[0])

    # start training
    training_model.fit_generator(
        generator=train_generator,
        steps_per_epoch=args.steps,
        epochs=args.epochs,
        verbose=1,
        callbacks=callbacks,
        max_queue_size=1,
        initial_epoch=initial_epoch,
    )
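The initial_epoch recovery above assumes snapshot filenames end in _<epoch>.<ext>; a quick sketch of that parse on a hypothetical name:

snapshot = 'resnet50_coco_07.h5'  # hypothetical snapshot file name
initial_epoch = int(snapshot.split('_')[-1].split('.')[0])
print(initial_epoch)  # 7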