Example #1
    def __init__(self, fileName):
        
        # self.rgb = np.array(Image.open(fileName).convert("RGB"))
        self.rgb = utils.read_image(fileName, color=True)
        print(self.rgb.shape)

        bboxes, labels, scores, masks = model.predict([self.rgb])        
        bbox, label, score, mask = (bboxes[0],
                                    np.asarray(labels[0], dtype=np.int32),
                                    scores[0], masks[0])
        print(bbox)

        vis_bbox(self.rgb, bbox, label=label, score=score, mask=mask,
                 label_names=('onigiri',),  # one-element tuple, not a bare string
                 contour=False, labeldisplay=True)
        plt.show()
    def get_predictions(self, img_name='', image=None, plot=False):
        """
        Gets the bounding box predictions for the image and returns them
        as a tensor of bboxes in the format:
           [[x1, y1, x2, y2, score], ...]
        """
        if img_name:
            img = cv2.imread(os.path.join(settings.INPUT_FOLDER, img_name))
        else:
            img = image

        img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
        img, info_img = preprocess(img, self.imgsize,
                                   jitter=0)  # info = (h, w, nh, nw, dx, dy)
        img = np.transpose(img / 255., (2, 0, 1))
        img = torch.from_numpy(img).float().unsqueeze(0)

        if use_cuda():
            img = Variable(img.type(torch.cuda.FloatTensor))
        else:
            img = Variable(img.type(torch.FloatTensor))

        with torch.no_grad():
            outputs = self.model(img)
            outputs = postprocess(outputs,
                                  Dataset.NUM_CLASSES[Dataset.SIGNET_RING],
                                  self.confthre, self.nmsthre)

        bboxes = list()
        colors = list()
        bboxes_with_scores = list()

        if outputs[0] is not None:
            for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
                print(int(x1), int(y1), int(x2), int(y2), float(conf),
                      int(cls_pred))
                print('\t+ Conf: %.5f' % cls_conf.item())
                box = yolobox2label([y1, x1, y2, x2], info_img)
                bboxes.append(box)
                colors.append(BOX_COLOR)
                tmp = [box[1], box[0], box[3], box[2]]
                tmp.append(conf * cls_conf)
                bboxes_with_scores.append(tmp)

        if plot:
            vis_bbox(img_raw, bboxes, instance_colors=colors, linewidth=2)
            plt.show()

        return torch.FloatTensor(bboxes_with_scores)
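A hypothetical consumer of the tensor returned above; the detector instance name and the 0.5 threshold are illustrative, not part of the original code:

# Sketch: iterating over the [[x1, y1, x2, y2, score], ...] tensor that
# get_predictions() returns ('detector' and the cut-off are assumptions).
preds = detector.get_predictions(img_name='sample.png')
for x1, y1, x2, y2, score in preds.tolist():
    if score >= 0.5:
        print('box (%d, %d)-(%d, %d), score %.3f' % (x1, y1, x2, y2, score))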
Example #3
def draw(model, datatype, imgsize):
    model.eval()
    coco_class_names, coco_class_ids, coco_class_colors = get_coco_label_names()
    id_list_file = os.path.join(
            '../../VOCdevkit/VOC2007', 'ImageSets/Main/{0}.txt'.format('test'))
    ids = [id_.strip() for id_ in open(id_list_file)]
    for i in tqdm(range(len(ids))):
        if datatype == 'voc':
            img_file = os.path.join('../../VOCdevkit/VOC2007', 'JPEGImages', ids[i] + '.jpg')
        else:
            img_file = os.path.join('COCO', 'train2017',
                                    '{:012}'.format(int(ids[i])) + '.jpg')
        img = cv2.imread(img_file)
        img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))

        img, info_img_t = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
        img = np.transpose(img / 255., (2, 0, 1))
        img = torch.from_numpy(img).float().unsqueeze(0)
        img = Variable(img.type(torch.cuda.FloatTensor))
        outputs = model(img)

        outputs = postprocess(outputs, 80, 0.5, 0.5)
        # imgs.shape : torch.Size([1, 3, 608, 608])
        # outputs[0].shape :torch.Size([3, 7])
        # targets.shape :torch.Size([1, 50, 5])
        if outputs[0] is not None:
            bboxes = list()
            classes = list()
            colors = list()
            for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:

                cls_id = coco_class_ids[int(cls_pred)]
                box = yolobox2label([y1, x1, y2, x2], info_img_t)
                bboxes.append(box)
                classes.append(cls_id)
                colors.append(coco_class_colors[int(cls_pred)])
            vis_bbox(
                img_raw, bboxes, label=classes, label_names=coco_class_names,
                instance_colors=colors, linewidth=2)
            plt.savefig('draw/'+ids[i]+'.jpg')
    model.train()
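These examples all map boxes back with yolobox2label(box, info_img), where info_img = (h, w, nh, nw, dx, dy) records the original size (h, w), the resized content size (nh, nw), and the letterbox padding offsets (dx, dy). A minimal sketch of that inverse mapping under this convention (illustrative, not the repo's exact implementation):

def letterbox_to_original(box, info_img):
    # box is (y1, x1, y2, x2) in network-input coordinates
    h, w, nh, nw, dx, dy = info_img
    y1, x1, y2, x2 = box
    # subtract the padding offset, then undo the resize
    y1, y2 = (y1 - dy) * h / nh, (y2 - dy) * h / nh
    x1, x2 = (x1 - dx) * w / nw, (x2 - dx) * w / nw
    return [y1, x1, y2, x2]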
Example #4
def main():
    """
    Visualize the detection result for the given image and the pre-trained model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--cfg', type=str, default='config/yolov3_default.cfg')
    parser.add_argument('--ckpt', type=str, help='path to the checkpoint file')
    parser.add_argument('--weights_path',
                        type=str,
                        default=None,
                        help='path to weights file')
    parser.add_argument('--image', type=str)
    parser.add_argument(
        '--background',
        action='store_true',
        default=False,
        help='background (no-display mode; save "./output.png")')
    parser.add_argument('--detect_thresh',
                        type=float,
                        default=None,
                        help='confidence threshold')
    args = parser.parse_args()

    with open(args.cfg, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(cfg['MODEL'])

    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']

    if args.detect_thresh:
        confthre = args.detect_thresh

    img = cv2.imread(args.image)
    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, imgsize,
                               jitter=0)  # info = (h, w, nh, nw, dx, dy)
    img = np.transpose(img / 255., (2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)

    if args.gpu >= 0:
        model.cuda(args.gpu)
        img = Variable(img.type(torch.cuda.FloatTensor))
    else:
        img = Variable(img.type(torch.FloatTensor))

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    else:
        print("loading checkpoint %s" % (args.ckpt))
        model.load_state_dict(torch.load(args.ckpt))

    model.eval()

    with torch.no_grad():
        outputs = model(img)
        outputs = postprocess(outputs, 80, confthre, nmsthre)

    if outputs[0] is None:
        print("No Objects Deteted!!")
        return

    coco_class_names, coco_class_ids, coco_class_colors = get_coco_label_names()

    bboxes = list()
    classes = list()
    colors = list()

    for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:

        cls_id = coco_class_ids[int(cls_pred)]
        print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
        print('\t+ Label: %s, Conf: %.5f' %
              (coco_class_names[cls_id], cls_conf.item()))
        box = yolobox2label([y1, x1, y2, x2], info_img)
        bboxes.append(box)
        classes.append(cls_id)
        colors.append(coco_class_colors[int(cls_pred)])

    if args.background:
        import matplotlib
        matplotlib.use('Agg')

    from utils.vis_bbox import vis_bbox
    import matplotlib.pyplot as plt

    vis_bbox(img_raw,
             bboxes,
             label=classes,
             label_names=coco_class_names,
             instance_colors=colors,
             linewidth=2)
    if args.background:
        plt.savefig('output.png')
    else:
        plt.show()
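Every demo funnels the raw network output through postprocess(outputs, n_classes, confthre, nmsthre): confthre discards low-confidence detections, and nmsthre is the IoU cut-off for non-maximum suppression. A minimal sketch of what the two thresholds control (not the repo's implementation):

def iou(a, b):
    # a, b are (x1, y1, x2, y2, score) boxes
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    union = ((a[2] - a[0]) * (a[3] - a[1])
             + (b[2] - b[0]) * (b[3] - b[1]) - inter)
    return inter / union if union > 0 else 0.0

def nms_sketch(boxes, conf_thre, nms_thre):
    boxes = [b for b in boxes if b[4] >= conf_thre]  # confidence filter
    boxes.sort(key=lambda b: b[4], reverse=True)     # highest score first
    keep = []
    for b in boxes:
        # keep a box only if it does not overlap an already-kept box too much
        if all(iou(b, k) < nms_thre for k in keep):
            keep.append(b)
    return keep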
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--modelfile')
    parser.add_argument('--image', type=str)
    parser.add_argument('--roi_size',
                        '-r',
                        type=int,
                        default=7,
                        help='ROI size for mask head input')
    parser.add_argument('--roialign',
                        action='store_false',
                        default=True,
                        help='disable RoIAlign (enabled by default)')
    parser.add_argument('--contour',
                        action='store_true',
                        default=False,
                        help='visualize contour')
    parser.add_argument('--background',
                        action='store_true',
                        default=False,
                        help='background (no-display mode)')
    parser.add_argument('--extractor',
                        choices=('resnet50', 'resnet101'),
                        default='resnet50',
                        help='extractor network')
    args = parser.parse_args()
    if args.background:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plot
    from utils.vis_bbox import vis_bbox
    from chainercv.datasets import voc_bbox_label_names
    from mask_rcnn_resnet import MaskRCNNResNet
    from chainercv import utils
    if args.extractor == 'resnet50':
        model = MaskRCNNResNet(n_fg_class=80,
                               roi_size=args.roi_size,
                               n_layers=50,
                               roi_align=args.roialign)
    elif args.extractor == 'resnet101':
        model = MaskRCNNResNet(n_fg_class=80,
                               roi_size=args.roi_size,
                               n_layers=101,
                               roi_align=args.roialign)
    chainer.serializers.load_npz(args.modelfile, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    img = utils.read_image(args.image, color=True)
    bboxes, rois, labels, scores, masks = model.predict([img])
    print(bboxes, rois)
    bbox, roi, label, score, mask = bboxes[0], rois[0], np.asarray(
        labels[0], dtype=np.int32), scores[0], masks[0]

    coco_label_names = (
        'background',  # class zero
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'street sign',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'hat',
        'backpack', 'umbrella', 'shoe', 'eye glasses', 'handbag', 'tie',
        'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
        'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
        'tennis racket', 'bottle', 'plate', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window',
        'desk', 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote',
        'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
        'refrigerator', 'blender', 'book', 'clock', 'vase', 'scissors',
        'teddy bear', 'hair drier', 'toothbrush')
    vis_bbox(img,
             roi,
             roi,
             label=label,
             score=score,
             mask=mask,
             label_names=coco_label_names,
             contour=args.contour,
             labeldisplay=True)
    #plot.show()
    filename = "output.png"
    plot.savefig(filename)
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--modelfile')
    parser.add_argument('--image', type=str)
    parser.add_argument('--roi_size',
                        '-r',
                        type=int,
                        default=14,
                        help='ROI size for mask head input')
    parser.add_argument('--roialign',
                        action='store_false',
                        default=True,
                        help='disable RoIAlign (enabled by default)')
    parser.add_argument('--contour',
                        action='store_true',
                        default=False,
                        help='visualize contour')
    parser.add_argument('--background',
                        action='store_true',
                        default=False,
                        help='background (no-display mode)')
    parser.add_argument('--bn2affine',
                        action='store_true',
                        default=False,
                        help='batchnorm to affine')
    parser.add_argument('--extractor',
                        choices=('resnet50', 'resnet101'),
                        default='resnet50',
                        help='extractor network')
    args = parser.parse_args()

    # network class id --> COCO label id
    test_class_ids = [
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
        59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 84, 85, 86, 87, 88, 89, 90]

    if args.background:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plot
    from utils.vis_bbox import vis_bbox
    from chainercv.datasets import voc_bbox_label_names
    from mask_rcnn_resnet import MaskRCNNResNet
    from chainercv import utils
    if args.extractor == 'resnet50':
        model = MaskRCNNResNet(n_fg_class=80,
                               roi_size=args.roi_size,
                               pretrained_model=args.modelfile,
                               n_layers=50,
                               roi_align=args.roialign,
                               class_ids=test_class_ids)
    elif args.extractor == 'resnet101':
        model = MaskRCNNResNet(n_fg_class=80,
                               roi_size=args.roi_size,
                               pretrained_model=args.modelfile,
                               n_layers=101,
                               roi_align=args.roialign,
                               class_ids=test_class_ids)

    chainer.serializers.load_npz(args.modelfile, model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    if args.bn2affine:
        bn_to_affine(model)
    img = utils.read_image(args.image, color=True)
    bboxes, labels, scores, masks = model.predict([img])
    bbox, label, score, mask = bboxes[0], np.asarray(
        labels[0], dtype=np.int32), scores[0], masks[0]

    coco_label_names = (
        'background',  # class zero
        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
        'truck', 'boat', 'traffic light', 'fire hydrant', 'street sign',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'hat',
        'backpack', 'umbrella', 'shoe', 'eye glasses', 'handbag', 'tie',
        'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
        'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
        'tennis racket', 'bottle', 'plate', 'wine glass', 'cup', 'fork',
        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
        'couch', 'potted plant', 'bed', 'mirror', 'dining table', 'window',
        'desk', 'toilet', 'door', 'tv', 'laptop', 'mouse', 'remote',
        'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
        'refrigerator', 'blender', 'book', 'clock', 'vase', 'scissors',
        'teddy bear', 'hair drier', 'toothbrush')
    vis_bbox(img,
             bbox,
             label=label,
             score=score,
             mask=mask,
             label_names=coco_label_names,
             contour=args.contour,
             labeldisplay=True)
    filename = "output.png"
    plot.savefig(filename)  # save before show(); show() can leave a blank figure
    plot.show()
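The test_class_ids table in the example above exists because COCO category ids are sparse (12, 26, 29, 30, ... are unused) while the network predicts 80 contiguous foreground classes. A small sketch of the lookup in both directions:

# network index (0..79) -> sparse COCO category id, and the inverse
network_idx = 2
coco_id = test_class_ids[network_idx]   # -> 3, 'car' in the COCO annotations
inv = {cid: i for i, cid in enumerate(test_class_ids)}
assert inv[coco_id] == network_idx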
Example #7
def main(
        image=None,
        gpu=-1,
        weights_path=f"{Path(__file__).parent}/weights/yolov3.weights",
        background=False,
):
    """
    Visualize the detection result for the given image and the pre-trained model.
    """
    print(weights_path)
    my_path = Path(__file__).parent

    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=gpu)
    parser.add_argument('--cfg', type=str,
                        default=my_path / 'config/yolov3_default.cfg')
    parser.add_argument('--ckpt', type=str,
                        help='path to the checkpoint file')
    parser.add_argument('--weights_path', type=str,
                        default=weights_path, help='path to weights file')
    parser.add_argument('--image', type=str, default=image)
    parser.add_argument('--background', action='store_true',
                        default=background,
                        help='background (no-display mode; save "./output.png")')
    parser.add_argument('--detect_thresh', type=float,
                        default=0.5, help='confidence threshold')
    args = parser.parse_args()

    with open(args.cfg, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(cfg['MODEL'])

    confthre = cfg['TEST']['CONFTHRE'] 
    nmsthre = cfg['TEST']['NMSTHRE']

    if args.detect_thresh:
        confthre = args.detect_thresh

    img = imread(args.image)
    if img is None:
        print("load image failed:", args.image)
        return

    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
    img = np.transpose(img / 255., (2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)

    if args.gpu >= 0:
        model.cuda(args.gpu)
        img = Variable(img.type(torch.cuda.FloatTensor))
    else:
        img = Variable(img.type(torch.FloatTensor))

    assert args.weights_path or args.ckpt, 'One of --weights_path and --ckpt must be specified'

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    elif args.ckpt:
        print("loading checkpoint %s" % (args.ckpt))
        state = torch.load(args.ckpt)
        if 'model_state_dict' in state.keys():
            model.load_state_dict(state['model_state_dict'])
        else:
            model.load_state_dict(state)

    model.eval()


    with torch.no_grad():
        outputs = model(img)
        outputs = postprocess(outputs, 80, confthre, nmsthre)


    if outputs[0] is None:
        print("No Objects Deteted!!")
        return

    coco_class_names, coco_class_ids, coco_class_colors = get_coco_label_names()

    bboxes = list()
    classes = list()
    colors = list()

    for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:

        cls_id = coco_class_ids[int(cls_pred)]
        print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
        print('\t+ Label: %s, Conf: %.5f' %
              (coco_class_names[cls_id], cls_conf.item()))
        box = yolobox2label([y1, x1, y2, x2], info_img)
        bboxes.append(box)
        classes.append(cls_id)
        colors.append(coco_class_colors[int(cls_pred)])

    if args.background:
        import matplotlib
        matplotlib.use('Agg')

    from utils.vis_bbox import vis_bbox

    vis_bbox(
        img_raw, bboxes, label=classes, label_names=coco_class_names,
        instance_colors=colors, linewidth=2)


    if args.background:
        output = Path("./output")
        output.mkdir(parents=True, exist_ok=True)
        now = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
        output /= f"output-{now}.png"
        plt.savefig(output)
        return str(output.absolute())
    else:
        plt.show()
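Several of these scripts accept either a bare state dict or a checkpoint dict keyed 'model_state_dict' (the format the trainer in a later example writes). A sketch of writing and reading that layout; the path is illustrative and model, optimizer, iter_i are assumed to be in scope:

# save in the trainer's checkpoint format (illustrative path)
torch.save({'iter': iter_i,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict()},
           'snapshot.ckpt')

# load, tolerating both layouts as the demo scripts above do
state = torch.load('snapshot.ckpt')
model.load_state_dict(state.get('model_state_dict', state))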
Example #8
def main():
    args = parse_args()

    print("------------------------------------")
    print("    use {} dataset for demo.        ".format(args.data))
    print("------------------------------------")

    assert args.data in ['coco', 'drone']

    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device('cuda:{}'.format(args.gpu))
    else:
        device = torch.device('cpu')

    # [TBM] gen n_classes from coco-format json file..
    if args.data == 'coco':
        cfg_path = 'config/yolov3_default.cfg'
        n_classes = 80
    elif args.data == 'drone':
        cfg_path = 'config/yolov3_visdrone_default.cfg'
        n_classes = 10

    with open(cfg_path, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(n_classes=n_classes)
    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']

    if args.detect_thresh:
        confthre = args.detect_thresh

    img = cv2.imread(args.image)
    assert img is not None

    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, imgsize)  # info = (h, w, nh, nw, dx, dy)
    img = torch.from_numpy(img).float().unsqueeze(0)

    model = model.to(device)
    img = Variable(img.to(device, dtype=torch.float32))

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    else:
        print("loading checkpoint %s" % (args.ckpt))
        model.load_state_dict(torch.load(args.ckpt))

    model.eval()

    with torch.no_grad():
        outputs = model(img)
        outputs = postprocess(outputs, n_classes, confthre, nmsthre)

    # [TBM] gen label_names from coco-format json file..
    if args.data == 'coco':
        class_names, class_ids, class_colors = get_coco_label_names()
    elif args.data == 'drone':
        class_names, class_ids, class_colors = get_visdrone_label_names()

    bboxes = list()
    classes = list()
    colors = list()

    for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:

        cls_id = class_ids[int(cls_pred)]
        print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
        print('\t+ Label: %s, Conf: %.5f' %
              (class_names[cls_id], cls_conf.item()))
        box = yolobox2label([y1, x1, y2, x2], info_img)
        bboxes.append(box)
        classes.append(cls_id)
        colors.append(class_colors[int(cls_pred)])

    vis_bbox(img_raw,
             bboxes,
             label=classes,
             label_names=class_names,
             instance_colors=colors,
             linewidth=2)

    if args.window:
        plt.show()
    else:
        out_path = './output.png'
        plt.savefig(out_path, bbox_inches=0, pad_inches=0, dpi=100)
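A side note on the Variable(...) wrappers used throughout these examples: since PyTorch 0.4, autograd Variables are merged into plain tensors, so the wrapper is a no-op and the transfer can be written directly. A minimal sketch of the modern equivalent:

import torch

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
img = torch.randn(1, 3, 416, 416)          # stand-in for a preprocessed batch
img = img.to(device, dtype=torch.float32)  # replaces Variable(img.to(...))
print(img.device, img.requires_grad)       # plain tensor, no Variable needed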
Example #9
            elif cls_id == 65:
                # traffic light
                z_world = 1065 / width * 1060
            else:
                # all remaining classes
                z_world = 1065 / width * 1060
            x_world = (x_cent - 960) / 1060 * z_world
            y_world = (y_cent - 600) / 1060 * z_world
            distance = round(
                ((x_world**2) + (y_world**2) + (z_world**2))**0.5 / 1000, 2)
            world.append((x_world, y_world, z_world))
            dist.append(distance)

        fig, ax = vis_bbox(
            img_raw,
            bboxes,
            label=classes,
            score=scores,
            label_names=coco_class_names,
            sigma=sigmas,
            sigma_scale_img=sigma_scale_img,
            world=world,
            distance=dist,
            sigma_scale_xy=2.,
            sigma_scale_wh=2.,  # 2-sigma
            show_inner_bound=False,  # do not show inner rectangle for simplicity
            instance_colors=colors,
            linewidth=3)

        fig.savefig('./demo/result/' + split_list[j][i])
        print('{} saved'.format(split_list[j][i]))
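The fragment above back-projects each box centre with a pinhole camera model: focal length ≈ 1060 px, principal point (960, 600), and an assumed real-world width of 1065 mm used to estimate depth from the box's pixel width. A worked sketch of the same arithmetic with illustrative inputs:

f, cx, cy, real_w = 1060.0, 960.0, 600.0, 1065.0  # assumed intrinsics (px, mm)
width = 100.0                     # detected box width in pixels (example)
x_cent, y_cent = 1200.0, 700.0    # detected box centre (example)
z = real_w / width * f            # depth: 11289.0 mm
x = (x_cent - cx) / f * z         # 2556.0 mm right of the optical axis
y = (y_cent - cy) / f * z         # 1065.0 mm below it
print(round((x * x + y * y + z * z) ** 0.5 / 1000, 2))  # distance: 11.62 m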
Example #10
def main():
    """
    Visualize the detection result for the given image and the pre-trained model.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--cfg',
                        type=str,
                        default='config/yolov3_default_digestpath.cfg')
    parser.add_argument('--ckpt', type=str, help='path to the checkpoint file')
    parser.add_argument('--weights_path',
                        type=str,
                        default=None,
                        help='path to weights file')
    parser.add_argument('--image', type=str)
    parser.add_argument(
        '--background',
        action='store_true',
        default=False,
        help='background (no-display mode; save "./output.png")')
    parser.add_argument('--detect_thresh',
                        type=float,
                        default=None,
                        help='confidence threshold')
    parser.add_argument('--dataset',
                        help='dataset to work with: {}'.format(
                            Dataset.print_choices()),
                        type=int,
                        default=Dataset.SIGNET_RING)

    args = parser.parse_args()

    with open(args.cfg, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(cfg['MODEL'])

    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']

    if args.detect_thresh:
        confthre = args.detect_thresh

    img = cv2.imread(args.image)
    img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
    img, info_img = preprocess(img, imgsize,
                               jitter=0)  # info = (h, w, nh, nw, dx, dy)
    img = np.transpose(img / 255., (2, 0, 1))
    img = torch.from_numpy(img).float().unsqueeze(0)

    if args.gpu >= 0:
        model.cuda(args.gpu)
        img = Variable(img.type(torch.cuda.FloatTensor))
    else:
        img = Variable(img.type(torch.FloatTensor))

    assert args.weights_path or args.ckpt, 'One of --weights_path and --ckpt must be specified'

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    elif args.ckpt:
        print("loading checkpoint %s" % (args.ckpt))
        state = torch.load(args.ckpt)
        if 'model_state_dict' in state.keys():
            model.load_state_dict(state['model_state_dict'])
        else:
            model.load_state_dict(state)

    model.eval()

    with torch.no_grad():
        outputs = model(img)
        outputs = postprocess(outputs, Dataset.NUM_CLASSES[args.dataset],
                              confthre, nmsthre)

    if outputs[0] is None:
        print("No Objects Deteted!!")
        return

    bboxes = list()
    colors = list()

    for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
        print(int(x1), int(y1), int(x2), int(y2), float(conf), int(cls_pred))
        print('\t+ Conf: %.5f' % cls_conf.item())
        box = yolobox2label([y1, x1, y2, x2], info_img)
        bboxes.append(box)
        colors.append(BOX_COLOR)

    if args.background:
        matplotlib.use('Agg')

    vis_bbox(img_raw, bboxes, instance_colors=colors, linewidth=2)

    if args.background:
        plt.savefig('output.png')
    else:
        plt.show()
Example #11
def main():
    """
    YOLOv3 trainer. See README for details.
    """
    args = parse_args()
    print("Setting Arguments.. : ", args)

    cuda = torch.cuda.is_available() and args.use_cuda
    os.makedirs(args.checkpoint_dir, exist_ok=True)

    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    print("successfully loaded config file: ", cfg)

    momentum = cfg['TRAIN']['MOMENTUM']
    decay = cfg['TRAIN']['DECAY']
    burn_in = cfg['TRAIN']['BURN_IN']
    iter_size = cfg['TRAIN']['MAXITER']
    steps = eval(cfg['TRAIN']['STEPS'])
    batch_size = cfg['TRAIN']['BATCHSIZE']
    subdivision = cfg['TRAIN']['SUBDIVISION']
    ignore_thre = cfg['TRAIN']['IGNORETHRE']
    random_resize = cfg['AUGMENTATION']['RANDRESIZE']
    base_lr = cfg['TRAIN']['LR'] / batch_size / subdivision
    datatype = cfg['TRAIN']['DATATYPE']
    print('effective_batch_size = batch_size * iter_size = %d * %d' %
          (batch_size, subdivision))

    # Learning rate setup
    def burnin_schedule(i):
        if i < burn_in:
            factor = pow(i / burn_in, 4)
        elif i < steps[0]:
            factor = 1.0
        elif i < steps[1]:
            factor = 0.1
        else:
            factor = 0.01
        return factor

    # Initiate model
    model = YOLOv3(cfg['MODEL'], ignore_thre=ignore_thre)

    if args.weights_path:
        print("loading darknet weights....", args.weights_path)
        parse_yolo_weights(model, args.weights_path)
    elif args.checkpoint:
        print("loading pytorch ckpt...", args.checkpoint)
        state = torch.load(args.checkpoint)
        if 'model_state_dict' in state.keys():
            model.load_state_dict(state['model_state_dict'])
        else:
            model.load_state_dict(state)

    if cuda:
        print("using cuda")
        model = model.cuda()

    if args.tfboard:
        print("using tfboard")
        from tensorboardX import SummaryWriter
        tblogger = SummaryWriter(args.tfboard)

    model.train()
    coco_class_names, coco_class_ids, coco_class_colors = get_coco_label_names()
    imgsize = cfg['TRAIN']['IMGSIZE']
    if datatype == 'voc':
        dataset = VOCDataset(model_type=cfg['MODEL']['TYPE'],
                             data_dir='../../VOCdevkit/VOC2007',
                             img_size=imgsize,
                             augmentation=cfg['AUGMENTATION'],
                             debug=args.debug)
        print('load voc dataset successfully')
    else:
        dataset = COCODataset(model_type=cfg['MODEL']['TYPE'],
                              data_dir='COCO/',
                              img_size=imgsize,
                              augmentation=cfg['AUGMENTATION'],
                              debug=args.debug)
        print('load COCO dataset successfully')

        evaluator = COCOAPIEvaluator(model_type=cfg['MODEL']['TYPE'],
                                     data_dir='COCO/',
                                     img_size=cfg['TEST']['IMGSIZE'],
                                     confthre=cfg['TEST']['CONFTHRE'],
                                     nmsthre=cfg['TEST']['NMSTHRE'])

    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=args.n_cpu)
    dataiterator = iter(dataloader)


    dtype = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # optimizer setup
    # set weight decay only on conv.weight
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'conv.weight' in key:
            params += [{'params': value, 'weight_decay': decay
                        * batch_size * subdivision}]
        else:
            params += [{'params': value, 'weight_decay': 0.0}]
    optimizer = optim.SGD(params, lr=base_lr, momentum=momentum,
                          dampening=0, weight_decay=decay * batch_size * subdivision)

    iter_state = 0

    if args.checkpoint:
        if 'optimizer_state_dict' in state.keys():
            optimizer.load_state_dict(state['optimizer_state_dict'])
            iter_state = state['iter'] + 1
    # Learning-rate control: LambdaLR sets each parameter group's lr to the
    # initial lr times the given function; with last_epoch=-1 the initial lr
    # is used as-is.
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    # start training loop
    for iter_i in range(iter_state, iter_size + 1):
        if iter_i % (args.eval_interval * 2) == 0 and iter_i > 0:
            if datatype == 'voc':
                result = evals(model)
                print(result)
            else:
                ap50_95, ap50 = evaluator.evaluate(model)
                print(ap50_95, ap50)
                model.train()
                if args.tfboard:
                    tblogger.add_scalar('val/COCOAP50', ap50, iter_i)
                    tblogger.add_scalar('val/COCOAP50_95', ap50_95, iter_i)

        if iter_i % 40000 == 0 and iter_i > 0:
            draw(model, datatype, imgsize)
        # subdivision loop
        optimizer.zero_grad()
        for inner_iter_i in range(subdivision):
            try:
                imgs, targets, info_img, id_ = next(dataiterator)  # load a batch
            except StopIteration:
                dataiterator = iter(dataloader)
                imgs, targets, info_img, id_ = next(dataiterator)  # load a batch
            imgs = Variable(imgs.type(dtype))
            targets = Variable(targets.type(dtype), requires_grad=False)
            loss = model(imgs, targets)
            loss.backward()

        optimizer.step()
        scheduler.step()


        if iter_i % 10 == 0:
            # logging

            current_lr = scheduler.get_lr()[0] * batch_size * subdivision
            print('[Iter %d/%d] [lr %f] '
                  '[Losses: xy %f, wh %f, conf %f, cls %f, total %f, imgsize %d]'
                  % (iter_i, iter_size, current_lr,
                     model.loss_dict['xy'], model.loss_dict['wh'],
                     model.loss_dict['conf'], model.loss_dict['cls'],
                     model.loss_dict['l2'], imgsize))

            if args.tfboard:
                tblogger.add_scalar('train/total_loss',
                                    model.loss_dict['l2'], iter_i)

            # random resizing
            # Vary the input size: YOLOv3 is fully convolutional, so changing
            # the image size does not affect the network parameters.
            if random_resize:
                imgsize = (random.randint(0, 9) + 10) * 32  # 320..608, step 32
                dataset.img_shape = (imgsize, imgsize)
                dataset.img_size = imgsize
                dataloader = torch.utils.data.DataLoader(
                    dataset, batch_size=batch_size, shuffle=True, num_workers=args.n_cpu)
                dataiterator = iter(dataloader)

        if iter_i % 100 == 0:
            model.eval()
            if datatype == 'voc':
                img_file = os.path.join('../../VOCdevkit/VOC2007', 'JPEGImages', id_[0] + '.jpg')
            else:
                img_file = os.path.join('COCO', 'train2017',
                                        '{:012}'.format(int(id_[0])) + '.jpg')
            img = cv2.imread(img_file)
            img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
            img, info_img_t = preprocess(img, imgsize, jitter=0)  # info = (h, w, nh, nw, dx, dy)
            img = np.transpose(img / 255., (2, 0, 1))
            img = torch.from_numpy(img).float().unsqueeze(0)
            img = Variable(img.type(torch.cuda.FloatTensor))
            outputs = model(img)
            #outputs.shape : torch.Size([1, 12348, 85])
            outputs = postprocess(outputs, 80, 0.5, 0.5)
            # imgs.shape : torch.Size([1, 3, 608, 608])
            # outputs[0].shape :torch.Size([3, 7])
            # targets.shape :torch.Size([1, 50, 5])
            if outputs[0] is not None:
                bboxes = list()
                classes = list()
                colors = list()
                for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:

                    cls_id = coco_class_ids[int(cls_pred)]
                    box = yolobox2label([y1, x1, y2, x2], info_img_t)
                    bboxes.append(box)
                    classes.append(cls_id)
                    colors.append(coco_class_colors[int(cls_pred)])
                vis_bbox(
                    img_raw, bboxes, label=classes, label_names=coco_class_names,
                    instance_colors=colors, linewidth=2)
                plt.savefig('output/'+str(iter_i)+'.jpg')
            model.train()

        # save checkpoint
        if iter_i > 0 and (iter_i % args.checkpoint_interval == 0):
            torch.save({'iter': iter_i,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        },
                       os.path.join(args.checkpoint_dir, "snapshot" + str(iter_i) + ".ckpt"))
    if args.tfboard:
        tblogger.close()
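The burnin_schedule above warms the learning rate up polynomially and then steps it down at fixed iterations. A quick sketch of the resulting multipliers, assuming burn_in = 1000 and steps = (400000, 450000) (typical darknet-style defaults; the actual values come from the cfg file):

burn_in, steps = 1000, (400000, 450000)

def factor(i):
    if i < burn_in:
        return (i / burn_in) ** 4   # polynomial warm-up
    if i < steps[0]:
        return 1.0                  # full learning rate
    if i < steps[1]:
        return 0.1                  # first decay
    return 0.01                     # second decay

for i in (0, 500, 1000, 400000, 450000):
    print(i, factor(i))             # 0.0, 0.0625, 1.0, 0.1, 0.01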
Example #12
def main():
    args = parse_args()

    print("------------------------------------")
    print("    use {} dataset for demo.        ".format(args.data))
    print("------------------------------------")

    assert args.data in ['coco', 'drone']

    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device('cuda:{}'.format(args.gpu))
    else:
        device = torch.device('cpu')

    # [TBM] gen n_classes from coco-format json file..
    if args.data == 'coco':
        cfg_path = 'config/yolov3_default.cfg'
        n_classes = 80
    elif args.data == 'drone':
        cfg_path = 'config/yolov3_visdrone_default.cfg'
        n_classes = 10

    with open(cfg_path, 'r') as f:
        cfg = yaml.load(f, Loader=yaml.SafeLoader)

    imgsize = cfg['TEST']['IMGSIZE']
    model = YOLOv3(n_classes=n_classes)
    confthre = cfg['TEST']['CONFTHRE']
    nmsthre = cfg['TEST']['NMSTHRE']

    if args.detect_thresh:
        confthre = args.detect_thresh

    model = model.to(device)

    if args.weights_path:
        print("loading yolo weights %s" % (args.weights_path))
        parse_yolo_weights(model, args.weights_path)
    else:
        print("loading checkpoint %s" % (args.ckpt))
        model.load_state_dict(torch.load(args.ckpt))

    model.eval()

    dir_name = os.path.basename(os.path.dirname(args.in_dir + '/'))
    out_dir = os.path.join(args.out_dir, dir_name)
    os.makedirs(out_dir, exist_ok=True)

    img_files = os.listdir(args.in_dir)
    img_files.sort()

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    for i in range(0, len(img_files), args.step):

        filename = img_files[i]
        img_path = os.path.join(args.in_dir, filename)
        img = cv2.imread(img_path)
        assert img is not None

        start.record()

        img_raw = img.copy()[:, :, ::-1].transpose((2, 0, 1))
        img, info_img = preprocess(img,
                                   imgsize)  # info = (h, w, nh, nw, dx, dy)
        img = torch.from_numpy(img).float().unsqueeze(0)

        img = Variable(img.to(device, dtype=torch.float32))

        with torch.no_grad():
            outputs = model(img)
            outputs = postprocess(outputs, n_classes, confthre, nmsthre)

        end.record()
        torch.cuda.synchronize()

        # [TBM] gen label_names from coco-format json file..
        if args.data == 'coco':
            class_names, class_ids, class_colors = get_coco_label_names()
        elif args.data == 'drone':
            class_names, class_ids, class_colors = get_visdrone_label_names()

        bboxes, classes, colors = list(), list(), list()

        if outputs[0] is None:
            outputs[0] = []

        if args.verbose:
            print("=====================================")

        print("{}, {:.2f} [fps]".format(filename,
                                        1000.0 / start.elapsed_time(end)))

        for x1, y1, x2, y2, conf, cls_conf, cls_pred in outputs[0]:
            cls_id = class_ids[int(cls_pred)]
            if args.verbose:
                print(int(x1), int(y1), int(x2), int(y2), float(conf),
                      int(cls_pred))
                print('\t+ Label: %s, Conf: %.5f' %
                      (class_names[cls_id], cls_conf.item()))
            box = yolobox2label([y1, x1, y2, x2], info_img)
            bboxes.append(box)
            classes.append(cls_id)
            colors.append(class_colors[int(cls_pred)])

        if args.verbose:
            print()

        vis_bbox(img_raw,
                 bboxes,
                 label=classes,
                 label_names=class_names,
                 instance_colors=colors,
                 linewidth=2)

        basename, _ = os.path.splitext(filename)
        out_path = os.path.join(out_dir, '{}.png'.format(basename))
        plt.savefig(out_path, bbox_inches=0, pad_inches=0, dpi=100)
        plt.close()

    print("Done!")