def test():
    """Run the Faster R-CNN demo on one image and save the result.

    Loads the pretrained VGG weights, keeps detections scoring above
    0.3, draws boxes and class labels, and writes demo/out.jpg.
    """
    import os

    image = cv2.imread('demo/004545.jpg')

    detector = FasterRCNN()
    network.load_net('/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5',
                     detector)
    detector.cuda()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    timer = Timer()
    timer.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.3)
    print('total spend: {}s'.format(timer.toc()))

    im2show = np.copy(image)
    for idx, box in enumerate(dets):
        if scores[idx] < 0.3:
            continue
        box = tuple(int(coord) for coord in box)
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 2)
        label = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.putText(im2show, label, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
示例#2
0
def test():
    """Demo: run a saved Faster R-CNN checkpoint (state_dict form) on a
    single image, draw detections above score 0.7, save demo/out.jpg and
    display it in a window until a key is pressed.
    """
    import os

    im_file = 'demo/004545.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    image = cv2.imread(im_file)

    model_file = 'models/saved_model3/faster_rcnn_200000.pth'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    detector = FasterRCNN()
    detector.load_state_dict(torch.load(model_file))
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    timer = Timer()
    timer.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    print('total spend: {}s'.format(timer.toc()))

    im2show = np.copy(image)
    for idx, box in enumerate(dets):
        box = tuple(int(coord) for coord in box)
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[idx], scores[idx]),
                    (box[0], box[1] + 15), cv2.FONT_HERSHEY_PLAIN,
                    1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
    cv2.imshow('demo', im2show)
    cv2.waitKey(0)
    def __init__(self, weights=None):
        """Build a building-detection Faster R-CNN model.

        weights: optional path to a weights file. When None, the released
        faster-rcnn.zip is downloaded (via wget) and unzipped under
        weights/, and the actual weights filename is read from the
        archive's description.json.
        """
        if weights is None:
            # First use: fetch and unpack the released weights.
            if not os.path.exists('weights'):
                os.mkdir('weights')
            download_url = 'https://github.com/ArnholdInstitute/ColdSpots/releases/download/1.0/faster-rcnn.zip'
            if not os.path.exists('weights/faster-rcnn'):
                print('Downloading weights for faster-rcnn')
                # Skip the download if the zip is already on disk.
                if not os.path.exists(os.path.join('weights/faster-rcnn.zip')):
                    check_output([
                        'wget', download_url, '-O', 'weights/faster-rcnn.zip'
                    ])
                print('Unzipping...')
                check_output(
                    ['unzip', 'weights/faster-rcnn.zip', '-d', 'weights'])
            # description.json names the weights file inside the archive.
            description = json.load(
                open('weights/faster-rcnn/description.json'))
            weights = os.path.join('weights/faster-rcnn',
                                   description['weights'])
            print('Building model...')

        # NOTE(review): '__backround__' is misspelled but matches the label
        # set used elsewhere in this project (see the evaluation script);
        # do not "fix" it without retraining/renaming consistently.
        self.model = FasterRCNNModel(classes=['__backround__', 'building'],
                                     debug=False)
        network.load_net(weights, self.model)

        self.model.cuda()
        self.model.eval()
示例#4
0
def build_extractor(model_file, classes=None):
    """Construct a FasterRCNN, load weights from ``model_file`` and
    return it on GPU in eval mode.

    classes: optional class list forwarded to the FasterRCNN
    constructor; when None the model's default classes are used.
    """
    extractor = FasterRCNN() if classes is None else FasterRCNN(classes)
    extractor.cuda()
    extractor.eval()
    network.load_net(model_file, extractor)
    print('load model successfully!')
    return extractor
示例#5
0
def main():
	"""Evaluate a trained Faster R-CNN detector on the small
	visual_genome test split and print the object recall.
	(Python 2 file: uses the print statement.)
	"""
	global args
	print "Loading training set and testing set..."
	# train_set = visual_genome(args.dataset_option, 'train')
	test_set = visual_genome('small', 'test')
	object_classes = test_set.object_classes
	print "Done."

	# train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
	test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
	net = FasterRCNN(use_kmeans_anchors=args.use_kmeans_anchors, n_classes=len(object_classes), model=args.base_model)
	# Load the detection weights trained for 12 epochs on the small split.
	network.load_net('./output/detection/Faster_RCNN_small_vgg_12epoch_epoch_11.h5', net)
	# network.load_net('./output/detection/RPN_object1_best.h5', net)
	# network.set_trainable(net.features, requires_grad=False)
	net.cuda()

	# Testing
	recall = test(test_loader, net)

	print('Recall: '
	      'object: {recall: .3f}%'.format(recall=recall*100))
示例#6
0
def test():
    """Demo: detect objects in one image and display them with
    matplotlib (red rectangles plus class/score labels)."""
    import os

    im_file = 'demo/004545.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    image = cv2.imread(im_file)

    # model_file = './VGGnet_fast_rcnn_iter_70000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    model_file = './models/saved_model_max/faster_rcnn_100000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    timer = Timer()
    timer.tic()
    dets, scores, classes = detector.detect(image, 0.7)
    print('total spend: {}s'.format(timer.toc()))

    im2show = np.copy(image)  # kept for parity with the cv2 demos (unused here)
    img = mpimg.imread(im_file)
    # Plot the raw image and overlay one rectangle + label per detection.
    fig, ax = plt.subplots(1)
    ax.imshow(img)
    for idx, box in enumerate(dets):
        width = box[2] - box[0]
        height = box[3] - box[1]
        rect = patches.Rectangle(box[0:2],
                                 width,
                                 height,
                                 linewidth=1,
                                 edgecolor='r',
                                 facecolor='none')
        plt.text(box[0], box[1], '%s: %.3f' % (classes[idx], scores[idx]))
        ax.add_patch(rect)

    plt.show()
    print('aa')
示例#7
0
def load_model(model_file_path):
    """Load Faster R-CNN weights from ``model_file_path`` and return the
    detector moved to GPU and switched to eval mode."""
    net = FasterRCNN()
    network.load_net(model_file_path, net)
    net.cuda()
    net.eval()
    print('load model successfully!')
    return net
示例#8
0
def test(visualize=False):
    """Detect objects in a cervix image with a saved Faster R-CNN model
    and write the annotated image to demo/<checkpoint-stem>/<image name>.

    visualize: when True, additionally show a 15%-scale preview window
    and wait for a key press.
    """
    import os

    im_file = 'data/cervix/train/Type_2/1381.jpg'
    im_name = im_file.split('/')[-1]
    image = cv2.imread(im_file)

    # model_file = 'models/VGGnet_fast_rcnn_iter_70000.h5'
    model_file = 'models/saved_model3/faster_rcnn_100000.h5'
    # Output directory is named after the checkpoint file stem.
    expm = model_file.split('/')[-1].split('.')[0]
    expm_dir = os.path.join('demo', expm)
    if not os.path.exists(expm_dir):
        os.makedirs(expm_dir)

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    # eval() switches Dropout and BatchNorm to inference behaviour;
    # train() would restore training mode.
    detector.eval()
    print('load model successfully!')

    timer = Timer()
    timer.tic()
    dets, scores, classes = detector.detect(image, 0.7)
    print('total spend: {}s'.format(timer.toc()))

    im2show = np.copy(image)
    for idx, box in enumerate(dets):
        box = tuple(int(coord) for coord in box)
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 4)
        cv2.putText(im2show, '%s: %.3f' % (classes[idx], scores[idx]),
                    (box[0], box[1] + 15), cv2.FONT_HERSHEY_PLAIN,
                    1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', expm, im_name), im2show)

    if visualize:
        im2show = cv2.resize(im2show, None, None, fx=0.15, fy=0.15,
                             interpolation=cv2.INTER_LINEAR)
        cv2.imshow('demo', im2show)
        cv2.waitKey(0)
示例#9
0
 def analysis_video(self, result_dir):
     """Run the detector over every frame of the loaded video and export results.

     Loads Faster R-CNN weights from 'model.h5', detects objects (score
     threshold 0.8) in each frame of self.image_list, draws boxes and
     labels, writes annotated frames into ``result_dir``, updates the GUI
     table/status bar, and returns an info dict describing each frame.

     Fix: frames with no detections previously crashed with
     ZeroDivisionError when averaging the scores; now they report 0.
     """
     self.statusbar_stringvar.set('Analysis..Please wait..')
     model_file = 'model.h5'
     detector = FasterRCNN()
     network.load_net(model_file, detector)
     detector.cuda()
     detector.eval()
     print('load model successfully!')

     info_dict = {'pictures': []}
     for index in range(len(self.image_list)):
         accuracy = 0.
         pic_info = {'objects': []}
         dets, scores, classes = detector.detect(self.image_list[index], 0.8)
         im2show = np.copy(self.image_list[index])
         for i, det in enumerate(dets):
             det = tuple(int(x) for x in det)
             cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
             cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15), cv2.FONT_HERSHEY_PLAIN,
                     1.0, (0, 0, 255), thickness=1)
             accuracy += scores[i]
             # One record per detected object.
             pic_info['objects'].append({
                 'name': classes[i],
                 'accuracy': scores[i],
                 'bbox': det,
             })

         # Per-frame metadata.
         pic_info['filename'] = os.path.basename(self.video_path).split('.')[0] + '_' + str(index + 1) + '.jpg'
         pic_info['size'] = im2show.shape
         info_dict['pictures'].append(pic_info)

         cv2.imwrite(os.path.join(result_dir, pic_info['filename']), im2show)
         # Guard against zero detections (was a ZeroDivisionError).
         mean_accuracy = accuracy / len(classes) if len(classes) else 0.
         self.view_table.update(index + 1, **{
                 'name': pic_info['filename'],
                 'accuracy': mean_accuracy,
                 'state': 'yes'
             })
     self.statusbar_stringvar.set('Analysis done!')
     return info_dict
示例#10
0
    def __init__(self, classes, n_action_classes, n_action_nonagent_roles,
                 **kwargs):
        """Assemble the HOI model: a Faster R-CNN detection branch plus
        human-centric and interaction branches.

        classes: detection class names; must agree with the Faster R-CNN
            config's NCLASSES value (asserted below).
        n_action_classes / n_action_nonagent_roles: sizes forwarded to the
            HumanCentricBranch and InteractionBranch constructors.
        kwargs: optional 'faster_rcnn_config' (config file path) and
            'faster_rcnn_command_line' (config override list).
        (Python 2 file: uses the print statement.)
        """
        super(HoiModel, self).__init__()
        print "Constructing HOI Model"

        # Optional Faster R-CNN configuration overrides, file first, then
        # command-line list so the latter wins.
        faster_rcnn_config = kwargs.get("faster_rcnn_config", None)
        if faster_rcnn_config is not None:
            cf.cfg_from_file(faster_rcnn_config)

        faster_rcnn_cle = kwargs.get("faster_rcnn_command_line", None)
        if faster_rcnn_cle is not None:
            cf.cfg_from_list(faster_rcnn_cle)

        assert(cf.cfg["NCLASSES"] == len(classes)), \
                "inconsistent FasterRCNN settings"

        self.detection_branch = FasterRCNN(classes=classes)

        self.human_centric_branch = HumanCentricBranch(
            n_action_classes, n_action_nonagent_roles)
        self.interaction_branch = InteractionBranch(n_action_nonagent_roles)
            .format(i + 1, num_images, detect_time, nms_time)

        # if vis:
        #     cv2.imshow('test', im2show)
        #     cv2.waitKey(1)
        if sav:
            cv2.imwrite(output_dir_detections + str(i) + '.png', im2show)

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':
    # Load the evaluation dataset (imdb_name is a module-level setting).
    imdb = get_imdb(imdb_name)
    imdb.competition_mode(on=True)

    # Build the detector and load the trained weights (trained_model is
    # defined at module level).
    net = FasterRCNN(classes=imdb.classes, debug=False)
    network.load_net(trained_model, net)
    print('load model successfully!')

    net.cuda()
    net.eval()  # inference mode for Dropout/BatchNorm

    # Run evaluation over the whole dataset.
    test_net(net, imdb, max_per_image, thresh=thresh, vis=vis)
示例#12
0
        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, detect_time, nms_time)

        if vis:
            cv2.imshow('test', im2show)
            cv2.waitKey(1)

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':
    # Load the evaluation dataset (imdb_name is a module-level setting).
    imdb = get_imdb(imdb_name)
    imdb.competition_mode(on=True)

    # Build the VGG-backbone detector and load the trained weights.
    net = FasterRCNN(classes=imdb.classes, debug=False, backbone='VGG')
    network.load_net(trained_model, net)
    print('load model successfully!')

    net.cuda()
    net.eval()  # inference mode for Dropout/BatchNorm

    # Evaluate; save_name, max_per_image, thresh, vis are module-level.
    test_net(save_name, net, imdb, max_per_image, thresh=thresh, vis=vis)
示例#13
0
# load config
cfg_from_file(cfg_file) # overrides default config params
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net: random-normal init first, then overwrite with pretrained
# VGG16 weights (presumably only the backbone layers - TODO confirm).
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)
# network.load_pretrained_npy(net, pretrained_model)
network.load_pretrained_model(net, 'vgg16')
# The commented lines below resume from a saved checkpoint instead of
# starting from the pretrained backbone.
# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = 'models/saved_model3/faster_rcnn_60000.h5'
# network.load_net(model_file, net)
# exp_name = 'vgg16_02-19_13-24'
# start_step = 60001
# lr /= 10.
# network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01)

net.cuda()
net.train() # set model in train mode, has effect on Dropout and Batchnorm. Use eval() to set evaluation mode.

params = list(net.parameters())
示例#14
0
                                    bboxes_pick[i, 3],
                                    cur_scores[i]
                                )
                        cur_rec += str_out
                    cur_rec += "\n"
                else:
                    cur_rec = "{} {}\n".format(file_path, 0)
                out_file.write(cur_rec)


if __name__ == "__main__":
    ctx = mx.gpu(9)
    net = FasterRCNN(
        len(cfg.anchor_ratios) * len(cfg.anchor_scales), 
        cfg.num_classes, 
        pretrained_model="vgg16",
        feature_name="vgg0_conv12_fwd_output",
        # pretrained_model="mobilenetv2_0.25", 
        # feature_name="mobilenetv20_features_linearbottleneck12_batchnorm2_fwd_output",
        ctx=ctx)
    net.init_params(ctx)
    net.collect_params().load("/world/data-gpu-112/zhanglinghan/face-detect-faster-rcnn-mx/faster-rcnn-vgg16-9anchors/faster-rcnn-vgg16-9anchors-280000.gluonmodel", ctx)
    

    global f_path
    path_lst = os.listdir("/world/data-c27/face_landmarks_hourglass/detection-input")
    path_lst.sort()
    for f_name in path_lst: 
        f_path = os.path.join("/world/data-c27/face_landmarks_hourglass/detection-input", f_name)
        print("processing {}".format(f_path))
        f_path_out = os.path.join("/world/data-c27/face_landmarks_hourglass/detection-output", f_name.strip())
        benchmark(net, ctx, f_path_out)
示例#15
0
def train():
    """Train Faster R-CNN on PASCAL VOC.

    Parses command-line args, builds the dataset/dataloader and model,
    runs SGD with stepwise learning-rate decay, periodically prints the
    losses (and, when cfg.VERBOSE, RPN/RCNN fg-bg accuracies), optionally
    logs to tensorboardX, and saves a checkpoint every
    args.save_interval epochs.
    """
    args = parse_args()
    args.decay_lrs = cfg.TRAIN.DECAY_LRS

    cfg.USE_GPU_NMS = True if args.use_cuda else False

    assert args.batch_size == 1, 'Only support single batch'

    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    gamma = cfg.TRAIN.GAMMA

    # initial tensorboardX writer
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)

    if args.dataset == 'voc07trainval':
        args.imdb_name = 'voc_2007_trainval'
        args.imdbval_name = 'voc_2007_test'

    elif args.dataset == 'voc0712trainval':
        args.imdb_name = 'voc_2007_trainval+voc_2012_trainval'
        args.imdbval_name = 'voc_2007_test'
    else:
        raise NotImplementedError

    # Choose the Caffe-converted ResNet checkpoint for the backbone.
    if args.net == 'res50':
        fname = 'resnet50-caffe.pth'
    elif args.net == 'res101':
        fname = 'resnet101-caffe.pth'
    else:
        raise NotImplementedError

    args.pretrained_model = os.path.join('data', 'pretrained', fname)

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # dataset_cachefile = os.path.join(output_dir, 'dataset.pickle')
    # if not os.path.exists(dataset_cachefile):
    #     imdb, roidb = combined_roidb(args.imdb_name)
    #     cache = [imdb, roidb]
    #     with open(dataset_cachefile, 'wb') as f:
    #         pickle.dump(cache, f)
    #     print('save dataset cache')
    # else:
    #     with open(dataset_cachefile, 'rb') as f:
    #         cache = pickle.load(f)
    #         imdb, roidb = cache[0], cache[1]
    #         print('loaded dataset from cache')

    imdb, roidb = combined_roidb(args.imdb_name)

    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, args.batch_size, shuffle=True)

    model = FasterRCNN(backbone=args.net, pretrained=args.pretrained_model)
    print('model loaded')

    # if cfg.PRETRAINED_RPN:
    #     rpn_model_path = 'output/rpn.pth'
    #     model.load_state_dict(torch.load(rpn_model_path)['model'])
    #     print('loaded rpn!')

    # optimizer: biases get a doubled learning rate and (depending on
    # cfg.TRAIN.BIAS_DECAY) possibly no weight decay, mirroring the
    # original Caffe Faster R-CNN solver settings.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value], 'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1), \
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and weight_decay or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]

    optimizer = SGD(params, momentum=momentum)

    if args.use_cuda:
        model = model.cuda()

    model.train()

    iters_per_epoch = int(len(train_dataset) / args.batch_size)

    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        loss_temp = 0
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
        tic = time.time()
        train_data_iter = iter(train_dataloader)

        # Stepwise learning-rate decay at the configured epochs.
        if epoch in args.decay_lrs:
            lr = lr * gamma
            adjust_learning_rate(optimizer, lr)
            print('adjust learning rate to {}'.format(lr))

        for step in range(iters_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()

            im_data_variable = Variable(im_data)

            output = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, \
            rcnn_cls_loss, rcnn_box_loss, \
            rpn_cls_loss, rpn_box_loss, _train_info = output

            # Total loss: RPN + RCNN classification and box regression.
            loss = rcnn_cls_loss.mean() + rcnn_box_loss.mean() +\
                   rpn_cls_loss.mean() + rpn_box_loss.mean()

            optimizer.zero_grad()

            loss.backward()
            optimizer.step()

            loss_temp += loss.item()

            # Accumulate fg/bg classification stats for periodic display.
            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                rcnn_tp += _train_info['rcnn_tp']
                rcnn_tn += _train_info['rcnn_tn']
                rcnn_fg += _train_info['rcnn_num_fg']
                rcnn_bg += _train_info['rcnn_num_bg']

            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                loss_temp /= args.display_interval
                rpn_cls_loss_v = rpn_cls_loss.mean().item()
                rpn_box_loss_v = rpn_box_loss.mean().item()
                rcnn_cls_loss_v = rcnn_cls_loss.mean().item()
                rcnn_box_loss_v = rcnn_box_loss.mean().item()

                print("[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, time cost %.1fs" \
                      % (epoch, step+1, iters_per_epoch, loss_temp, lr, toc - tic))
                print("\t\t\t rpn_cls_loss_v: %.4f, rpn_box_loss_v: %.4f\n\t\t\t "
                      "rcnn_cls_loss_v: %.4f, rcnn_box_loss_v: %.4f" \
                      % (rpn_cls_loss_v, rpn_box_loss_v, rcnn_cls_loss_v, rcnn_box_loss_v))
                if cfg.VERBOSE:
                    print('\t\t\t RPN : [FG/BG] [%d/%d], FG: %.4f, BG: %.4f' %
                          (rpn_fg, rpn_bg, float(rpn_tp) / rpn_fg,
                           float(rpn_tn) / rpn_bg))
                    print('\t\t\t RCNN: [FG/BG] [%d/%d], FG: %.4f, BG: %.4f' %
                          (rcnn_fg, rcnn_bg, float(rcnn_tp) / rcnn_fg,
                           float(rcnn_tn) / rcnn_bg))

                if args.use_tfboard:
                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_v', rpn_cls_loss_v,
                                      n_iter)
                    writer.add_scalar('losses/rpn_box_loss_v', rpn_box_loss_v,
                                      n_iter)
                    writer.add_scalar('losses/rcnn_cls_loss_v',
                                      rcnn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_box_loss_v',
                                      rcnn_box_loss_v, n_iter)

                    if cfg.VERBOSE:
                        writer.add_scalar('rpn/fg_acc',
                                          float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc',
                                          float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar('rcnn/fg_acc',
                                          float(rcnn_tp) / rcnn_fg, n_iter)
                        writer.add_scalar('rcnn/bg_acc',
                                          float(rcnn_tn) / rcnn_bg, n_iter)

                # Reset the running statistics after each display.
                loss_temp = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
                tic = time.time()

        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                output_dir, 'faster_{}_epoch_{}.pth'.format(args.net, epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
示例#16
0
from faster_rcnn.rpn_proposal import proposal_train

def train_transformation(data, label):
    """Training-time augmentation: random horizontal flip followed by
    ImageNet mean/std normalization of the image."""
    flipped, flipped_label = random_flip(data, label)
    return imagenetNormalize(flipped), flipped_label

# Training dataset with flip+normalize augmentation and project resize.
train_dataset = VOCDataset(annotation_dir=cfg.annotation_dir,
                           img_dir=cfg.img_dir,
                           dataset_index=cfg.dataset_index,
                           transform=train_transformation,
                           resize_func=img_resize)

# batch_size stays 1: the training loop reshapes labels to (1, -1, 5).
train_datait = mx.gluon.data.DataLoader(train_dataset, batch_size=1, shuffle=True)
ctx = mx.gpu(0)
net = FasterRCNN(len(cfg.anchor_ratios) * len(cfg.anchor_scales), cfg.num_classes)
net.init_params(ctx)
trainer = mx.gluon.trainer.Trainer(net.collect_params(), 
                                    'sgd', 
                                    {'learning_rate': 0.001,
                                     'wd': 0.0005,
                                     'momentum': 0.9})

# Number of anchors per feature-map location (ratios x scales).
anchors_count = len(cfg.anchor_ratios) * len(cfg.anchor_scales)
first_iter_saved_toggle = True
for epoch in range(1, 21):
    for it, (data, label) in enumerate(train_datait):
        data = data.as_in_context(ctx)
        _n, _c, h, w = data.shape
        label = label.as_in_context(ctx).reshape((1, -1, 5))
示例#17
0
    return data, label


# Test-split dataset; resize_func=None because images are resized
# manually (img_resize) inside the evaluation loop below.
test_dataset = VOCDataset(annotation_dir=cfg.test_annotation_dir,
                          img_dir=cfg.test_img_dir,
                          dataset_index=cfg.test_dataset_index,
                          transform=test_transformation,
                          resize_func=None)

# test_datait = mx.gluon.data.DataLoader(test_dataset, batch_size=1, shuffle=False)
ctx = mx.gpu(0)
args = parse_args()
print("Load model: {}".format(args.model_file))

net = FasterRCNN(len(cfg.anchor_ratios) * len(cfg.anchor_scales),
                 cfg.num_classes,
                 feature_name=args.feature_name)
net.init_params(ctx)
net.collect_params().load(args.model_file, ctx)
det_result = []
prograss_bar = tqdm(total=len(test_dataset))  # (sic: 'prograss' typo kept)

for it, data_id in enumerate(range(len(test_dataset))):
    data, label = test_dataset[data_id]
    data = data.asnumpy()
    data = data.transpose(1, 2, 0)
    data, scale = img_resize(data)
    data = data.transpose(2, 0, 1)
    data_id = test_dataset.dataset_index[data_id]
    data = mx.nd.array(data, ctx=ctx)
    _c, h, w = data.shape
        nms_time = _t['misc'].toc(average=False)

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, detect_time, nms_time)
    df = pandas.DataFrame(all_boxes)
    df.columns = ['x1', 'y1', 'x2', 'y2', 'score', 'image_id']
    df.to_csv('predictions.csv', index=False)
    print('Total time: %.4f, per image: %.4f' %
          (total_time, total_time / num_images))


if __name__ == '__main__':

    # Build the building detector and load the weights given on the
    # command line (args is presumably parsed at module level).
    # NOTE(review): '__backround__' is misspelled but matches the label
    # set used by the model constructor elsewhere in this project.
    net = FasterRCNN(classes=['__backround__', 'building'], debug=False)
    network.load_net(args.weights, net)
    print('load model successfully!')

    net.cuda()
    net.eval()  # inference mode for Dropout/BatchNorm

    # Ground-truth boxes for the evaluation split.
    val_data = json.load(open(args.test_boxes))

    # evaluation
    test_net(net,
             val_data,
             max_per_image,
             thresh=thresh,
             vis=vis,
             data_dir='../data')
def test():
    """Propagate the first-frame object box through each DAVIS 'val' sequence.

    Per sequence:
      1. Read frame 0, take the bbox of the ground-truth segmentation mask
         and classify it with the Faster R-CNN RoI head (class + confidence).
      2. For each following frame, merge RPN proposals overlapping the
         previous box; if the head is confident, refine the box with the
         bbox-regression output.  When no proposal overlaps, fall back to a
         full detector pass and pick the best-matching detection.
      3. Write per-frame crops and box visualisations under demo/ and save
         every box as a row [seq_idx, frame_idx, x1, y1, x2, y2] to crop.npy.
    """
    # Set up dataloader
    data_loader = DAVIS_seq_dataloader(split='val')

    model_file = './model/VGGnet_fast_rcnn_iter_70000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()  # inference mode
    print('Load Faster R-CNN model successfully!')

    # unet_model = './model/vgg_unet_1e-4_500.h5'
    # unet = UNet()
    # network.load_net(unet_model, unet)
    # unet.cuda()
    # network.weights_normal_init(unet, dev=0.01)
    # unet.load_from_faster_rcnn_h5(h5py.File(model_file))
    # NOTE(review): criterion_bce / weight_decay are unused below — leftovers
    # of the commented-out U-Net fine-tuning path.
    criterion_bce = torch.nn.BCELoss().cuda()
    weight_decay = 5e-5
    # optimizer = torch.optim.SGD(list(unet.parameters())[26:], lr=1e-4, weight_decay=weight_decay)
    # print('Load U-Net model successfully!')

    crop_set = []  # rows of [seq_idx, frame_idx, x1, y1, x2, y2]
    # Iterate
    for i in range(data_loader.num_seq):
        # Get the first frame info
        seq = data_loader.seq_list[data_loader.out_pointer]
        seq_len = data_loader.seq_len[seq]
        img_blobs, seg_blobs = data_loader.get_next_minibatch()
        img = img_blobs[0,:,:,:]
        im_data, im_scales = detector.get_image_blob(img)
        im_info = np.array([[im_data.shape[1], im_data.shape[2], im_scales[0]]], dtype=np.float32)
        # Get the category of the object in the first frame
        rmin, rmax, cmin, cmax = bbox(seg_blobs[0,:,:,0])
        features, rois = detector(im_data, im_info, rpn_only=True)
        # Feed the ground-truth first-frame box (batch index 0, [x1,y1,x2,y2])
        # through the RoI head to get its class label and confidence.
        new_rois_np = np.array([[0, cmin, rmin, cmax, rmax]], dtype=np.float32)
        new_rois_t = torch.from_numpy(new_rois_np).cuda()
        new_rois = Variable(new_rois_t, requires_grad=False)
        pooled_features = detector.roi_pool(features, new_rois)
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = detector.fc6(x)
        x = detector.fc7(x)
        cls_score = detector.score_fc(x)
        # NOTE(review): F.softmax without an explicit dim relies on the old
        # implicit-dim behaviour of early PyTorch; newer versions warn/differ.
        cls_prob = F.softmax(cls_score)
        bbox_pred = detector.bbox_fc(x)
        cls_prob_np = cls_prob.cpu().data.numpy()
        bbox_pred_np = bbox_pred.cpu().data.numpy()
        cls_idx = cls_prob_np.argmax()   # predicted class of the tracked object
        cls_conf = cls_prob_np.max()     # its confidence

        # Overfit U-Net with the first frame
        # for i in range(100):
        #     unet.train()
        #     img_t = torch.from_numpy(img_blobs).permute(0,3,1,2).float().cuda()
        #     img_v = Variable(img_t, requires_grad=False)
        #     seg_t = torch.from_numpy(seg_blobs).permute(0,3,1,2).float().cuda()
        #     seg_v = Variable(seg_t, requires_grad=False)
        #     pred = unet(img_v)
            # loss = criterion_bce(pred, seg_v)
        #     pred_view = pred.view(-1, 1)
        #     seg_view = seg_v.view(-1, 1)
        #     EPS = 1e-6
        #     loss = 0.6 * seg_view.mul(torch.log(pred_view+EPS)) + 0.4 * seg_view.mul(-1).add(1).mul(torch.log(1-pred+EPS))
        #     loss = -torch.mean(loss)
        #     loss_val = loss.data[0]
        #     optimizer.zero_grad()
        #     loss.backward()
        #     optimizer.step()
        #     print('{}/100: {}'.format(i, loss_val))
        # unet.eval()

        # Merge region proposals overlapping with last frame proposal
        for j in range(1, seq_len):
            img_blobs, _ = data_loader.get_next_minibatch()
            img = img_blobs[0,:,:,:]
            im_data, im_scales = detector.get_image_blob(img)
            # 300 x 5, the first elements are useless here
            features, rois = detector(im_data, im_info, rpn_only=True)
            x1, y1, x2, y2 = merge_rois((rmin, rmax, cmin, cmax), rois.cpu().data.numpy(), thres=0.75)

            # Have overlapping proposals
            if x1 is not None:
                # Send to following layers to refine the bbox
                new_rois_np = np.array([[0, x1, y1, x2, y2]], dtype=np.float32)
                new_rois_t = torch.from_numpy(new_rois_np).cuda()
                new_rois = Variable(new_rois_t, requires_grad=False)
                pooled_features = detector.roi_pool(features, new_rois)
                x = pooled_features.view(pooled_features.size()[0], -1)
                x = detector.fc6(x)
                x = detector.fc7(x)
                cls_score = detector.score_fc(x)
                cls_prob = F.softmax(cls_score)
                bbox_pred = detector.bbox_fc(x)
                cls_prob_np = cls_prob.cpu().data.numpy()
                bbox_pred_np = bbox_pred.cpu().data.numpy()

                # Only regress bbox when confidence is greater than 0.8
                if cls_prob_np.max() > 0.8 and cls_prob_np.argmax() != 0:
                    keep = cls_prob_np.argmax()
                    pred_boxes, scores, classes = detector.interpret_faster_rcnn(cls_prob, bbox_pred, new_rois, im_info, im_data.shape, 0.8)

                    # Apply the 4 regression deltas (dx, dy, dw, dh) of the
                    # winning class to the merged proposal box.
                    cx = (x1 + x2) / 2
                    cy = (y1 + y2) / 2
                    width = x2 - x1 + 1
                    height = y2 - y1 + 1
                    dx = bbox_pred_np[0,keep*4+0]
                    dy = bbox_pred_np[0,keep*4+1]
                    dw = bbox_pred_np[0,keep*4+2]
                    dh = bbox_pred_np[0,keep*4+3]

                    pred_x = dx * width + cx
                    pred_y = dy * height + cy
                    pred_w = np.exp(dw) * width
                    pred_h = np.exp(dh) * height

                    x1 = pred_x - pred_w / 2
                    x2 = pred_x + pred_w / 2
                    y1 = pred_y - pred_h / 2
                    y2 = pred_y + pred_h / 2

            # No overlapping proposals
            if x1 is None:
                # Using Faster R-CNN again to find potential objects
                dets, scores, classes = detector.detect(img, 0.6)
                # Cannot find any salient object
                if dets.shape[0] == 0:
                    x1, y1, x2, y2 = cmin, rmin, cmax, rmax  # keep previous box
                else:
                    # IoU of every detection against the previous box
                    x1 = dets[:,0]
                    y1 = dets[:,1]
                    x2 = dets[:,2]
                    y2 = dets[:,3]
                    pred_area = (x2 - x1 + 1) * (y2 - y1 + 1)
                    init_area = (cmax - cmin + 1) * (rmax - rmin + 1)
                    xx1 = np.maximum(x1, cmin)
                    xx2 = np.minimum(x2, cmax)
                    yy1 = np.maximum(y1, rmin)
                    yy2 = np.minimum(y2, rmax)
                    inter = (xx2 - xx1 + 1) * (yy2 - yy1 + 1)
                    ovr = inter / (pred_area + init_area - inter)
                    # If there is overlapping, choose the largest IoU bbox
                    try:
                        ovr = ovr[ovr > 0.3]
                        ovr_idx = np.argsort(ovr)[-1]
                        x1 = dets[ovr_idx,0]
                        y1 = dets[ovr_idx,1]
                        x2 = dets[ovr_idx,2]
                        y2 = dets[ovr_idx,3]
                    # Else, choose the highest objectness score one
                    # NOTE(review): bare except also hides real errors
                    # (e.g. indexing bugs), not just the empty-ovr case.
                    except:
                        if cls_idx == 0:
                            temp_idx = scores.argmax()
                            x1 = dets[temp_idx,0]
                            y1 = dets[temp_idx,1]
                            x2 = dets[temp_idx,2]
                            y2 = dets[temp_idx,3]
                        else:
                            # Pick the spatially nearest detection of the
                            # same class as the first-frame object.
                            cx = (x1 + x2) / 2
                            cy = (y1 + y2) / 2
                            cc = (cmin + cmax) / 2
                            cr = (rmin + rmax) / 2
                            dist = np.sqrt(np.square(cx-cc) + np.square(cy-cr))
                            dist_idx = np.argsort(dist)
                            for di in dist_idx:
                                if classes[di] == _CLASSES[cls_idx]:
                                    x1 = dets[di,0]
                                    y1 = dets[di,1]
                                    x2 = dets[di,2]
                                    y2 = dets[di,3]

            # Crop the region and send it to U-Net
            # Clip the box to the image; the except branch handles the case
            # where x1..y2 are still 1-element arrays rather than scalars.
            try:
                x1 = int(max(x1, 0))
                x2 = int(min(x2, im_data.shape[2]))
                y1 = int(max(y1, 0))
                y2 = int(min(y2, im_data.shape[1]))
            except:
                x1 = int(max(x1[0], 0))
                x2 = int(min(x2[0], im_data.shape[2]))
                y1 = int(max(y1[0], 0))
                y2 = int(min(y2[0], im_data.shape[1]))

            # MEAN_PIXEL = np.array([103.939, 116.779, 123.68])
            # crop = img_blobs[:, y1:y2+1, x1:x2+1, :] - MEAN_PIXEL
            # crop = img_blobs[:,:,:,:] - MEAN_PIXEL
            # crop_v = Variable(torch.from_numpy(crop).permute(0, 3, 1, 2).cuda(), requires_grad=False)
            # pred = unet(crop_v)
            # pred_np = pred.cpu().data.numpy()[0,0,:,:]
            # pred_np[pred_np < 0.5] = 0
            # pred_np[pred_np >= 0.5] = 1
            # pred_np = pred_np * 255
            # res = pred_np.astype(int)
            # cv2.imwrite('test.png', res)

            # Degenerate box: drop into the debugger for inspection.
            if y2 - y1 <= 1 or x2 - x1 <= 1:
                ipdb.set_trace()
            cv2.imwrite(os.path.join('demo', 'crop_{}_{}.png'.format(i, j)), img[y1:y2+1,x1:x2+1,:])

            # Current box becomes the reference for the next frame.
            rmin = y1
            rmax = y2
            cmin = x1
            cmax = x2

            im2show = np.copy(img)
            cv2.rectangle(im2show, (int(x1),int(y1)), (int(x2),int(y2)), (0, 255, 0), 2)
            cv2.imwrite(os.path.join('demo', '{}_{}.jpg'.format(i, j)), im2show)
            temp = [i, j, x1, y1, x2, y2]
            crop_set.append(temp)

    # Save
    crop_set = np.array(crop_set)
    np.save('crop', crop_set)
示例#20
0
# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY

# load data
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)  # enrich roidb entries with per-image metadata
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG, training=True)

net.cuda()
net.train()
net.apply(weight_init)  #-- parameters initialize

#----download resnet101 weights-----
# Copy only the pretrained tensors whose names exist in our backbone,
# so extra keys in the checkpoint are ignored.
pretrained_state = torch.load(pretrained_model)
net.resnet.load_state_dict({
    k: v
    for k, v in pretrained_state.items() if k in net.resnet.state_dict()
})
# Freeze the stem (conv1 / bn1) so the earliest features keep their
# ImageNet-pretrained values during fine-tuning.
for p in net.resnet.conv1.parameters():
    p.requires_grad = False
for p in net.resnet.bn1.parameters():
    p.requires_grad = False
# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)  # enrich roidb entries with per-image metadata
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)  # gaussian init, stddev 0.01
network.load_pretrained_npy(net, pretrained_model)  # ImageNet VGG16 backbone weights
# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = 'models/saved_model3/faster_rcnn_60000.h5'
# network.load_net(model_file, net)
# exp_name = 'vgg16_02-19_13-24'
# start_step = 60001
# lr /= 10.
# network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01)

net.cuda()
net.train()

params = list(net.parameters())
# optimizer = torch.optim.Adam(params[-8:], lr=lr)
class FasterRCNN:
    """Inference-only wrapper around a Faster R-CNN building detector.

    Downloads weights on first use (when none are given), runs the model on
    single RGB images, and returns NMS-filtered detections as pandas
    DataFrames with columns ['x1', 'y1', 'x2', 'y2', 'score'].
    """

    def __init__(self, weights=None):
        """Load the detector, fetching the released weights if needed.

        Args:
            weights: path to a saved .h5 model; when None, the ColdSpots
                release zip is downloaded and unpacked into weights/.
        """
        if weights is None:
            if not os.path.exists('weights'):
                os.mkdir('weights')
            download_url = 'https://github.com/ArnholdInstitute/ColdSpots/releases/download/1.0/faster-rcnn.zip'
            if not os.path.exists('weights/faster-rcnn'):
                print('Downloading weights for faster-rcnn')
                if not os.path.exists(os.path.join('weights/faster-rcnn.zip')):
                    check_output([
                        'wget', download_url, '-O', 'weights/faster-rcnn.zip'
                    ])
                print('Unzipping...')
                check_output(
                    ['unzip', 'weights/faster-rcnn.zip', '-d', 'weights'])
            # description.json names the weight file inside the unpacked dir
            description = json.load(
                open('weights/faster-rcnn/description.json'))
            weights = os.path.join('weights/faster-rcnn',
                                   description['weights'])
            print('Building model...')

        # NOTE(review): '__backround__' looks like a typo for '__background__';
        # it must match the class list the weights were trained with, so
        # verify against the checkpoint before changing it.
        self.model = FasterRCNNModel(classes=['__backround__', 'building'],
                                     debug=False)
        network.load_net(weights, self.model)

        self.model.cuda()
        self.model.eval()  # inference mode

    def close_session(self):
        # Kept for interface parity with TF-based detectors; nothing to free.
        pass

    def predict_image(self, image, threshold, eval_mode=False):
        """
        Infer buildings for a single image.
        Inputs:
            image :: n x m x 3 ndarray - Should be in RGB format
            threshold :: float - minimum score for a detection to be kept
            eval_mode :: bool - if True, also return all post-NMS detections
                and the forward-pass runtime in seconds
        Returns:
            DataFrame ['x1','y1','x2','y2','score'] of detections above
            threshold; in eval_mode, a (kept, all_detections, runtime) tuple.
        """

        if type(image) is str:
            image = cv2.imread(image)  # cv2 already yields BGR
        else:
            image = image[:, :, (2, 1, 0)]  # RGB -> BGR

        im_data, im_scales = self.model.get_image_blob(image)
        im_info = np.array(
            [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
            dtype=np.float32)

        t0 = time.time()
        cls_prob, bbox_pred, rois = self.model(im_data, im_info)
        runtime = time.time() - t0

        scores = cls_prob.data.cpu().numpy()
        # Undo the input scaling so boxes are in original-image coordinates.
        boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data.cpu().numpy()
            pred_boxes = bbox_transform_inv(boxes, box_deltas)
            pred_boxes = clip_boxes(pred_boxes, image.shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Columns 4:8 are the 'building' class boxes; scores[:, 1] its score.
        current = np.concatenate(
            [
                pred_boxes[:, 4:8],  # (skip the background class)
                np.expand_dims(scores[:, 1], 1)
            ],
            axis=1)

        # Non-maximum suppression at IoU 0.3.
        suppressed = current[py_cpu_nms(current.astype(np.float32), 0.3)]
        suppressed = pandas.DataFrame(
            suppressed, columns=['x1', 'y1', 'x2', 'y2', 'score'])
        if eval_mode:
            return suppressed[
                suppressed['score'] >= threshold], suppressed, runtime
        else:
            return suppressed[suppressed['score'] >= threshold]

    def predict_all(self, test_boxes_file, threshold, data_dir=None):
        """Yield (kept, all_detections, annotation) per image in a test-set JSON.

        Args:
            test_boxes_file: JSON list of {'image_path': ...} annotations.
            threshold: score threshold passed through to predict_image.
            data_dir: image root; defaults to the JSON file's directory.
        """
        test_boxes = json.load(open(test_boxes_file))
        if data_dir is None:
            data_dir = os.path.join(os.path.dirname(test_boxes_file))

        # NOTE(review): total_time is never updated, and the 'time' variable
        # below shadows the time module inside this method — leftover code.
        total_time = 0.0

        for i, anno in enumerate(test_boxes):
            # imread gives BGR; reorder to RGB as predict_image expects.
            orig_img = cv2.imread(
                '%s/%s' % (data_dir, anno['image_path']))[:, :, (2, 1, 0)]

            pred, all_rects, time = self.predict_image(orig_img,
                                                       threshold,
                                                       eval_mode=True)

            pred['image_id'] = i
            all_rects['image_id'] = i

            yield pred, all_rects, test_boxes[i]
示例#23
0
	# create VGG model for state featurization
	print("Loading image embedding model...")
        if args.image_embedding_model_type == "resnet":
	    im_emb_model = ResNet50()
        elif args.image_embedding_model_type == "vgg":
            im_emb_model = VGG16()
        else:
            print("--image_embedding_model_type must be either resnet or vgg")
            sys.exit(0)
	print("Done!")

	# create Faster-RCNN model for state featurization
	print("Loading Fast-RCNN...")
	model_file = 'VGGnet_fast_rcnn_iter_70000.h5'
	model_frcnn = FasterRCNN()
	network.load_net(model_file, model_frcnn)
	model_frcnn.cuda()
	model_frcnn.eval()
	print("Done!")

	# create DQN's for the next object, predicates, and attributes
	print("Creating DQN models...")
	DQN_next_object_main = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1)
	DQN_next_object_target = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1)
	DQN_predicate_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1)
	DQN_predicate_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1)
	DQN_attribute_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1)
	DQN_attribute_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1)
	print("Done!")
示例#24
0
# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)  # enrich roidb entries with per-image metadata
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)  # gaussian init, stddev 0.01
#network.load_pretrained_npy(net, pretrained_model)
# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = 'models/saved_model3/faster_rcnn_60000.h5'
# network.load_net(model_file, net)
# exp_name = 'vgg16_02-19_13-24'
# start_step = 60001
# lr /= 10.
# network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01)

# Cache the torchvision VGG16 state dict locally so later runs skip the download.
if os.path.exists('pretrained_vgg.pkl'):
    pret_net = pkl.load(open('pretrained_vgg.pkl','rb'))
else:
    pret_net = model_zoo.load_url('https://download.pytorch.org/models/vgg16-397923af.pth')
    pkl.dump(pret_net, open('pretrained_vgg.pkl','wb'), pkl.HIGHEST_PROTOCOL)
示例#25
0
# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)  # enrich roidb entries with per-image metadata
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)  # gaussian init, stddev 0.01
network.load_pretrained_npy(net, pretrained_model)  # ImageNet VGG16 backbone weights
# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = 'models/saved_model3/faster_rcnn_60000.h5'
# network.load_net(model_file, net)
# exp_name = 'vgg16_02-19_13-24'
# start_step = 60001
# lr /= 10.
# network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01)

net.cuda()
net.train()

params = list(net.parameters())
# optimizer = torch.optim.Adam(params[-8:], lr=lr)
示例#26
0
def train():
    """Train Faster R-CNN on a Pascal VOC split.

    Reads hyper-parameters from the command line, builds the RoI dataset
    and model, runs SGD with step learning-rate decay, logs losses to the
    console (and optionally TensorBoard), and checkpoints the model every
    args.save_interval epochs into args.output_dir.
    """

    args = parse_args()
    lr = args.lr
    decay_lrs = args.decay_lrs      # epochs at which lr is multiplied by gamma
    momentum = args.momentum
    weight_decay = args.weight_decay
    bais_decay = args.bais_decay    # NOTE(review): 'bais' is a typo for 'bias' in the CLI arg
    gamma = args.gamma

    cfg.USE_GPU_NMS = True if args.use_gpu else False

    if args.use_tfboard:
        writer = SummaryWriter()

    # load data
    print('load data')
    if args.dataset == 'voc07trainval':
        dataset_name = 'voc_2007_trainval'
    elif args.dataset == 'voc12trainval':
        dataset_name = 'voc_2012_trainval'
    elif args.dataset == 'voc0712trainval':
        dataset_name = 'voc_2007_trainval+voc_2012_trainval'
    else:
        raise NotImplementedError

    imdb, roidb = combined_roidb(dataset_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)
    iter_per_epoch = int(len(train_dataset) / args.batch_size)

    # prepare model
    print('load model')
    model = FasterRCNN(backbone=args.backbone)
    # Build per-parameter groups: biases get a doubled lr (per the usual
    # Faster R-CNN config) and weight decay only when bais_decay is set.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{
                    'params': [value],
                    'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                    'weight_decay': bais_decay and weight_decay or 0
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]

    if args.use_gpu:
        model = model.cuda()

    model.train()

    # define optimizer
    optimizer = SGD(params, momentum=momentum)

    # training
    print('start training...')
    for epoch in range(args.epochs):
        start_time = time.time()
        train_data_iter = iter(train_dataloader)
        temp_loss = 0
        # running true-positive / true-negative / fg / bg counters for logging
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0

        if epoch in decay_lrs:
            lr = lr * gamma
            adjust_lr(optimizer, lr)
            print('adjusting learning rate to {}'.format(lr))

        for step in range(iter_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)

            if args.use_gpu:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()

            im_data_variable = Variable(im_data)

            outputs = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, faster_rcnn_cls_loss, faster_rcnn_reg_loss, \
            rpn_cls_loss, rpn_reg_loss, _train_info = outputs

            # Total loss: RPN + RCNN classification and regression terms.
            loss = faster_rcnn_cls_loss.mean() + faster_rcnn_reg_loss.mean() + \
                   rpn_cls_loss.mean() + rpn_reg_loss.mean()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            temp_loss += loss.item()

            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                faster_rcnn_tp += _train_info['faster_rcnn_tp']
                faster_rcnn_tn += _train_info['faster_rcnn_tn']
                faster_rcnn_fg += _train_info['faster_rcnn_num_fg']
                faster_rcnn_bg += _train_info['faster_rcnn_num_bg']

            if (step + 1) % args.display_interval == 0:
                end_time = time.time()
                temp_loss /= args.display_interval
                rpn_cls_loss_m = rpn_cls_loss.mean().item()
                rpn_reg_loss_m = rpn_reg_loss.mean().item()
                faster_rcnn_cls_loss_m = faster_rcnn_cls_loss.mean().item()
                faster_rcnn_reg_loss_m = faster_rcnn_reg_loss.mean().item()

                print('[epoch %2d][step %4d/%4d] loss: %.4f, time_cost: %.1f' %
                      (epoch, step + 1, iter_per_epoch, temp_loss,
                       end_time - start_time))
                print(
                    'loss: rpn_cls_loss_m: %.4f, rpn_reg_loss_m: %.4f, faster_rcnn_cls_loss_m: %.4f, faster_rcnn_reg_loss_m: %.4f'
                    % (rpn_cls_loss_m, rpn_reg_loss_m, faster_rcnn_cls_loss_m,
                       faster_rcnn_reg_loss_m))

                if args.use_tfboard:
                    n_iter = epoch * iter_per_epoch + step + 1
                    writer.add_scalar('losses/loss', temp_loss, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_m', rpn_cls_loss_m,
                                      n_iter)
                    writer.add_scalar('losses/rpn_reg_loss_m', rpn_reg_loss_m,
                                      n_iter)
                    writer.add_scalar('losses/faster_rcnn_cls_loss_m',
                                      faster_rcnn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_reg_loss_m',
                                      faster_rcnn_reg_loss_m, n_iter)

                    if cfg.VERBOSE:
                        writer.add_scalar('rpn/fg_acc',
                                          float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc',
                                          float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar(
                            'rcnn/fg_acc',
                            float(faster_rcnn_tp) / faster_rcnn_fg, n_iter)
                        writer.add_scalar(
                            'rcnn/bg_acc',
                            float(faster_rcnn_tn) / faster_rcnn_bg, n_iter)

                # reset window accumulators after each display interval
                temp_loss = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0
                start_time = time.time()

        # NOTE(review): os.mkdir fails when a parent directory is missing;
        # os.makedirs(..., exist_ok=True) would be more robust.
        if not os.path.exists(args.output_dir):
            os.mkdir(args.output_dir)

        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                args.output_dir, 'faster_rcnn101_epoch_{}.pth'.format(epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
# ------------

# Fix the RNG for reproducible sampling when a seed is configured.
if rand_seed is not None:
    np.random.seed(rand_seed)

# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load net
net = FasterRCNN(classes=['__background__', 'building'], debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)  # gaussian init, stddev 0.01
network.load_pretrained_npy(net, pretrained_model)  # ImageNet VGG16 backbone weights

net.cuda()
net.train()

# Skip the first 8 parameter tensors (the frozen early conv layers) when
# building the optimizer.
params = list(net.parameters())
optimizer = torch.optim.SGD(params[8:],
                            lr=lr,
                            momentum=momentum,
                            weight_decay=weight_decay)

if not os.path.exists(output_dir):
    os.makedirs(output_dir)
示例#28
0
def main():
    """Train Faster R-CNN with MXNet Gluon.

    Builds the dataset/dataloader (batch size fixed at 1), the network and
    an SGD trainer with factor lr decay, then for each iteration computes
    the four-term loss (RPN cls/reg + RCNN cls/reg) under autograd and
    steps the trainer.  Parameters are saved every epoch ('lastest') and
    every args.save_interval epochs under a per-epoch name.
    """
    global args, logger
    train_ds = train_dataset()
    # only support batch_size = 1 so far
    train_datait = mx.gluon.data.DataLoader(train_ds,
                                            batch_size=1,
                                            shuffle=True)

    # Only the first listed GPU is actually used.
    ctx = [mx.gpu(i) for i in range(len(args.gpus.split(",")))]
    ctx = ctx[0]
    net = FasterRCNN(len(cfg.anchor_ratios) * len(cfg.anchor_scales),
                     cfg.num_classes,
                     feature_name=args.feature_name)
    net.init_params(ctx)
    if args.pretrained_model != "":
        net.collect_params().load(args.pretrained_model, ctx)
        logger.info("loading {}".format(args.pretrained_model))

    # lr decays by 0.9 every 20k updates
    lr_schdl = mx.lr_scheduler.FactorScheduler(step=20000, factor=0.9)
    trainer = mx.gluon.trainer.Trainer(net.collect_params(),
                                       'sgd',
                                       optimizer_params={
                                           'learning_rate': args.learning_rate,
                                           'wd': args.weight_decay,
                                           "lr_scheduler": lr_schdl,
                                           'momentum': 0.9
                                       })
    anchors_count = len(cfg.anchor_ratios) * len(cfg.anchor_scales)

    for epoch in range(0, args.epochs):
        last_iter_end_timestamp = time.time()
        for it, (data, label) in enumerate(train_datait):
            data_loaed_time = time.time()
            data = data.as_in_context(ctx)
            _n, _c, h, w = data.shape
            # labels reshaped to (1, num_boxes, 5): [cls, x1, y1, x2, y2]
            label = label.as_in_context(ctx).reshape((1, -1, 5))
            with mx.autograd.record():
                rpn_cls, rpn_reg, f = net.rpn(data)
                f_height = f.shape[2]
                f_width = f.shape[3]
                # anchor-level RPN targets, then sampled proposals + RCNN targets
                rpn_cls_gt, rpn_reg_gt = rpn_gt_opr(rpn_reg.shape, label, ctx,
                                                    h, w)
                rpn_bbox_sampled, rcnn_reg_target, rcnn_cls_target = proposal_train(
                    rpn_cls, rpn_reg, label, f.shape, data.shape, ctx)

                # RPN Loss part
                # Reshape and transpose to the shape of gt
                rpn_cls = rpn_cls.reshape((1, -1, 2, f_height, f_width))
                rpn_cls = mx.nd.transpose(rpn_cls, (0, 1, 3, 4, 2))
                rpn_reg = mx.nd.transpose(
                    rpn_reg.reshape((1, -1, 4, f_height, f_width)),
                    (0, 1, 3, 4, 2))
                # Only positive anchors contribute to the regression loss.
                mask = (rpn_cls_gt == 1).reshape(
                    (1, anchors_count, f_height, f_width, 1)).broadcast_to(
                        (1, anchors_count, f_height, f_width, 4))
                rpn_loss_reg = mx.nd.sum(
                    mx.nd.smooth_l1(
                        (rpn_reg - rpn_reg_gt) * mask, 3.0)) / mx.nd.sum(mask)
                # label -1 marks anchors excluded from the cls loss
                rpn_loss_cls = softmax_celoss_with_ignore(
                    rpn_cls.reshape((-1, 2)), rpn_cls_gt.reshape((-1, )), -1)

                # RCNN part
                # add batch dimension
                rpn_bbox_sampled = mx.nd.concatenate([
                    mx.nd.zeros(
                        (rpn_bbox_sampled.shape[0], 1), ctx), rpn_bbox_sampled
                ],
                                                     axis=1)
                f = mx.nd.ROIPooling(f, rpn_bbox_sampled, (7, 7),
                                     1.0 / 16)  # VGG16 based spatial stride=16
                rcnn_cls, rcnn_reg = net.rcnn(f)
                # Only foreground RoIs (class > 0) contribute to bbox regression.
                mask = (rcnn_cls_target > 0).reshape(
                    (rcnn_cls_target.shape[0], 1)).broadcast_to(
                        (rcnn_cls_target.shape[0], 4 * cfg.num_classes))
                rcnn_loss_reg = mx.nd.sum(
                    mx.nd.smooth_l1((rcnn_reg - rcnn_reg_target) * mask,
                                    1.0)) / mx.nd.sum(mask)
                rcnn_loss_cls = mx.nd.softmax_cross_entropy(
                    rcnn_cls, rcnn_cls_target) / rcnn_cls.shape[0]

                loss = rpn_loss_cls + rpn_loss_reg + rcnn_loss_cls + rcnn_loss_reg

            loss.backward()
            trainer.step(data.shape[0])

            logger.info(
                "Epoch {} Iter {:>6d}: loss={:>6.5f}, rpn_loss_cls={:>6.5f}, rpn_loss_reg={:>6.5f}, rcnn_loss_cls={:>6.5f}, rcnn_loss_reg={:>6.5f}, lr={:>6.5f}"
                .format(epoch, it, loss.asscalar(), rpn_loss_cls.asscalar(),
                        rpn_loss_reg.asscalar(), rcnn_loss_cls.asscalar(),
                        rcnn_loss_reg.asscalar(), trainer.learning_rate))

        # Always refresh the rolling checkpoint; keep numbered snapshots too.
        net.collect_params().save(
            os.path.join(args.save_path, "lastest.gluonmodel"))
        if epoch % args.save_interval == 0:
            save_schema = os.path.split(args.save_path)[1] + "-{}"
            net.collect_params().save(
                os.path.join(args.save_path,
                             save_schema.format(epoch) + ".gluonmodel"))
示例#29
0
# load config
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
# small Visual Genome subset (50 images) — presumably for debugging; confirm.
imdb = VisualGenome(split=0, num_im=50)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
network.weights_normal_init(net, dev=0.01)  # gaussian init, stddev 0.01
network.load_net(pretrained_model, net)     # restore full pretrained detector
# network.load_pretrained_npy(net, 'checkpoints/VGG_imagenet.npy')
net.cuda()
net.train()

params = list(net.parameters())

print("Params are {}".format(
    '\n'.join(['{}: {}'.format(n, p.size()) for n,p in net.named_parameters()]))
)
# optimizer = torch.optim.Adam(params, lr=0.001, eps=1e-4, weight_decay=weight_decay)
# Skip the first 8 parameter tensors (frozen early conv layers) when optimizing.
optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay)
if not os.path.exists(output_dir):
示例#30
0
                    thickness=1)
    im_name = os.path.basename(image_file)
    print(os.path.join('demo/det_results', im_name))
    cv2.imwrite(os.path.join('demo/det_results', im_name), im2show)
    #cv2.imshow('demo', im2show)
    #cv2.waitKey(0)


def folder_test(net, folder):
    """Run image_test on every image listed in a VOC-style dataset folder.

    Args:
        net: loaded FasterRCNN detector (already moved to GPU and in eval mode).
        folder: dataset root ending with '/'; must contain
            JPEGImages/file_name.txt with one image stem per line, plus
            matching .JPG images and Annotations/<stem>.xml files.
    """
    txt_file = folder + 'JPEGImages/file_name.txt'

    with open(txt_file) as f:
        for line in f:
            # Hoist: the stripped stem was previously computed twice per line.
            stem = line.strip('\n')
            img_path = folder + 'JPEGImages/' + stem + '.JPG'
            anno_path = folder + 'Annotations/' + stem + '.xml'
            image_test(net, img_path, anno_path)


if __name__ == '__main__':
    model_file = 'models/saved_model3/faster_rcnn_100000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)  # restore trained weights
    detector.cuda()
    detector.eval()  # inference mode
    print('load model successfully!')
    #image_file = 'demo/000001.JPG'
    #image_test(detector, image_file, None)

    # Run detection over every image listed in the dataset folder.
    folder = '/data/jmtian/PlateData/PVW_WRM_CUT/'
    folder_test(detector, folder)