Example #1
File: main.py Project: sljlp/VCN
##loader_stereo_12 = dr.myImageFloder(iml0,iml1,flowl0,shape = datashape,scale=1, order=0, prob=0.5,dploader=disparity_loader)
##iml0, iml1, flowl0, _, _, _ = lmbs.dataloader('%s/mb-ex-training/'%args.database, res='F')
##loader_stereo_mb = dr.myImageFloder(iml0,iml1,flowl0,shape = datashape,scale=0.5, order=1, prob=0.5,dploader=disparity_loader_mb)
##iml0, iml1, flowl0, _, _, _,_,_ = lsfs.dataloader('%s/sceneflow/'%args.database)
##loader_stereo_sf = dr.myImageFloder(iml0,iml1,flowl0,shape = datashape,scale=1, order=1, dploader=disparity_loader_sf)

#data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*75+[loader_stereo_12]*75+[loader_stereo_mb]*600+[loader_stereo_sf])
#data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*50+[loader_stereo_12]*50+[loader_stereo_mb]*600+[loader_chairs])
#data_inuse = torch.utils.data.ConcatDataset([loader_chairs]*2 + [loader_things] +[loader_stereo_15]*300+[loader_stereo_12]*300) # stereo transfer
#data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*20+[loader_stereo_12]*20) # stereo transfer
#data_inuse = torch.utils.data.ConcatDataset([loader_kitti15]*20+[loader_kitti12]*20+[loader_stereo_15]*20+[loader_stereo_12]*20)
print('%d batches per epoch' % (len(data_inuse) // batch_size))

#TODO
# Build VCN for the per-GPU batch size and the (reversed) training crop shape;
# md sets the correlation search range per pyramid level, scaled by maxdisp.
model = VCN([batch_size // ngpus] + data_inuse.datasets[0].shape[::-1],
            md=[int(4 * (args.maxdisp / 256)), 4, 4, 4, 4],
            fac=args.fac)
model = nn.DataParallel(model)
model.cuda()

total_iters = 0
mean_L = [[0.33, 0.33, 0.33]]
mean_R = [[0.33, 0.33, 0.33]]
if args.loadmodel is not None:
    pretrained_dict = torch.load(args.loadmodel)
    # Identity copy of the weights; the evaluation examples below filter out
    # size-dependent keys at this point instead.
    pretrained_dict['state_dict'] = {
        k: v
        for k, v in pretrained_dict['state_dict'].items()
    }

    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
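
Note that the comprehension above copies the checkpoint verbatim; the evaluation examples further down filter out size-dependent keys instead. A minimal helper sketching that pattern (hypothetical name, assuming the checkpoint layout used throughout these examples):

def load_vcn_checkpoint(model, path):
    # Hypothetical helper: load a VCN checkpoint, dropping precomputed
    # 'grid' buffers and flow_reg weights (except conv1) so the weights
    # fit a model built for a different input size.
    ckpt = torch.load(path)
    state = {k: v for k, v in ckpt['state_dict'].items()
             if 'grid' not in k and ('flow_reg' not in k or 'conv1' in k)}
    model.load_state_dict(state, strict=False)
    return ckpt['mean_L'], ckpt['mean_R']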
Example #2
File: main.py Project: ne3x7/VCN
##loader_stereo_12 = dr.myImageFloder(iml0,iml1,flowl0,shape = datashape,scale=1, order=0, prob=0.5,dploader=disparity_loader)
##iml0, iml1, flowl0, _, _, _ = lmbs.dataloader('%s/mb-ex-training/'%args.database, res='F')
##loader_stereo_mb = dr.myImageFloder(iml0,iml1,flowl0,shape = datashape,scale=0.5, order=1, prob=0.5,dploader=disparity_loader_mb)
##iml0, iml1, flowl0, _, _, _,_,_ = lsfs.dataloader('%s/sceneflow/'%args.database)
##loader_stereo_sf = dr.myImageFloder(iml0,iml1,flowl0,shape = datashape,scale=1, order=1, dploader=disparity_loader_sf)

# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*75+[loader_stereo_12]*75+[loader_stereo_mb]*600+[loader_stereo_sf])
# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*50+[loader_stereo_12]*50+[loader_stereo_mb]*600+[loader_chairs])
# data_inuse = torch.utils.data.ConcatDataset([loader_chairs]*2 + [loader_things] +[loader_stereo_15]*300+[loader_stereo_12]*300) # stereo transfer
# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*20+[loader_stereo_12]*20) # stereo transfer
# data_inuse = torch.utils.data.ConcatDataset([loader_kitti15]*20+[loader_kitti12]*20+[loader_stereo_15]*20+[loader_stereo_12]*20)
print('%d batches per epoch' % (len(data_inuse) // batch_size))

# TODO
model = VCN([batch_size // ngpus] + data_inuse.datasets[0].shape[::-1],
            md=[int(4 * (args.maxdisp / 256)), 4, 4, 4, 4],
            fac=args.fac)
model = nn.DataParallel(model)
model.cuda()

total_iters = 0
mean_L = [[1.]]
mean_R = [[1.]]
if args.loadmodel is not None:
    pretrained_dict = torch.load(args.loadmodel)
    # Identity copy; no key filtering is applied in this variant.
    pretrained_dict['state_dict'] = {k: v for k, v in pretrained_dict['state_dict'].items()}

    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    if args.retrain == 'true':
        print('re-training')
        with open('./iter_counts-%d.txt' % int(args.logname.split('-')[-1]), 'r') as f:
Example #3
    with open('FlyingChairs_train_val.txt', 'r') as f:
        split = [int(i) for i in f.readlines()]
    # Flag 2 marks validation samples in the standard FlyingChairs split file.
    test_left_img = [test_left_img[i] for i, flag in enumerate(split) if flag == 2]
    test_right_img = [test_right_img[i] for i, flag in enumerate(split) if flag == 2]

if args.model == 'VCN':
    from models.VCN import VCN
elif args.model == 'VCN_small':
    from models.VCN_small import VCN
#if '2015' in args.dataset:
#    model = VCN([1, maxw, maxh], md=[8,4,4,4,4], fac=2)
#elif 'sintel' in args.dataset:
#    model = VCN([1, maxw, maxh], md=[7,4,4,4,4], fac=1.4)
#else:
#    model = VCN([1, maxw, maxh], md=[4,4,4,4,4], fac=1)
model = VCN([1, maxw, maxh], md=[int(4 * (args.maxdisp / 256)), 4, 4, 4, 4], fac=args.fac)

model = nn.DataParallel(model, device_ids=[0])
model.cuda()
if args.loadmodel is not None:

    pretrained_dict = torch.load(args.loadmodel)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    # Drop precomputed 'grid' buffers and flow_reg weights (except conv1),
    # which depend on the input size, before the non-strict load.
    pretrained_dict['state_dict'] = {
        k: v
        for k, v in pretrained_dict['state_dict'].items()
        if 'grid' not in k and (('flow_reg' not in k) or ('conv1' in k))
    }

    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
else:
    mean_L = [[1.]]
    mean_R = [[1.]]
    print('dry run')
Example #4
File: evaluation.py Project: ne3x7/VCN
parser.add_argument(
    '--fac',
    type=float,
    default=1,
    help='controls the shape of the search grid; only affects the coarse cost volume size'
)
args = parser.parse_args()

if args.model == 'VCN':
    from models.VCN import VCN
elif args.model == 'VCN_small':
    from models.VCN_small import VCN

model = VCN([1, 256, 256],
            md=[int(4 * (args.maxdisp / 256)), 4, 4, 4, 4],
            fac=args.fac)

model = nn.DataParallel(model, device_ids=[0])
model.cuda()
if args.loadmodel is not None:

    pretrained_dict = torch.load(args.loadmodel)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    pretrained_dict['state_dict'] = {
        k: v
        for k, v in pretrained_dict['state_dict'].items()
        if 'grid' not in k and (('flow_reg' not in k) or ('conv1' in k))
    }
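
The first entry of md scales linearly with maxdisp while the finer levels stay fixed at 4. A quick worked example (assuming md[0] is the search range at the coarsest pyramid level):

# maxdisp=256 -> md = [int(4 * (256 / 256)), 4, 4, 4, 4] = [4, 4, 4, 4, 4]
# maxdisp=512 -> md = [int(4 * (512 / 256)), 4, 4, 4, 4] = [8, 4, 4, 4, 4]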
Example #5
    type=float,
    default=1,
    help='controls the shape of the search grid; only affects the coarse cost volume size'
)
args = parser.parse_args()

if args.model == 'VCN':
    from models.VCN import VCN
elif args.model == 'VCN_small':
    from models.VCN_small import VCN

maxw, maxh = [256 * args.testres, 256 * args.testres]

model = VCN([1, maxw, maxh],
            md=[int(4 * (args.maxdisp / 256)), 4, 4, 4, 4],
            fac=args.fac)

model = nn.DataParallel(model, device_ids=[0])
model.cuda()
if args.loadmodel is not None:

    pretrained_dict = torch.load(args.loadmodel)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    pretrained_dict['state_dict'] = {
        k: v
        for k, v in pretrained_dict['state_dict'].items()
        if 'grid' not in k and (('flow_reg' not in k) or ('conv1' in k))
    }
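
A hypothetical invocation tying these pieces together (placeholder paths; the flag names mirror the args.* fields read above):

# python evaluation.py --model VCN --loadmodel ./finetune_67999.tar \
#     --maxdisp 256 --fac 1 --testres 1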
Example #6
def calOpt(height=240,
           width=320,
           maxdisp=256,
           fac=1,
           modelpath='finetune_67999.tar'):
    # Calculate model hyperparameters
    # Resize to 64X
    maxh = height
    maxw = width
    max_h = int(maxh // 64 * 64)  # Round down to the nearest multiple of 64
    max_w = int(maxw // 64 * 64)  # (the model expects dimensions divisible by 64)
    if max_h < maxh:  # If rounding down cut anything off, bump up by one step
        max_h += 64
    if max_w < maxw:
        max_w += 64

    # load model
    if MODEL_OPTION == 'base':
        model = VCN([1, max_w, max_h],
                    md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4],
                    fac=fac)
    else:
        model = VCN_small([1, max_w, max_h],
                          md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4],
                          fac=fac)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # load weights
    pretrained_dict = torch.load(modelpath)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    model.eval()
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    cap = cv2.VideoCapture('video.mp4')
    ret, old_frame = cap.read()

    while True:
        ret, frame = cap.read()
        if not ret:  # Stop at the end of the stream
            break

        input_size = old_frame.shape

        # For gray input images:
        # the model expects RGB, i.e. 3 channels, so repeat the single
        # channel over the channel axis [H,W] -> [H,W,3] before resizing.
        if len(old_frame.shape) == 2:
            old_frame = np.tile(old_frame[:, :, np.newaxis], (1, 1, 3))
            frame = np.tile(frame[:, :, np.newaxis], (1, 1, 3))

        imgL = cv2.resize(old_frame, (max_w, max_h))
        imgR = cv2.resize(frame, (max_w, max_h))

        # Flip channel, subtract mean
        # The model expects inputs of format [C,H,W] instead of [H,W,C]
        imgL = imgL[:, :, ::-1].copy() / 255. - np.asarray(mean_L).mean(0)[
            np.newaxis, np.newaxis, :]
        imgR = imgR[:, :, ::-1].copy() / 255. - np.asarray(mean_R).mean(0)[
            np.newaxis, np.newaxis, :]
        imgL = np.transpose(imgL, [2, 0, 1])[np.newaxis]
        imgR = np.transpose(imgR, [2, 0, 1])[np.newaxis]

        # Image to Torch tensor
        imgL = torch.FloatTensor(imgL).cuda()
        imgR = torch.FloatTensor(imgR).cuda()

        # Forward
        with torch.no_grad():
            imgLR = torch.cat([imgL, imgR], 0)
            time1 = time.time()
            rts = model(imgLR)
            pred_disp, entropy = rts
            print('forward: {}s'.format(time.time() - time1))

        k = cv2.waitKey(25)
        if k == 27:
            break

        old_frame = frame.copy()

        # Upsampling
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy(
        )  # Remove batch dimension, torch tensor to numpy ndarray
        pred_disp = cv2.resize(
            np.transpose(pred_disp, (1, 2, 0)), (input_size[1], input_size[0])
        )  # Resize to the original size, and transpose from [C,H,W] -> [H,W,C]
        pred_disp[:, :, 0] *= input_size[1] / max_w
        pred_disp[:, :, 1] *= input_size[0] / max_h
        flow = np.ones([pred_disp.shape[0], pred_disp.shape[1], 3])
        flow[:, :, :2] = pred_disp
        # entropy = torch.squeeze(entropy).data.cpu().numpy()
        # entropy = cv2.resize(entropy, (input_size[1], input_size[0]))

        cv2.imshow('frame', flow_to_image(flow))
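
The floor-then-bump rounding in calOpt pads each dimension up to the next multiple of 64. An equivalent single-step sketch (hypothetical helper name):

def pad_to_multiple(x, m=64):
    # Round x up to the nearest multiple of m; matches the
    # floor-then-bump logic in calOpt above.
    return ((x + m - 1) // m) * m

# pad_to_multiple(240) == 256, pad_to_multiple(320) == 320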
Example #7
def calOpt(height=240, width=320, maxdisp=256, fac=1, modelpath='finetune_67999.tar'):
    # Calculate model hyperparameters
    # Resize to 64X
    maxh = height
    maxw = width
    max_h = int(maxh // 64 * 64)            # Round down to the nearest multiple of 64
    max_w = int(maxw // 64 * 64)            # (the model expects dimensions divisible by 64)
    if max_h < maxh:                        # If rounding down cut anything off,
        max_h += 64                         # bump up by one 64-step
    if max_w < maxw:
        max_w += 64

    # load model
    model = VCN([1, max_w, max_h], md=[int(4*(maxdisp/256)),4,4,4,4], fac=fac)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # load weights
    pretrained_dict = torch.load(modelpath)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    model.eval()
    print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()])))
    
    start_time = time.time()

    # Load image and Resize
    # Note that the images are loaded as [H,W,C] i.e. [H,W,3]
    imgL_o = imageio.imread('image1.png')[:,:,:3]        # In some cases, image files include alpha channel (the 4th channel)
    imgR_o = imageio.imread('image2.png')[:,:,:3]        # Only get the RGB channels (1st 3 channels)
    input_size = imgL_o.shape

    # For gray input images:
    # the model expects RGB, i.e. 3 channels, so repeat the single
    # channel over the channel axis [H,W] -> [H,W,3] before resizing.
    if len(imgL_o.shape) == 2:
        imgL_o = np.tile(imgL_o[:,:,np.newaxis],(1,1,3))
        imgR_o = np.tile(imgR_o[:,:,np.newaxis],(1,1,3))

    imgL = cv2.resize(imgL_o, (max_w, max_h))
    imgR = cv2.resize(imgR_o, (max_w, max_h))

    read_time = time.time()

    # Flip channel, subtract mean
    # The model expects inputs of format [C,H,W] instead of [H,W,C]
    imgL = imgL[:,:,::-1].copy() / 255. - np.asarray(mean_L).mean(0)[np.newaxis,np.newaxis,:]
    imgR = imgR[:,:,::-1].copy() / 255. - np.asarray(mean_R).mean(0)[np.newaxis,np.newaxis,:]
    imgL = np.transpose(imgL, [2,0,1])[np.newaxis]
    imgR = np.transpose(imgR, [2,0,1])[np.newaxis]

    # Image to Torch tensor
    imgL = torch.FloatTensor(imgL).cuda()       
    imgR = torch.FloatTensor(imgR).cuda()       

    # Forward
    with torch.no_grad():
        imgLR = torch.cat([imgL,imgR],0)
        time1 = time.time()
        rts = model(imgLR)
        pred_disp, entropy = rts
        time2 = time.time()

    print('forward: {}s'.format(time2 - time1))

    forward_time = time.time()

    # Upsampling
    pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()                                 # Remove batch dimension, torch tensor to numpy ndarray
    pred_disp = cv2.resize(np.transpose(pred_disp,(1,2,0)), (input_size[1], input_size[0])) # Resize to the original size, and transpose from [C,H,W] -> [H,W,C]
    pred_disp[:,:,0] *= input_size[1] / max_w
    pred_disp[:,:,1] *= input_size[0] / max_h
    flow = np.ones([pred_disp.shape[0],pred_disp.shape[1],3])
    flow[:,:,:2] = pred_disp
    entropy = torch.squeeze(entropy).data.cpu().numpy()
    entropy = cv2.resize(entropy, (input_size[1], input_size[0]))

    upsample_time = time.time()

    print("Read: {}s".format(read_time - start_time))
    print("Forward: {}s".format(forward_time - start_time))
    print("Upsample: {}s".format(upsample_time - start_time))

    # Show results
    showImage(flow_to_image(flow), "flow_to_image.png")
    showImage(point_vec(imgL_o, flow)[:, :, ::-1], "vector_on_image.png")
    showImage(entropy, "entropy.png")
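
Both calOpt variants end by resizing the predicted flow back to the input resolution and rescaling its two components by the corresponding size ratios, since displacements are measured in pixels. A self-contained sketch of that step (hypothetical helper name):

import cv2
import numpy as np

def upsample_flow(pred_disp, net_w, net_h, out_w, out_h):
    # pred_disp: [2,H,W] network output; returns an [out_h,out_w,2] field.
    flow = cv2.resize(np.transpose(pred_disp, (1, 2, 0)), (out_w, out_h))
    flow[:, :, 0] *= out_w / net_w  # horizontal displacements scale with width
    flow[:, :, 1] *= out_h / net_h  # vertical displacements scale with height
    return flow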