Example #1
# Assumed imports for this snippet (not shown in the original excerpt):
import time
import cv2
import numpy as np
import torch
import torch.nn as nn
# VCN, VCN_small, flow_to_image, and MODEL_OPTION come from the surrounding project.
def calOpt(height=240,
           width=320,
           maxdisp=256,
           fac=1,
           modelpath='finetune_67999.tar'):
    # Calculate model hyperparameters
    # The network expects input dimensions that are multiples of 64
    maxh = height
    maxw = width
    max_h = int(maxh // 64 * 64)  # Round down to the nearest multiple of 64
    max_w = int(maxw // 64 * 64)
    if max_h < maxh:  # If anything was cut off, round up to the next multiple of 64
        max_h += 64
    if max_w < maxw:
        max_w += 64
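    # Equivalent one-liner (a sketch, not from the original code):
    #   max_h = ((maxh + 63) // 64) * 64
    #   max_w = ((maxw + 63) // 64) * 64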

    # load model
    if MODEL_OPTION == 'base':
        model = VCN([1, max_w, max_h],
                    md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4],
                    fac=fac)
    else:
        model = VCN_small([1, max_w, max_h],
                          md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4],
                          fac=fac)
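    # md appears to set the correlation search range per pyramid level, with the
    # coarsest level scaled by maxdisp (an assumption based on the call signature).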
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # load weights
    pretrained_dict = torch.load(modelpath)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    model.eval()
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    cap = cv2.VideoCapture('video.mp4')
    ret, old_frame = cap.read()

    while True:
        ret, frame = cap.read()
        if not ret:  # End of stream
            break

        # For gray input frames
        # The model expects 3-channel (RGB) input, so replicate the single
        # channel three times: [H,W] -> [H,W,3] (must happen before resizing)
        if len(old_frame.shape) == 2:
            old_frame = np.tile(old_frame[:, :, np.newaxis], (1, 1, 3))
            frame = np.tile(frame[:, :, np.newaxis], (1, 1, 3))

        input_size = old_frame.shape
        imgL = cv2.resize(old_frame, (max_w, max_h))
        imgR = cv2.resize(frame, (max_w, max_h))

        # Flip BGR -> RGB, scale to [0,1], and subtract the per-channel mean
        imgL = imgL[:, :, ::-1].copy() / 255. - np.asarray(mean_L).mean(0)[
            np.newaxis, np.newaxis, :]
        imgR = imgR[:, :, ::-1].copy() / 255. - np.asarray(mean_R).mean(0)[
            np.newaxis, np.newaxis, :]
        # The model expects [N,C,H,W], so transpose [H,W,C] -> [C,H,W]
        # and add a batch axis
        imgL = np.transpose(imgL, [2, 0, 1])[np.newaxis]
        imgR = np.transpose(imgR, [2, 0, 1])[np.newaxis]

        # Image to Torch tensor
        imgL = torch.FloatTensor(imgL).cuda()
        imgR = torch.FloatTensor(imgR).cuda()

        # Forward
        with torch.no_grad():
            imgLR = torch.cat([imgL, imgR], 0)
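            # The two frames are stacked along the batch dimension and pass through
            # the network together; VCN's forward appears to split the pair internally.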
            time1 = time.time()
            rts = model(imgLR)
            pred_disp, entropy = rts
            print('Forward time: {:.3f}s'.format(time.time() - time1))

        k = cv2.waitKey(25)
        if k == 27:
            break

        old_frame = frame.copy()

        # Upsampling
        # Remove the batch dimension, torch tensor to numpy ndarray
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()
        # Transpose [C,H,W] -> [H,W,C], then resize back to the original resolution
        pred_disp = cv2.resize(np.transpose(pred_disp, (1, 2, 0)),
                               (input_size[1], input_size[0]))
        # Rescale the vectors to match the resize: u by the width ratio, v by the height ratio
        pred_disp[:, :, 0] *= input_size[1] / max_w
        pred_disp[:, :, 1] *= input_size[0] / max_h
        flow = np.ones([pred_disp.shape[0], pred_disp.shape[1], 3])
        flow[:, :, :2] = pred_disp
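        # The first two channels hold (u, v); the third channel of ones acts as a
        # valid-pixel mask, which KITTI-style flow utilities commonly expect (assumption).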
        # entropy = torch.squeeze(entropy).data.cpu().numpy()
        # entropy = cv2.resize(entropy, (input_size[1], input_size[0]))

        cv2.imshow('frame', flow_to_image(flow))
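Note: the excerpt ends inside the display loop. A complete script would typically release resources after the loop; a minimal sketch using standard OpenCV calls, at the loop's indentation level:

    cap.release()            # Free the video source
    cv2.destroyAllWindows()  # Close the cv2.imshow window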
Example #2
File: main.py  Project: ne3x7/VCN
# Note: this excerpt assumes names defined earlier in main.py
# (args, batch_size, ngpus, and the loader_* / data_inuse datasets).
##iml0, iml1, flowl0, _, _, _,_,_ = lsfs.dataloader('%s/sceneflow/'%args.database)
##loader_stereo_sf = dr.myImageFloder(iml0,iml1,flowl0,shape = datashape,scale=1, order=1, dploader=disparity_loader_sf)

# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*75+[loader_stereo_12]*75+[loader_stereo_mb]*600+[loader_stereo_sf])
# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*50+[loader_stereo_12]*50+[loader_stereo_mb]*600+[loader_chairs])
# data_inuse = torch.utils.data.ConcatDataset([loader_chairs]*2 + [loader_things] +[loader_stereo_15]*300+[loader_stereo_12]*300) # stereo transfer
# data_inuse = torch.utils.data.ConcatDataset([loader_stereo_15]*20+[loader_stereo_12]*20) # stereo transfer
# data_inuse = torch.utils.data.ConcatDataset([loader_kitti15]*20+[loader_kitti12]*20+[loader_stereo_15]*20+[loader_stereo_12]*20)
print('%d batches per epoch' % (len(data_inuse) // batch_size))

# TODO
model = VCN([batch_size // ngpus] + data_inuse.datasets[0].shape[::-1],
            md=[int(4 * (args.maxdisp / 256)), 4, 4, 4, 4],
            fac=args.fac)
model = nn.DataParallel(model)
model.cuda()

total_iters = 0
mean_L = [[1.]]
mean_R = [[1.]]
if args.loadmodel is not None:
    pretrained_dict = torch.load(args.loadmodel)
    pretrained_dict['state_dict'] = {k: v for k, v in pretrained_dict['state_dict'].items()}  # As written, a no-op copy of the state dict

    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    if args.retrain == 'true':
        print('re-training')
        with open('./iter_counts-%d.txt' % int(args.logname.split('-')[-1]), 'r') as f:
            total_iters = int(f.readline())
    else:
        with open('./iter_counts-%d.txt' % int(args.logname.split('-')[-1]), 'r') as f:
            total_iters = int(f.readline())  # Assumed continuation, mirroring the branch above
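The commented-out ConcatDataset lines above balance datasets of different sizes by repeating the smaller ones: multiplying a dataset list by k oversamples that dataset k-fold. A minimal, self-contained sketch of the idea (the datasets here are hypothetical stand-ins):

import torch
from torch.utils.data import ConcatDataset, TensorDataset

small_ds = TensorDataset(torch.zeros(10, 3))   # hypothetical 10-sample dataset
large_ds = TensorDataset(torch.zeros(200, 3))  # hypothetical 200-sample dataset

# Repeating the small dataset 20x gives it comparable weight to the large one
balanced = ConcatDataset([small_ds] * 20 + [large_ds])
print(len(balanced))  # 10*20 + 200 = 400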
Example #3
# Assumed imports for this snippet (not shown in the original excerpt):
import time
import cv2
import imageio
import numpy as np
import torch
import torch.nn as nn
# VCN, flow_to_image, point_vec, and showImage come from the surrounding project.
def calOpt(height=240, width=320, maxdisp=256, fac=1, modelpath='finetune_67999.tar'):
    # Calculate model hyperparameters
    # The network expects input dimensions that are multiples of 64
    maxh = height
    maxw = width
    max_h = int(maxh // 64 * 64)            # Round down to the nearest multiple of 64
    max_w = int(maxw // 64 * 64)
    if max_h < maxh:                        # If anything was cut off, round up to the next multiple of 64
        max_h += 64
    if max_w < maxw:
        max_w += 64

    # load model
    model = VCN([1, max_w, max_h], md=[int(4*(maxdisp/256)),4,4,4,4], fac=fac)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # load weights
    pretrained_dict = torch.load(modelpath)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    model.eval()
    print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()])))
    
    start_time = time.time()

    # Load images
    # Note that color images are loaded as [H,W,C], i.e. [H,W,3]
    imgL_o = imageio.imread('image1.png')
    imgR_o = imageio.imread('image2.png')

    # For gray input images
    # The model expects 3-channel (RGB) input, so replicate the single
    # channel three times: [H,W] -> [H,W,3]
    if len(imgL_o.shape) == 2:
        imgL_o = np.tile(imgL_o[:,:,np.newaxis],(1,1,3))
        imgR_o = np.tile(imgR_o[:,:,np.newaxis],(1,1,3))

    # Some image files include an alpha channel (a 4th channel);
    # keep only the first 3 (RGB) channels
    imgL_o = imgL_o[:,:,:3]
    imgR_o = imgR_o[:,:,:3]

    input_size = imgL_o.shape
    imgL = cv2.resize(imgL_o, (max_w, max_h))
    imgR = cv2.resize(imgR_o, (max_w, max_h))

    read_time = time.time()

    # Reverse the channel order and subtract the per-channel mean
    imgL = imgL[:,:,::-1].copy() / 255. - np.asarray(mean_L).mean(0)[np.newaxis,np.newaxis,:]
    imgR = imgR[:,:,::-1].copy() / 255. - np.asarray(mean_R).mean(0)[np.newaxis,np.newaxis,:]
    # The model expects [N,C,H,W], so transpose [H,W,C] -> [C,H,W] and add a batch axis
    imgL = np.transpose(imgL, [2,0,1])[np.newaxis]
    imgR = np.transpose(imgR, [2,0,1])[np.newaxis]

    # Image to Torch tensor
    imgL = torch.FloatTensor(imgL).cuda()       
    imgR = torch.FloatTensor(imgR).cuda()       

    # Forward
    with torch.no_grad():
        imgLR = torch.cat([imgL,imgR],0)
        time1 = time.time()
        rts = model(imgLR)
        pred_disp, entropy = rts
        time2 = time.time()

    print('Forward pass: {:.3f}s'.format(time2 - time1))

    forward_time = time.time()

    # Upsampling
    pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()                                 # Remove batch dimension, torch tensor to numpy ndarray
    pred_disp = cv2.resize(np.transpose(pred_disp,(1,2,0)), (input_size[1], input_size[0])) # Transpose [C,H,W] -> [H,W,C], then resize to the original size
    pred_disp[:,:,0] *= input_size[1] / max_w                                               # Rescale u by the width ratio
    pred_disp[:,:,1] *= input_size[0] / max_h                                               # and v by the height ratio
    flow = np.ones([pred_disp.shape[0],pred_disp.shape[1],3])
    flow[:,:,:2] = pred_disp
    entropy = torch.squeeze(entropy).data.cpu().numpy()
    entropy = cv2.resize(entropy, (input_size[1], input_size[0]))

    upsample_time = time.time()

    print("Read: {}s".format(read_time - start_time))
    print("Forward: {}s".format(forward_time - start_time))
    print("Upsample: {}s".format(upsample_time - start_time))

    # Show results
    showImage(flow_to_image(flow), "flow_to_image.png")
    showImage(point_vec(imgL_o, flow)[:,:,::-1], "vector_on_image.png")
    showImage(entropy, "entropy.png")
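The upsampling block above mixes resizing and vector rescaling. The rule it applies: when a flow field is resized, u must scale by the width ratio and v by the height ratio, because the vectors are measured in pixels of the new grid. A minimal sketch (the helper name resize_flow is hypothetical, not from the repo):

import cv2
import numpy as np

def resize_flow(flow, new_w, new_h):
    # Resize a [H,W,2] flow field and rescale its vectors to the new pixel grid
    old_h, old_w = flow.shape[:2]
    out = cv2.resize(flow, (new_w, new_h))
    out[:, :, 0] *= new_w / old_w  # u is measured in pixels, so scale by the width ratio
    out[:, :, 1] *= new_h / old_h  # v scales by the height ratio
    return out

# Example: upsample a dummy 2x2 flow field to 4x4
print(resize_flow(np.ones((2, 2, 2), np.float32), 4, 4).shape)  # (4, 4, 2)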