Example #1
def main():
    # Check if arguments are okay
    error = check()
    if error:
        print(error)
        exit(1)

    # Create extraction folder and extract frames
    IS_WINDOWS = 'Windows' == platform.system()
    extractionDir = "tmpSuperSloMo"
    if not IS_WINDOWS:
        # Assuming UNIX-like system where "." indicates hidden directories
        extractionDir = "." + extractionDir
    if os.path.isdir(extractionDir):
        rmtree(extractionDir)
    os.mkdir(extractionDir)
    if IS_WINDOWS:
        FILE_ATTRIBUTE_HIDDEN = 0x02
        # ctypes.windll only exists on Windows
        ctypes.windll.kernel32.SetFileAttributesW(extractionDir,
                                                  FILE_ATTRIBUTE_HIDDEN)

    extractionPath = os.path.join(extractionDir, "input")
    outputPath = os.path.join(extractionDir, "output")
    os.mkdir(extractionPath)
    os.mkdir(outputPath)
    error = extract_frames(args.video, extractionPath)
    if error:
        print(error)
        exit(1)

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)

    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)

    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=extractionPath, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames,
                                                    batch_size=args.batch_size,
                                                    shuffle=False)

    # Initialize model
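    # flowComp: UNet(6, 4) -- input is the two RGB frames concatenated (6 channels),
    # output is the bidirectional optical flow (2 channels for F_0_1, 2 for F_1_0).
    # ArbTimeFlowIntrp: UNet(20, 5) -- input stacks I0, I1, both flows, both intermediate
    # flows and the two backwarped frames (20 channels); output is two 2-channel flow
    # residuals plus a 1-channel visibility map.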
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    for param in flowComp.parameters():
        param.requires_grad = False
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1],
                                  device)
    flowBackWarp = flowBackWarp.to(device)

    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 1

    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):

            I0 = frame0.to(device)
            I1 = frame1.to(device)

            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder
            for batchIndex in range(args.batch_size):
                (TP(frame0[batchIndex].detach())).resize(
                    videoFrames.origDim, Image.BILINEAR).save(
                        os.path.join(
                            outputPath,
                            str(frameCounter + args.sf * batchIndex) + ".jpg"))
            frameCounter += 1

            # Generate intermediate frames
            for intermediateIndex in range(1, args.sf):
                t = intermediateIndex / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]
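                # Intermediate flow approximation (Super SloMo, Jiang et al. 2018):
                #   F_t_0 = -t*(1-t)*F_0_1 + t*t*F_1_0
                #   F_t_1 = (1-t)*(1-t)*F_0_1 - t*(1-t)*F_1_0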

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1,
                               g_I0_F_t_0),
                              dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
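                # V_t_0 is a soft visibility (occlusion) map for frame 0 at time t;
                # V_t_1 = 1 - V_t_0 so the two maps sum to one at every pixel.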
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]
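                # Visibility-weighted blend of the two warped candidates:
                #   Ft_p = ((1-t)*V_t_0*g(I0, F_t_0_f) + t*V_t_1*g(I1, F_t_1_f))
                #          / ((1-t)*V_t_0 + t*V_t_1)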

                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 *
                        g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

                # Save intermediate frame
                for batchIndex in range(args.batch_size):
                    (TP(Ft_p[batchIndex].cpu().detach())).resize(
                        videoFrames.origDim, Image.BILINEAR).save(
                            os.path.join(
                                outputPath,
                                str(frameCounter + args.sf * batchIndex) +
                                ".jpg"))
                frameCounter += 1

            # Set counter accounting for batching of frames
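            # Within this iteration, batch item b saved its frames at offsets of
            # args.sf * b from the counter; the inner loop advanced frameCounter by
            # args.sf (one reference frame + sf-1 intermediates), so skip past the
            # slots used by the other batch_size - 1 items.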
            frameCounter += args.sf * (args.batch_size - 1)

    # Generate video from interpolated frames
    create_video(outputPath)

    # Remove temporary files
    rmtree(extractionDir)

    exit(0)
Example #2
def main():
    extractPath = "./video_interpolation"
    prepare_folders(extractPath)

    video_to_images(args.video, os.path.join(extractPath, "input"))

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    normalize = torchvision.transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[1, 1, 1])
    revNormalize = torchvision.transforms.Normalize(mean=[-0.5, -0.5, -0.5], std=[1, 1, 1])

    if device.type == "cpu":
        transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
        TP = torchvision.transforms.Compose([torchvision.transforms.ToPILImage()])
    else:
        transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor(), normalize])
        TP = torchvision.transforms.Compose([revNormalize, torchvision.transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=os.path.join(extractPath, "input"), transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames, batch_size=2, shuffle=False)

    # Initialize model
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    for param in flowComp.parameters():
        param.requires_grad = False
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1], device)
    flowBackWarp = flowBackWarp.to(device)

    dict1 = torch.load("./checkpoints/Interpolation0.ckpt", map_location=device)
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 0

    # batch_size = 2
    with torch.no_grad():
        for frameIndex, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):

            I0 = frame0.to(device)
            I1 = frame1.to(device)

            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            F_0_1 = flowOut[:,:2,:,:]
            F_1_0 = flowOut[:,2:,:,:]

            # Save reference frames in output folder
            for batchIndex in range(2):
                (TP(frame0[batchIndex].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(os.path.join(os.path.join(extractPath, "output"), "frame{:05d}.png".format(frameCounter + 2 * batchIndex)))
            frameCounter += 1

            # Generate intermediate frame

            t = float(1) / 2
            temp = -t * (1 - t)
            fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]
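            # With t fixed at 0.5, fCoeff evaluates to [-0.25, 0.25, 0.25, -0.25]:
            # the midpoint case of the Super SloMo intermediate-flow approximation.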

            F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
            F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

            g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
            g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

            intrpOut = ArbTimeFlowIntrp(torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1, g_I0_F_t_0), dim=1))

            F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
            F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
            V_t_0   = torch.sigmoid(intrpOut[:, 4:5, :, :])
            V_t_1   = 1 - V_t_0

            g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
            g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

            wCoeff = [1 - t, t]

            Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

            # Save intermediate frame
            for batchIndex in range(2):
                (TP(Ft_p[batchIndex].cpu().detach())).resize(videoFrames.origDim, Image.BILINEAR).save(os.path.join(os.path.join(extractPath, "output"), "frame{:05d}.png".format(frameCounter + 2 * batchIndex)))
            frameCounter += 1

            frameCounter += 2
    # Generate video from interpolated frames
    create_video(os.path.join(extractPath, "output"), os.path.join(extractPath, "output"))
Example #3
def main():
    # Check if arguments are okay
    error = check()
    if error:
        print(error)
        exit(1)

    # Create extraction folder and extract frames
    # Assuming UNIX-like system where "." indicates hidden directories
    extractionDir = ".tmpSuperSloMo"
    # if os.path.isdir(extractionDir):
    #     rmtree(extractionDir)
    # os.mkdir(extractionDir)

    extractionPath = os.path.join(extractionDir, "input")
    outputPath     = os.path.join(extractionDir, "output")
    # os.mkdir(extractionPath)
    # os.mkdir(outputPath)
    # error = extract_frames(args.video, extractionPath)
    # if error:
    #     print(error)
    #     exit(1)

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    mean = [0.429, 0.431, 0.397]
    std  = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean,
                                     std=std)

    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=extractionPath, transform=transform)
    # pdb.set_trace()
    # len(videoFrames[0]) ==> 2
    # (Pdb) videoFrames[0][0].size()
    # torch.Size([3, 512, 960])


    videoFramesloader = torch.utils.data.DataLoader(videoFrames, batch_size=1, shuffle=False)

    # Initialize model
    # UNet(inChannels, outChannels)
    # flow Computation !!!
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    for param in flowComp.parameters():
        param.requires_grad = False

    # arbitrary-time flow interpolation network
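    # UNet(20, 5): 20 input channels = I0 (3) + I1 (3) + F_0_1 (2) + F_1_0 (2)
    #              + F_t_1 (2) + F_t_0 (2) + g_I1_F_t_1 (3) + g_I0_F_t_0 (3);
    # 5 output channels = two 2-channel flow residuals + a 1-channel visibility map.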
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1], device)
    flowBackWarp = flowBackWarp.to(device)
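    # NVIDIA Apex automatic mixed precision (assumes `from apex import amp` at the top
    # of the script); opt_level "O1" patches eligible ops to run in FP16 at inference.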
    flowBackWarp = amp.initialize(flowBackWarp, opt_level="O1")
    # pdb.set_trace()
    # (Pdb) videoFrames.dim[0], videoFrames.dim[1]
    # (960, 512)

    dict1 = torch.load(args.checkpoint, map_location='cpu')
    # dict_keys(['Detail', 'epoch', 'timestamp', 'trainBatchSz', 'validationBatchSz', 
    # 'learningRate', 'loss', 'valLoss', 'valPSNR', 'state_dictFC', 'state_dictAT'])

    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    ArbTimeFlowIntrp = amp.initialize(ArbTimeFlowIntrp, opt_level="O1")
    flowComp = amp.initialize(flowComp, opt_level="O1")

    # Interpolate frames
    frameCounter = 1

    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):

            I0 = frame0.to(device)
            I1 = frame1.to(device)
            # pdb.set_trace()
            # torch.Size([1, 3, 512, 960])

            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            F_0_1 = flowOut[:,:2,:,:]
            F_1_0 = flowOut[:,2:,:,:]
            # (Pdb) pp flowOut.size()
            # torch.Size([1, 4, 512, 960])
            # (Pdb) pp F_0_1.size()
            # torch.Size([1, 2, 512, 960])
            # (Pdb) pp F_1_0.size()
            # torch.Size([1, 2, 512, 960])

            # pdb.set_trace()
            # (Pdb) pp I0.size(), I1.size()
            # (torch.Size([1, 3, 512, 960]), torch.Size([1, 3, 512, 960]))
            # (Pdb) pp torch.cat((I0, I1), dim=1).size()
            # torch.Size([1, 6, 512, 960])

            # Save reference frames in output folder
            (TP(frame0[0].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(\
                os.path.join(outputPath, "{:06d}.png".format(frameCounter)))
            frameCounter += 1

            # Generate intermediate frames
            # (Pdb) for i in range(1, args.sf): print(i)
            # 1
            # 2
            # 3
            for intermediateIndex in range(1, args.sf):
                t = float(intermediateIndex) / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

                # pdb.set_trace()
                # (Pdb) pp temp
                # -0.1875
                # (Pdb) pp fCoeff
                # [-0.1875, 0.0625, 0.5625, -0.1875]

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                intrpOut = ArbTimeFlowIntrp(\
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, \
                        g_I1_F_t_1, g_I0_F_t_0), \
                    dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1

                # pdb.set_trace()
                # (Pdb) intrpOut.size()
                # torch.Size([1, 5, 512, 960])

                V_t_0   = torch.sigmoid(intrpOut[:, 4:5, :, :])
                V_t_1   = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                # pdb.set_trace()

                wCoeff = [1 - t, t]

                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 * g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

                del g_I0_F_t_0_f, g_I1_F_t_1_f, F_t_0_f, F_t_1_f, F_t_0, F_t_1, intrpOut, V_t_0, V_t_1, wCoeff
                torch.cuda.empty_cache()

                # pdb.set_trace()

                # Save intermediate frame
                (TP(Ft_p[0].cpu().detach())).resize(videoFrames.origDim, Image.BILINEAR).save(os.path.join(outputPath, "{:06d}.png".format(frameCounter)))
                del Ft_p
                torch.cuda.empty_cache()

                frameCounter += 1

            del F_0_1, F_1_0, flowOut, I0, I1, frame0, frame1
            torch.cuda.empty_cache()

    # Generate video from interpolated frames
    # create_video(outputPath)

    # Remove temporary files
    # rmtree(extractionDir)

    exit(0)
def evaluate_frame_dir(extractionPath):
    # Assumption: the working directories live next to the ground-truth frame directory
    extractionDir = os.path.dirname(extractionPath)
    outputPath = os.path.join(extractionDir, "output")
    inputframe_dir = os.path.join(extractionDir, "inputframe")

    if os.path.exists(outputPath): rmtree(outputPath)
    if os.path.exists(inputframe_dir): rmtree(inputframe_dir)

    os.makedirs(outputPath, exist_ok=True)
    os.makedirs(inputframe_dir, exist_ok=True)

    frames_gt = os.listdir(extractionPath)
    frames_gt.sort()

    print(frames_gt)
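    # Keep only every args.sf-th ground-truth frame as model input; the dropped frames
    # are re-synthesized below and compared against the originals via PSNR/SSIM.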
    for ind, i in enumerate(frames_gt):
        if ind % (args.sf) == 0:
            shutil.copyfile(os.path.join(extractionPath, i),
                            os.path.join(inputframe_dir, i))

    video_time = time.time()
    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)

    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":
        # ToTensor only; this transform is applied to each input image later
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=inputframe_dir, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames,
                                                    batch_size=args.batch_size,
                                                    shuffle=False)

    # Initialize model
    # First UNet: computes the bidirectional optical flow
    flowComp = model.UNet(6, 4)
    flowComp.to(device)

    # Inference only, so disable gradients (no training backprop)
    for param in flowComp.parameters():
        param.requires_grad = False
    # Second UNet: synthesizes the arbitrary-time intermediate frame
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1],
                                  device)
    flowBackWarp = flowBackWarp.to(device)
    # Load the model checkpoint
    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 0
    '''
    tqdm is a fast, extensible Python progress bar: wrapping any iterator with
    tqdm(iterator) adds a progress indicator to long-running loops.
    '''
    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):

            I0 = frame0.to(device)
            I1 = frame1.to(device)
            #print (I0.shape)

            # Implementation detail: concatenate the two frames along dim 1 (channels)
            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            # In flowOut, channels 0-1 hold the 0->1 flow and channels 2-3 the 1->0 flow
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder
            # Save the original video frames
            for batchIndex in range(args.batch_size):
                pass
                #(TP(frame0[batchIndex].detach())).resize(videoFrames.origDim, Image.BILINEAR).save(os.path.join(outputPath, str(frameCounter + args.sf * batchIndex) + ".jpg"))
            frameCounter += 1
            sttime = time.time()
            # Generate intermediate frames
            for intermediateIndex in range(1, args.sf):
                t = intermediateIndex / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                # Preliminary result of the first UNet; worth comparing its quality against my first-stage output
                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                # Concatenate all of the tensors above and feed them into the next prediction network
                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1,
                               g_I0_F_t_0),
                              dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]

                # Note how the visibility masks are folded into the two-frame blend, together with the temporal weights
                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 *
                        g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)
                #print (Ft_p.shape)
                # What do the intermediate (first-stage) results look like here?
                #Ft_p = (wCoeff[0] * g_I0_F_t_0 + wCoeff[1] * g_I1_F_t_1)
                # The result jitters a bit temporally, but the sharpness is quite good; likely a loss-function issue

                # Save intermediate frame
                # Save the interpolated intermediate frames
                for batchIndex in range(args.batch_size):
                    #ttp="%06d.jpg"%(frameCounter + args.sf * batchIndex)
                    ttp = frames_gt[frameCounter + args.sf * batchIndex]

                    ttp = os.path.join(outputPath, ttp)
                    #print (videoFrames.origDim) #(480, 270)
                    (TP(Ft_p[batchIndex].cpu().detach())).save(ttp)
                frameCounter += 1
            print("run %d iters, time:%f ,average:%f s/iter" %
                  (args.sf - 1, time.time() - sttime,
                   (time.time() - sttime) / (args.sf - 1)))
            # Set counter accounting for batching of frames
            frameCounter += args.sf * (args.batch_size - 1)

    ssim_kep = []
    psnr_kep = []
    for i in os.listdir(outputPath):
        gt_img = cv2.imread(os.path.join(extractionPath, i))
        genimg = cv2.imread(os.path.join(outputPath, i))

        # scale > 0: target resolution is the ground-truth resolution times scale;
        # scale <= 0: use the generated image's resolution
        scale = 1
        if scale > 0:
            target_shape = (int(gt_img.shape[1] * scale),
                            int(gt_img.shape[0] * scale))
        else:
            target_shape = (genimg.shape[1], genimg.shape[0])
        #print (genimg.shape)
        gt_img = cv2.resize(gt_img, target_shape)
        genimg = cv2.resize(genimg, target_shape)
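        # Note: in scikit-image >= 0.18 compare_psnr/compare_ssim were removed in favor of
        # skimage.metrics.peak_signal_noise_ratio and skimage.metrics.structural_similarity.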

        psnr = skimage.measure.compare_psnr(gt_img, genimg, 255)
        ssim = skimage.measure.compare_ssim(gt_img, genimg, multichannel=True)

        psnr_kep.append(psnr)
        ssim_kep.append(ssim)
    print("mean psnr:", np.mean(psnr_kep))
    print("mean ssim:", np.mean(ssim_kep))
    print("this video time used:", time.time() - video_time)
    # Generate video from interpolated frames
    #create_video(outputPath)

    # Remove temporary files
    rmtree(outputPath)
    rmtree(inputframe_dir)
def main():
    # Check if arguments are okay
    error = check()
    if error:
        print(error)
        exit(1)

    # Create extraction folder and extract frames
    IS_WINDOWS = 'Windows' == platform.system()
    extractionDir = "tmpSuperSloMo"

    # A folder is needed for the extracted frames; making it hidden is not really worth the effort
    if not IS_WINDOWS:
        # Assuming UNIX-like system where "." indicates hidden directories
        extractionDir = "." + extractionDir

    if os.path.isdir(extractionDir):
        rmtree(extractionDir)
    os.mkdir(extractionDir)
    if IS_WINDOWS:
        FILE_ATTRIBUTE_HIDDEN = 0x02
        # ctypes.windll only exists on Windows
        ctypes.windll.kernel32.SetFileAttributesW(extractionDir,
                                                  FILE_ATTRIBUTE_HIDDEN)

    extractionPath = os.path.join(extractionDir, "input")
    outputPath = os.path.join(extractionDir, "output")
    os.mkdir(extractionPath)
    os.mkdir(outputPath)
    error = extract_frames(args.video, extractionPath)
    if error:
        print(error)
        exit(1)

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)

    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":
        # ToTensor only; this transform is applied to each input image later
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
    videoFrames = dataloader.Video(root=extractionPath, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames,
                                                    batch_size=args.batch_size,
                                                    shuffle=False)

    # Initialize model
    # First UNet: computes the bidirectional optical flow
    flowComp = model.UNet(6, 4)
    flowComp.to(device)

    # Inference only, so disable gradients (no training backprop)
    for param in flowComp.parameters():
        param.requires_grad = False
    # Second UNet: synthesizes the arbitrary-time intermediate frame
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1],
                                  device)
    flowBackWarp = flowBackWarp.to(device)
    # Load the model checkpoint
    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 1
    '''
    tqdm is a fast, extensible Python progress bar: wrapping any iterator with
    tqdm(iterator) adds a progress indicator to long-running loops.
    '''
    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):

            I0 = frame0.to(device)
            I1 = frame1.to(device)

            # Implementation detail: concatenate the two frames along dim 1 (channels)
            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            # In flowOut, channels 0-1 hold the 0->1 flow and channels 2-3 the 1->0 flow
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder
            # Save the original video frames
            for batchIndex in range(args.batch_size):
                (TP(frame0[batchIndex].detach())).resize(
                    videoFrames.origDim, Image.BILINEAR).save(
                        os.path.join(
                            outputPath,
                            str(frameCounter + args.sf * batchIndex) + ".jpg"))
            frameCounter += 1

            # Generate intermediate frames
            for intermediateIndex in range(1, args.sf):
                t = intermediateIndex / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                # Preliminary result of the first UNet; worth comparing its quality against my first-stage output
                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                # Concatenate all of the tensors above and feed them into the next prediction network
                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1,
                               g_I0_F_t_0),
                              dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]

                # Note how the visibility masks are folded into the two-frame blend, together with the temporal weights
                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 *
                        g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

                # What do the intermediate (first-stage) results look like here?
                #Ft_p = (wCoeff[0] * g_I0_F_t_0 + wCoeff[1] * g_I1_F_t_1)
                # The result jitters a bit temporally, but the sharpness is quite good; likely a loss-function issue

                # Save intermediate frame
                # Save the interpolated intermediate frames
                for batchIndex in range(args.batch_size):
                    (TP(Ft_p[batchIndex].cpu().detach())).resize(
                        videoFrames.origDim, Image.BILINEAR).save(
                            os.path.join(
                                outputPath,
                                str(frameCounter + args.sf * batchIndex) +
                                ".jpg"))
                frameCounter += 1

            # Set counter accounting for batching of frames
            frameCounter += args.sf * (args.batch_size - 1)

    # Generate video from interpolated frames
    create_video(outputPath)

    # Remove temporary files
    rmtree(extractionDir)

    exit(0)
Example #6
def main():
    os.makedirs(args.output, exist_ok=True)
    outputPath = args.output

    if args.sf < 2:
        print("Slowmo factor must be at least 2.")
        return

    # Initialize transforms
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    mean = [0.429, 0.431, 0.397]
    std = [1, 1, 1]
    normalize = transforms.Normalize(mean=mean, std=std)

    negmean = [x * -1 for x in mean]
    revNormalize = transforms.Normalize(mean=negmean, std=std)

    # Temporary fix for issue #7 https://github.com/avinashpaliwal/Super-SloMo/issues/7 -
    # - Removed per channel mean subtraction for CPU.
    if device.type == "cpu":
        transform = transforms.Compose([transforms.ToTensor()])
        TP = transforms.Compose([transforms.ToPILImage()])
    else:
        transform = transforms.Compose([transforms.ToTensor(), normalize])
        TP = transforms.Compose([revNormalize, transforms.ToPILImage()])

    # Load data
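    # extractionPath is assumed to point to the directory of previously extracted
    # input frames (it is not assigned in this variant).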
    videoFrames = dataloader.Video(root=extractionPath, transform=transform)
    videoFramesloader = torch.utils.data.DataLoader(videoFrames,
                                                    batch_size=args.batch_size,
                                                    shuffle=False)

    # Initialize model
    flowComp = model.UNet(6, 4)
    flowComp.to(device)
    for param in flowComp.parameters():
        param.requires_grad = False
    ArbTimeFlowIntrp = model.UNet(20, 5)
    ArbTimeFlowIntrp.to(device)
    for param in ArbTimeFlowIntrp.parameters():
        param.requires_grad = False

    flowBackWarp = model.backWarp(videoFrames.dim[0], videoFrames.dim[1],
                                  device)
    flowBackWarp = flowBackWarp.to(device)

    dict1 = torch.load(args.checkpoint, map_location='cpu')
    ArbTimeFlowIntrp.load_state_dict(dict1['state_dictAT'])
    flowComp.load_state_dict(dict1['state_dictFC'])

    # Interpolate frames
    frameCounter = 1

    with torch.no_grad():
        for _, (frame0, frame1) in enumerate(tqdm(videoFramesloader), 0):

            I0 = frame0.to(device)
            I1 = frame1.to(device)

            flowOut = flowComp(torch.cat((I0, I1), dim=1))
            F_0_1 = flowOut[:, :2, :, :]
            F_1_0 = flowOut[:, 2:, :, :]

            # Save reference frames in output folder
            for batchIndex in range(args.batch_size):
                (TP(frame0[batchIndex].detach())).resize(
                    videoFrames.origDim, Image.BILINEAR).save(
                        os.path.join(
                            outputPath,
                            str(frameCounter + args.sf * batchIndex).zfill(8) +
                            ".png"))
            frameCounter += 1

            # Generate intermediate frames
            for intermediateIndex in range(1, args.sf):
                t = float(intermediateIndex) / args.sf
                temp = -t * (1 - t)
                fCoeff = [temp, t * t, (1 - t) * (1 - t), temp]

                F_t_0 = fCoeff[0] * F_0_1 + fCoeff[1] * F_1_0
                F_t_1 = fCoeff[2] * F_0_1 + fCoeff[3] * F_1_0

                g_I0_F_t_0 = flowBackWarp(I0, F_t_0)
                g_I1_F_t_1 = flowBackWarp(I1, F_t_1)

                intrpOut = ArbTimeFlowIntrp(
                    torch.cat((I0, I1, F_0_1, F_1_0, F_t_1, F_t_0, g_I1_F_t_1,
                               g_I0_F_t_0),
                              dim=1))

                F_t_0_f = intrpOut[:, :2, :, :] + F_t_0
                F_t_1_f = intrpOut[:, 2:4, :, :] + F_t_1
                V_t_0 = torch.sigmoid(intrpOut[:, 4:5, :, :])
                V_t_1 = 1 - V_t_0

                g_I0_F_t_0_f = flowBackWarp(I0, F_t_0_f)
                g_I1_F_t_1_f = flowBackWarp(I1, F_t_1_f)

                wCoeff = [1 - t, t]

                Ft_p = (wCoeff[0] * V_t_0 * g_I0_F_t_0_f + wCoeff[1] * V_t_1 *
                        g_I1_F_t_1_f) / (wCoeff[0] * V_t_0 + wCoeff[1] * V_t_1)

                # Save intermediate frame
                for batchIndex in range(args.batch_size):
                    (TP(Ft_p[batchIndex].cpu().detach())).resize(
                        videoFrames.origDim, Image.BILINEAR).save(
                            os.path.join(
                                outputPath,
                                str(frameCounter +
                                    args.sf * batchIndex).zfill(8) + ".png"))
                frameCounter += 1

            # Set counter accounting for batching of frames
            frameCounter += args.sf * (args.batch_size - 1)

    exit(0)