示例#1
0
def eval_idct2d(x, expk0, expk1, expkM, expkN, runs):
    """Benchmark four 2D IDCT implementations on ``x`` and print their timings.

    Each candidate is called once untimed, then ``runs`` times between two
    timestamps; the mean wall-clock time per call is printed in milliseconds.
    Candidates, in order: ``discrete_spectral_transform.idct2_N``,
    ``dct.IDCT2`` with ``algorithm='2N'``, ``dct.IDCT2`` with
    ``algorithm='N'`` and ``dct2_fft2.IDCT2``.

    Args:
        x: input tensor handed to every implementation.
        expk0: forwarded as ``expk0=`` to ``idct2_N`` and as the first
            argument of ``dct.IDCT2``.
        expk1: forwarded as ``expk1=`` to ``idct2_N`` and as the second
            argument of ``dct.IDCT2``.
        expkM: first argument of ``dct2_fft2.IDCT2``.
        expkN: second argument of ``dct2_fft2.IDCT2``.
        runs: number of timed repetitions per implementation (must be > 0,
            since the elapsed time is divided by it).
    """

    def synchronize():
        # torch.cuda.synchronize() requires a CUDA runtime; only call it when
        # the input actually lives on a CUDA device so CPU runs do not crash.
        if x.is_cuda:
            torch.cuda.synchronize()

    def benchmark(message, timed_call, warm_up=None):
        # One untimed call first, so the measurement excludes first-call cost.
        (warm_up or timed_call)()
        synchronize()
        # perf_counter is monotonic and has the best available resolution.
        start = time.perf_counter()
        for _ in range(runs):
            timed_call()
        synchronize()
        print(message % ((time.perf_counter() - start) / runs * 1000))

    benchmark(
        "PyTorch idct2_N takes %.7f ms",
        lambda: discrete_spectral_transform.idct2_N(x, expk0=expk0, expk1=expk1),
    )

    idct2_2n = dct.IDCT2(expk0, expk1, algorithm='2N')
    benchmark(
        "IDCT2_2N Function takes %.7f ms",
        lambda: idct2_2n.forward(x),
    )

    idct2_n = dct.IDCT2(expk0, expk1, algorithm='N')
    # The timed call for algorithm='N' also divides the result by
    # x.size(0), x.size(1) and 4, so that cost is part of its measurement;
    # the warm-up call stays a plain forward(), as in the original benchmark.
    benchmark(
        "IDCT2_N Function takes %.7f ms",
        lambda: idct2_n.forward(x) / x.size(0) / x.size(1) / 4,
        warm_up=lambda: idct2_n.forward(x),
    )

    idct2_fft2 = dct2_fft2.IDCT2(expkM, expkN)
    benchmark(
        "IDCT2_FFT2 Function takes %.7f ms",
        lambda: idct2_fft2.forward(x),
    )

    print("")
示例#2
0
def eval_runtime():
    """Time ``dct.IDCT2`` on random CUDA data and print the mean ms per call.

    Builds ten random 512x512 float64 matrices on the GPU, then times
    ``dct.IDCT2`` with ``algorithm='2N'`` and with ``algorithm='N'`` over
    ``runs`` calls each, cycling through the ten matrices.

    NOTE(review): the function calls ``exit()`` right after those two
    benchmarks, so the fft2 / idcct2 / idcst2 / idsct2 benchmarks further
    down are currently unreachable. The commented-out sections are earlier
    benchmarks that have been disabled.
    """
    #x = torch.tensor([1, 2, 7, 9, 20, 31], dtype=torch.float64)
    #print(dct_N(x))

    N = 512
    runs = 10
    # Batch of 10 random N x N matrices, values drawn uniformly from [0, 10), moved to the GPU.
    x = torch.empty(10, N, N, dtype=torch.float64).uniform_(0, 10.0).cuda()
    # NOTE(review): perm is only referenced by the commented-out dct2_N benchmark below.
    perm = discrete_spectral_transform.get_perm(N, dtype=torch.int64, device=x.device)
    expk = discrete_spectral_transform.get_expk(N, dtype=x.dtype, device=x.device)

    # Disabled: scipy reference timing.
    #x_numpy = x.data.cpu().numpy()
    #tt = time.time()
    #for i in range(runs): 
    #    y = fftpack.dct(fftpack.dct(x_numpy[i%10].T, norm=None).T, norm=None)
    #print("scipy takes %.3f sec" % (time.time()-tt))

    # Disabled: dct2_2N timing.
    ## 9s for 200 iterations 1024x1024 on GTX 1080
    #torch.cuda.synchronize()
    #tt = time.time()
    ##with torch.autograd.profiler.profile(use_cuda=True) as prof:
    #for i in range(runs): 
    #    y_2N = dct2_2N(x[0], expk0=expk, expk1=expk)
    #torch.cuda.synchronize()
    ##print(prof)
    #print("dct_2N takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Disabled: discrete_spectral_transform.dct2_N timing.
    ## 11s for 200 iterations 1024x1024 on GTX 1080
    #torch.cuda.synchronize()
    #tt = time.time()
    ##with torch.autograd.profiler.profile(use_cuda=True) as prof:
    #for i in range(runs): 
    #    y_N = discrete_spectral_transform.dct2_N(x[i%10], perm0=perm, expk0=expk, perm1=perm, expk1=expk)
    #torch.cuda.synchronize()
    ##print(prof)
    #print("dct_N takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Disabled: dct.DCT2 (algorithm='2N') timing.
    #dct2func = dct.DCT2(expk, expk, algorithm='2N')
    #torch.cuda.synchronize()
    #tt = time.time()
    ##with torch.autograd.profiler.profile(use_cuda=True) as prof:
    #for i in range(runs): 
    #    y_N = dct2func.forward(x[0])
    #torch.cuda.synchronize()
    ##print(prof)
    #print("DCT2Function 2N takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Disabled: dct.DCT2 (algorithm='N') timing.
    #dct2func = dct.DCT2(expk, expk, algorithm='N')
    #torch.cuda.synchronize()
    #tt = time.time()
    ##with torch.autograd.profiler.profile(use_cuda=True) as prof:
    #for i in range(runs): 
    #    y_N = dct2func.forward(x[0])
    #torch.cuda.synchronize()
    ##print(prof)
    #print("DCT2Function takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Disabled: dct_lee.DCT2 timing.
    #dct2func = dct_lee.DCT2(expk, expk)
    #torch.cuda.synchronize()
    #tt = time.time()
    ##with torch.autograd.profiler.profile(use_cuda=True) as prof:
    #for i in range(runs): 
    #    y_N = dct2func.forward(x[i%10])
    #torch.cuda.synchronize()
    ##print(prof)
    #print("DCT2Function lee takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Disabled: discrete_spectral_transform.idct2_2N timing.
    #torch.cuda.synchronize()
    #tt = time.time()
    ##with torch.autograd.profiler.profile(use_cuda=True) as prof:
    #for i in range(runs): 
    #    y_N = discrete_spectral_transform.idct2_2N(x[i%10], expk0=expk, expk1=expk)
    #torch.cuda.synchronize()
    ##print(prof)
    #print("idct2_2N takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Active benchmark 1: dct.IDCT2 with algorithm='2N'.
    # Synchronize before reading the clock on both sides of the loop so the
    # timestamps bracket completed GPU work. No warm-up call precedes the loop.
    idct2func = dct.IDCT2(expk, expk, algorithm='2N')
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = idct2func.forward(x[i%10])
    torch.cuda.synchronize()
    #print(prof)
    print("IDCT2Function 2N takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Active benchmark 2: dct.IDCT2 with algorithm='N'.
    idct2func = dct.IDCT2(expk, expk, algorithm='N')
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = idct2func.forward(x[i%10])
    torch.cuda.synchronize()
    #print(prof)
    print("IDCT2Function takes %.3f ms" % ((time.time()-tt)/runs*1000))
    # NOTE(review): exit() ends the run here; everything below this line is
    # unreachable until this call is removed.
    exit()

    # Disabled: discrete_spectral_transform.idxt timing.
    #torch.cuda.synchronize()
    #tt = time.time()
    ##with torch.autograd.profiler.profile(use_cuda=True) as prof:
    #for i in range(runs): 
    #    y_N = discrete_spectral_transform.idxt(x[i%10], 0, expk=expk)
    #torch.cuda.synchronize()
    ##print(prof)
    #print("idxt takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Disabled: dct.IDXCT timing.
    #idxct_func = dct.IDXCT(expk)
    #torch.cuda.synchronize()
    #tt = time.time()
    ##with torch.autograd.profiler.profile(use_cuda=True) as prof:
    #for i in range(runs): 
    #    y_N = idxct_func.forward(x[i%10])
    #torch.cuda.synchronize()
    ##print(prof)
    #print("IDXCTFunction takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Unreachable: raw 2D FFT timing on a [1, N, N] view of one matrix.
    # NOTE(review): torch.rfft was removed from PyTorch in release 1.8 in
    # favour of the torch.fft module; this call needs porting before this
    # section can be re-enabled on newer releases.
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = torch.rfft(x[i%10].view([1, N, N]), signal_ndim=2, onesided=False)
    torch.cuda.synchronize()
    #print(prof)
    print("fft2 takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Unreachable: discrete_spectral_transform.idcct2 timing.
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = discrete_spectral_transform.idcct2(x[i%10], expk_0=expk, expk_1=expk)
    torch.cuda.synchronize()
    #print(prof)
    print("idcct2 takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Unreachable: dct.IDCCT2 timing.
    func = dct.IDCCT2(expk, expk)
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = func.forward(x[i%10])
    torch.cuda.synchronize()
    #print(prof)
    print("IDCCT2Function takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Unreachable: discrete_spectral_transform.idcst2 timing.
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = discrete_spectral_transform.idcst2(x[i%10], expk_0=expk, expk_1=expk)
    torch.cuda.synchronize()
    #print(prof)
    print("idcst2 takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Unreachable: dct.IDCST2 timing.
    func = dct.IDCST2(expk, expk)
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = func.forward(x[i%10])
    torch.cuda.synchronize()
    #print(prof)
    print("IDCST2Function takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Unreachable: discrete_spectral_transform.idsct2 timing.
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = discrete_spectral_transform.idsct2(x[i%10], expk_0=expk, expk_1=expk)
    torch.cuda.synchronize()
    #print(prof)
    print("idsct2 takes %.3f ms" % ((time.time()-tt)/runs*1000))

    # Unreachable: dct.IDSCT2 timing.
    func = dct.IDSCT2(expk, expk)
    torch.cuda.synchronize()
    tt = time.time()
    #with torch.autograd.profiler.profile(use_cuda=True) as prof:
    for i in range(runs): 
        y_N = func.forward(x[i%10])
    torch.cuda.synchronize()
    #print(prof)
    print("IDSCT2Function takes %.3f ms" % ((time.time()-tt)/runs*1000))
示例#3
0
    def test_idct2Random(self):
        """Check each 2D IDCT implementation against ``idct2_2N`` on random data.

        A seeded 4x8 tensor of random integers in [0, 10) is transformed with
        ``discrete_spectral_transform.dct2_2N``; inverting that result with
        ``discrete_spectral_transform.idct2_2N`` gives the reference output.
        ``dct.IDCT2`` (``algorithm='N'`` and ``'2N'``) and ``dct_lee.IDCT2``
        must match the reference within ``rtol=1e-6`` / ``atol=1e-5`` on CPU
        and, when a CUDA device is available, on GPU as well.

        Raises:
            AssertionError: if any implementation deviates from the reference.
        """
        torch.manual_seed(10)
        M = 4
        N = 8
        # ``dtype`` is not defined in this method; it is resolved from the
        # enclosing scope. ``Tensor.to`` converts without the copy-construct
        # warning that ``torch.tensor(<tensor>)`` emits.
        x = torch.empty(M, N, dtype=torch.int32).random_(0, 10).to(dtype)
        print("2D x")
        print(x)

        y = discrete_spectral_transform.dct2_2N(x)

        golden_value = discrete_spectral_transform.idct2_2N(y).data.numpy()
        print("2D golden_value")
        print(golden_value)

        # Built lazily so each transform object is created right before use.
        transform_factories = (
            lambda: dct.IDCT2(algorithm='N'),   # N-FFT
            lambda: dct.IDCT2(algorithm='2N'),  # 2N-FFT
            lambda: dct_lee.IDCT2(),            # dct_lee
        )

        def check(transform, inputs, label):
            # ``.cpu()`` brings GPU results back to host memory for numpy.
            dct_value = transform.forward(inputs).cpu()
            print(label)
            print(dct_value.data.numpy())
            np.testing.assert_allclose(dct_value.data.numpy(), golden_value, rtol=1e-6, atol=1e-5)

        # test cpu
        for make_transform in transform_factories:
            check(make_transform(), y, "2D dct_value")

        # test gpu, only when a CUDA device is usable so CPU-only machines
        # still run the CPU checks instead of crashing on ``.cuda()``
        if torch.cuda.is_available():
            y_cuda = y.cuda()
            for make_transform in transform_factories:
                check(make_transform(), y_cuda, "2D dct_value cuda")
def compare_different_methods(cuda_flag, M=1024, N=1024, dtype=torch.float64):
    """Cross-check three implementations of the same spectral pipeline.

    A random ``M`` x ``N`` density map is transformed, scaled in the
    frequency domain and inverse-transformed into an x field map, a y field
    map, a potential map and a scalar energy. The functional ``dct`` API
    provides the reference ("golden") values; the ``dct2_fft2`` classes and
    the ``dct`` classes must each reproduce them within ``rtol=1e-6`` /
    ``atol=1e-5``.

    Args:
        cuda_flag: when truthy, the density map is moved to the GPU.
        M: number of rows of the density map.
        N: number of columns of the density map.
        dtype: torch dtype of the density map and of the expk tables.

    Raises:
        AssertionError: if any result deviates from the reference.
    """
    density_map = torch.empty(M, N, dtype=dtype).uniform_(0, 10.0)
    if cuda_flag:
        density_map = density_map.cuda()
    device = density_map.device

    expkM = discrete_spectral_transform.get_expk(M, dtype, device)
    expkN = discrete_spectral_transform.get_expk(N, dtype, device)
    exact_expkM = discrete_spectral_transform.get_exact_expk(M, dtype, device)
    exact_expkN = discrete_spectral_transform.get_exact_expk(N, dtype, device)
    print("M = {}, N = {}".format(M, N))

    def sync_if_cuda():
        if density_map.is_cuda:
            torch.cuda.synchronize()

    def as_numpy(tensor):
        return tensor.data.cpu().numpy()

    def expect_close(actual, golden):
        np.testing.assert_allclose(as_numpy(actual), as_numpy(golden), rtol=1e-6, atol=1e-5)

    # frequency grids: a column vector along M and a row vector along N
    wu = torch.arange(M, dtype=density_map.dtype, device=device).mul(2 * np.pi / M).view([M, 1])
    wv = torch.arange(N, dtype=density_map.dtype, device=device).mul(2 * np.pi / N).view([1, N])
    freq_sq = wu.pow(2) + wv.pow(2)
    freq_sq[0, 0] = 1.0  # avoid zero-division, the entry is zeroed out below

    two_over_freq_sq = 2.0 / freq_sq
    two_over_freq_sq[0, 0] = 0.0
    wu_scale_2x = wu.mul(two_over_freq_sq)
    wv_scale_2x = wv.mul(two_over_freq_sq)

    # --- first approach: functional dct API, used as the ground truth ---
    auv_golden = dct.dct2(density_map, expk0=expkM, expk1=expkN)
    auv = auv_golden.clone()
    # halve the first row and the first column in place
    auv[0, :].mul_(0.5)
    auv[:, 0].mul_(0.5)
    auv_scaled_wu = auv.mul(wu_scale_2x)
    auv_scaled_wv = auv.mul(wv_scale_2x)
    field_map_x_golden = dct.idsct2(auv_scaled_wu, expkM, expkN)
    field_map_y_golden = dct.idcst2(auv_scaled_wv, expkM, expkN)
    # potential phi from auv / (wu**2 + wv**2)
    auv_scaled = auv.mul(two_over_freq_sq).mul_(2)
    potential_map_golden = dct.idcct2(auv_scaled, expkM, expkN)
    # energy
    energy_golden = potential_map_golden.mul(density_map).sum()

    sync_if_cuda()

    # scaling factors shared by the second and the third approach
    inv_freq_sq = 1.0 / freq_sq
    inv_freq_sq[0, 0] = 0.0
    wu_scale_half = wu.mul(inv_freq_sq).mul_(0.5)
    wv_scale_half = wv.mul(inv_freq_sq).mul_(0.5)

    def run_and_compare(forward_op, inverse_op, idct_idxst_op, idxst_idct_op):
        # Run one class-based pipeline and compare every output to the reference.
        coeffs = forward_op.forward(density_map)

        coeffs_scaled_wu = coeffs.mul(wu_scale_half)
        coeffs_scaled_wv = coeffs.mul(wv_scale_half)
        field_map_x = idxst_idct_op.forward(coeffs_scaled_wu)
        field_map_y = idct_idxst_op.forward(coeffs_scaled_wv)
        coeffs_scaled = coeffs.mul(inv_freq_sq)
        potential_map = inverse_op.forward(coeffs_scaled)
        energy = potential_map.mul(density_map).sum()

        sync_if_cuda()

        expect_close(coeffs, auv_golden)
        expect_close(field_map_x, field_map_x_golden)
        expect_close(field_map_y, field_map_y_golden)
        expect_close(potential_map, potential_map_golden)
        expect_close(energy, energy_golden)

    # --- second approach: dct2_fft2 classes built on the exact expk tables ---
    run_and_compare(
        dct2_fft2.DCT2(exact_expkM, exact_expkN),
        dct2_fft2.IDCT2(exact_expkM, exact_expkN),
        dct2_fft2.IDCT_IDXST(exact_expkM, exact_expkN),
        dct2_fft2.IDXST_IDCT(exact_expkM, exact_expkN),
    )

    # --- third approach: dct classes (IDCT_IDXST and IDXST_IDCT) ---
    run_and_compare(
        dct.DCT2(expkM, expkN),
        dct.IDCT2(expkM, expkN),
        dct.IDCT_IDXST(expkM, expkN),
        dct.IDXST_IDCT(expkM, expkN),
    )
示例#5
0
    def test_idct2Random(self):
        """Check each 2D IDCT implementation against ``idct2_2N`` on random data.

        A seeded 4x8 double tensor of random integers in [0, 10) is
        transformed with ``discrete_spectral_transform.dct2_2N``; inverting
        that result with ``discrete_spectral_transform.idct2_2N`` gives the
        reference output. ``dct.IDCT2`` (``algorithm='N'`` and ``'2N'``),
        ``dct_lee.IDCT2`` and ``dct2_fft2.IDCT2`` must match the reference
        within ``rtol=1e-6`` / ``atol=1e-5`` on CPU and, when at least one
        CUDA device is present, on GPU as well.

        Raises:
            AssertionError: if any implementation deviates from the reference.
        """
        torch.manual_seed(10)
        M = 4
        N = 8
        x = torch.empty(M, N, dtype=torch.int32).random_(0, 10).double()
        print("2D x")
        print(x)

        expkM = discrete_spectral_transform.get_exact_expk(M, dtype=x.dtype, device=x.device)
        expkN = discrete_spectral_transform.get_exact_expk(N, dtype=x.dtype, device=x.device)

        y = discrete_spectral_transform.dct2_2N(x)

        golden_value = discrete_spectral_transform.idct2_2N(y).data.numpy()
        print("2D idct golden_value")
        print(golden_value)

        def check(transform, inputs, label):
            # ``.cpu()`` brings GPU results back to host memory for numpy.
            dct_value = transform.forward(inputs).cpu()
            print(label)
            print(dct_value.data.numpy())
            np.testing.assert_allclose(dct_value.data.numpy(), golden_value, rtol=1e-6, atol=1e-5)

        # test cpu; factories are lazy so each transform is built right before use
        cpu_factories = (
            lambda: dct.IDCT2(algorithm='N'),        # N-FFT
            lambda: dct.IDCT2(algorithm='2N'),       # 2N-FFT
            lambda: dct_lee.IDCT2(),                 # dct_lee
            lambda: dct2_fft2.IDCT2(expkM, expkN),   # fft2
        )
        for make_transform in cpu_factories:
            # The fft2 CPU check used to print the "cuda" label by mistake;
            # every CPU check now prints the CPU label.
            check(make_transform(), y, "2D idct_value")

        if torch.cuda.device_count():
            # test gpu
            y_cuda = y.cuda()
            gpu_factories = (
                lambda: dct.IDCT2(algorithm='N'),
                lambda: dct.IDCT2(algorithm='2N'),
                lambda: dct_lee.IDCT2(),
                # the expk tables have to live on the same device as the input
                lambda: dct2_fft2.IDCT2(expkM.cuda(), expkN.cuda()),
            )
            for make_transform in gpu_factories:
                check(make_transform(), y_cuda, "2D idct_value cuda")