# Shared imports for all snippets below. These tests were written against
# the early, Variable-era GPyTorch API; the gpytorch.lazy import path is an
# assumption based on that release's layout.
import torch
from torch.autograd import Variable

from gpytorch.lazy import MulLazyVariable, RootLazyVariable


def test_matmul_mat_with_two_matrices():
    mat1 = make_random_mat(20, 5)
    mat2 = make_random_mat(20, 5)
    vec = Variable(torch.randn(20, 7), requires_grad=True)

    mat1_copy = Variable(mat1.data, requires_grad=True)
    mat2_copy = Variable(mat2.data, requires_grad=True)
    vec_copy = Variable(vec.data, requires_grad=True)

    # Forward
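    # RootLazyVariable(m) lazily represents m @ m.T, and MulLazyVariable
    # takes the elementwise (Hadamard) product of its arguments, so `res`
    # should match the dense computation below to within 1% relative error,
    # and so should the gradients checked afterwards.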
    res = MulLazyVariable(RootLazyVariable(mat1),
                          RootLazyVariable(mat2)).matmul(vec)
    actual = prod([
        mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
        mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
    ]).matmul(vec_copy)
    assert torch.max(((res.data - actual.data) / actual.data).abs()) < 0.01

    # Backward
    res.sum().backward()
    actual.sum().backward()
    assert torch.max(((mat1.grad.data - mat1_copy.grad.data) /
                      mat1_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat2.grad.data - mat2_copy.grad.data) /
                      mat2_copy.grad.data).abs()) < 0.01
    assert torch.max(((vec.grad.data - vec_copy.grad.data) /
                      vec_copy.grad.data).abs()) < 0.01
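
# The tests in this excerpt rely on two helpers that it does not define:
# make_random_mat and prod. Below is a minimal sketch of each, inferred
# from the call sites (make_random_mat(size, rank, batch_size=None); prod
# reduces a list of tensors elementwise) -- an assumption, not necessarily
# the upstream implementation.
def make_random_mat(size, rank, batch_size=None):
    # Draw a random root matrix; RootLazyVariable(mat) then represents
    # the PSD matrix mat @ mat.T.
    if batch_size is None:
        return Variable(torch.randn(size, rank), requires_grad=True)
    return Variable(torch.randn(batch_size, size, rank), requires_grad=True)


def prod(items):
    # Elementwise (Hadamard) product of a list of tensors: the dense
    # counterpart of what MulLazyVariable represents lazily.
    res = items[0]
    for item in items[1:]:
        res = res * item
    return res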
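
# Same check as the two-matrix test above, but with a leading batch
# dimension: each root is (5, 20, 4) and vec is (5, 20, 7), so every
# matmul broadcasts over the batch of five.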
def test_batch_matmul_mat_with_two_matrices():
    mat1 = make_random_mat(20, rank=4, batch_size=5)
    mat2 = make_random_mat(20, rank=4, batch_size=5)
    vec = Variable(torch.randn(5, 20, 7), requires_grad=True)

    mat1_copy = Variable(mat1.data, requires_grad=True)
    mat2_copy = Variable(mat2.data, requires_grad=True)
    vec_copy = Variable(vec.data, requires_grad=True)

    # Forward
    res = MulLazyVariable(RootLazyVariable(mat1),
                          RootLazyVariable(mat2)).matmul(vec)
    actual = prod([
        mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
        mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
    ]).matmul(vec_copy)
    assert torch.max(((res.data - actual.data) / actual.data).abs()) < 0.01

    # Backward
    res.sum().backward()
    actual.sum().backward()
    assert torch.max(((mat1.grad.data - mat1_copy.grad.data) /
                      mat1_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat2.grad.data - mat2_copy.grad.data) /
                      mat2_copy.grad.data).abs()) < 0.01
    assert torch.max(((vec.grad.data - vec_copy.grad.data) /
                      vec_copy.grad.data).abs()) < 0.01
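
# Five-factor variant: the elementwise product of five lazy PSD matrices
# (each built from a 20 x 5 root), applied to an ordinary length-20 vector.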
def test_matmul_vec_with_five_matrices():
    mat1 = make_random_mat(20, 5)
    mat2 = make_random_mat(20, 5)
    mat3 = make_random_mat(20, 5)
    mat4 = make_random_mat(20, 5)
    mat5 = make_random_mat(20, 5)
    vec = Variable(torch.randn(20), requires_grad=True)

    mat1_copy = Variable(mat1.data, requires_grad=True)
    mat2_copy = Variable(mat2.data, requires_grad=True)
    mat3_copy = Variable(mat3.data, requires_grad=True)
    mat4_copy = Variable(mat4.data, requires_grad=True)
    mat5_copy = Variable(mat5.data, requires_grad=True)
    vec_copy = Variable(vec.data, requires_grad=True)

    # Forward
    res = MulLazyVariable(
        RootLazyVariable(mat1),
        RootLazyVariable(mat2),
        RootLazyVariable(mat3),
        RootLazyVariable(mat4),
        RootLazyVariable(mat5),
    ).matmul(vec)
    actual = prod([
        mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
        mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
        mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
        mat4_copy.matmul(mat4_copy.transpose(-1, -2)),
        mat5_copy.matmul(mat5_copy.transpose(-1, -2)),
    ]).matmul(vec_copy)
    assert torch.max(((res.data - actual.data) / actual.data).abs()) < 0.01

    # Backward
    res.sum().backward()
    actual.sum().backward()
    assert torch.max(((mat1.grad.data - mat1_copy.grad.data) /
                      mat1_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat2.grad.data - mat2_copy.grad.data) /
                      mat2_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat3.grad.data - mat3_copy.grad.data) /
                      mat3_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat4.grad.data - mat4_copy.grad.data) /
                      mat4_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat5.grad.data - mat5_copy.grad.data) /
                      mat5_copy.grad.data).abs()) < 0.01
    assert torch.max(((vec.grad.data - vec_copy.grad.data) /
                      vec_copy.grad.data).abs()) < 0.01
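
# Batched five-factor variant: the same construction as above, but with
# batch_size=5 roots of rank 4 and a (5, 20, 7) right-hand side.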
def test_batch_matmul_mat_with_five_matrices():
    mat1 = make_random_mat(20, rank=4, batch_size=5)
    mat2 = make_random_mat(20, rank=4, batch_size=5)
    mat3 = make_random_mat(20, rank=4, batch_size=5)
    mat4 = make_random_mat(20, rank=4, batch_size=5)
    mat5 = make_random_mat(20, rank=4, batch_size=5)
    vec = Variable(torch.randn(5, 20, 7), requires_grad=True)

    mat1_copy = Variable(mat1.data, requires_grad=True)
    mat2_copy = Variable(mat2.data, requires_grad=True)
    mat3_copy = Variable(mat3.data, requires_grad=True)
    mat4_copy = Variable(mat4.data, requires_grad=True)
    mat5_copy = Variable(mat5.data, requires_grad=True)
    vec_copy = Variable(vec.data, requires_grad=True)

    # Forward
    res = MulLazyVariable(RootLazyVariable(mat1), RootLazyVariable(mat2),
                          RootLazyVariable(mat3), RootLazyVariable(mat4),
                          RootLazyVariable(mat5)).matmul(vec)
    actual = prod([
        mat1_copy.matmul(mat1_copy.transpose(-1, -2)),
        mat2_copy.matmul(mat2_copy.transpose(-1, -2)),
        mat3_copy.matmul(mat3_copy.transpose(-1, -2)),
        mat4_copy.matmul(mat4_copy.transpose(-1, -2)),
        mat5_copy.matmul(mat5_copy.transpose(-1, -2)),
    ]).matmul(vec_copy)
    assert torch.max(((res.data - actual.data) / actual.data).abs()) < 0.01

    # Backward
    res.sum().backward()
    actual.sum().backward()
    assert torch.max(((mat1.grad.data - mat1_copy.grad.data) /
                      mat1_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat2.grad.data - mat2_copy.grad.data) /
                      mat2_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat3.grad.data - mat3_copy.grad.data) /
                      mat3_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat4.grad.data - mat4_copy.grad.data) /
                      mat4_copy.grad.data).abs()) < 0.01
    assert torch.max(((mat5.grad.data - mat5_copy.grad.data) /
                      mat5_copy.grad.data).abs()) < 0.01
    assert torch.max(((vec.grad.data - vec_copy.grad.data) /
                      vec_copy.grad.data).abs()) < 0.01
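
# To run these checks directly, collect the snippets into one module (the
# filename below is just an example) and let pytest pick up the four
# test_* functions:
#
#     pytest test_mul_lazy_variable.py -q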