示例#1
0
        self.set_module_name("add")

    def forward(self):
        """Return ``torch.add`` applied to the two stored operands."""
        lhs, rhs = self.input_one, self.input_two
        return torch.add(lhs, rhs)


# The generated test names based on add_short_configs will be in the following pattern:
# add_M8_N16_K32_devicecpu
# add_M8_N16_K32_devicecpu_bwdall
# add_M8_N16_K32_devicecpu_bwd1
# add_M8_N16_K32_devicecpu_bwd2
# ...
# Those names can be used to filter tests.

# Register AddBenchmark over the long configs followed by the short configs,
# once with generate_pt_test and once with generate_pt_gradient_test.
# NOTE(review): the "_bwd*" names listed above presumably come from the
# gradient registration — confirm against operator_benchmark.
op_bench.generate_pt_test(add_long_configs + add_short_configs, AddBenchmark)
op_bench.generate_pt_gradient_test(add_long_configs + add_short_configs,
                                   AddBenchmark)
"""Mircobenchmark for addmm operator."""


class AddmmBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device):
        self.input_one = torch.rand(M,
                                    K,
                                    device=device,
                                    requires_grad=self.auto_set())
        self.mat1 = torch.rand(M,
                               N,
                               device=device,
                               requires_grad=self.auto_set())
        self.mat2 = torch.rand(N,
                               K,
示例#2
0
        else:
            # Replace tensors with float and long types for original per tensor
            # fake quantize kernel.
            self.args[1], self.args[2] = 1., 0
            self.op = torch.fake_quantize_per_tensor_affine

    def forward(self):
        """Call the operator selected during ``init`` with the prepared arguments."""
        selected_op = self.op
        call_args = self.args
        return selected_op(*call_args)

# Register FakeQuantizePerTensorOpBenchmark over the short configs followed by
# the long configs, first with generate_pt_test ...
op_bench.generate_pt_test(
    fake_quantize_operator_configs_short + fake_quantize_operator_configs_long,
    FakeQuantizePerTensorOpBenchmark
)

# ... and then with generate_pt_gradient_test (presumably the backward-pass
# variant — confirm against operator_benchmark).
op_bench.generate_pt_gradient_test(
    fake_quantize_operator_configs_short + fake_quantize_operator_configs_long,
    FakeQuantizePerTensorOpBenchmark
)

class FakeQuantizePerChannelOpBenchmark(op_bench.TorchBenchmarkBase):
    r"""Benchmarks 3 different fake quantize per channel operators."""
    def init(self, N, C, H, W, nbits, device, op_type):
        self.quant_min = 0
        self.quant_max = 2 ** nbits - 1
        self.quant_range = 2 ** nbits
        # Axis is chosen with respect to the number of channels: C.
        self.axis = 1
        self.input = torch.rand(N, C, H, W, dtype=torch.float, device=device)
        self.scale = torch.tensor([1.] * C).to(device)
        self.zero_point = torch.tensor([0.] * C).to(device)
        self.input.requires_grad_()
        self.args = [
import operator_benchmark as op_bench
import torch


# Configs for the add benchmark: a single size (M = N = K = 8), the "cuda" and
# "cpu" devices, tagged "short".
# NOTE(review): cross_product_configs presumably expands every combination of
# the listed values — confirm against operator_benchmark.
add_configs = op_bench.cross_product_configs(
    M=[8],
    N=[8],
    K=[8],
    device=["cuda", "cpu"],
    tags=["short"]
)


class AddBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case that applies ``torch.add`` to two random tensors."""

    def init(self, M, N, K, device):
        """Create the two operands and name the module "add".

        Both operands are drawn with ``torch.rand`` using sizes ``M, N, K`` on
        ``device`` and are created with ``requires_grad=True``.
        """
        shape = (M, N, K)
        tensor_options = {"device": device, "requires_grad": True}
        self.input_one = torch.rand(*shape, **tensor_options)
        self.input_two = torch.rand(*shape, **tensor_options)
        self.set_module_name("add")

    def forward(self):
        """Return ``torch.add`` of the two operands built in ``init``."""
        lhs, rhs = self.input_one, self.input_two
        return torch.add(lhs, rhs)


# Register AddBenchmark over add_configs with both generators.
# NOTE(review): presumably generate_pt_test covers the forward pass and
# generate_pt_gradient_test the backward pass — confirm in operator_benchmark.
op_bench.generate_pt_test(add_configs, AddBenchmark)
op_bench.generate_pt_gradient_test(add_configs, AddBenchmark)


# Hand control to the benchmark runner when this file is executed as a script.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()
示例#4
0
    input_size=[8, 16, 64],
    offset=[0],
    sparse=[True],
    tags=['short']
)


class EmbeddingBagBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case that runs a ``torch.nn.EmbeddingBag`` module."""

    def init(self, embeddingbags, dim, mode, input_size, offset, sparse):
        """Build the EmbeddingBag module and its index/offset inputs.

        Args:
            embeddingbags: Forwarded as ``num_embeddings``; also the exclusive
                upper bound of the randomly drawn indices.
            dim: Forwarded as ``embedding_dim``.
            mode: Forwarded as ``mode``.
            input_size: ``size`` argument of ``numpy.random.randint`` used to
                draw the indices.
            offset: Value wrapped into a one-element ``torch.LongTensor``.
            sparse: Forwarded as ``sparse``.
        """
        # The attribute was previously misspelled "embegging"; it is named
        # "embedding" here and in forward().
        self.embedding = torch.nn.EmbeddingBag(
            num_embeddings=embeddingbags,
            embedding_dim=dim,
            mode=mode,
            sparse=sparse)
        # Fixed seed so the same indices are drawn on every run.
        numpy.random.seed((1 << 32) - 1)
        self.input = torch.tensor(numpy.random.randint(0, embeddingbags, input_size)).long()
        self.offset = torch.LongTensor([offset])

        self.set_module_name('embeddingbag')

    def forward(self):
        """Return the module's output for the stored indices and offsets."""
        return self.embedding(self.input, self.offset)


# Register EmbeddingBagBenchmark over the short configs with both generators.
# NOTE(review): presumably generate_pt_test covers the forward pass and
# generate_pt_gradient_test the backward pass — confirm in operator_benchmark.
op_bench.generate_pt_test(embeddingbag_short_configs, EmbeddingBagBenchmark)
op_bench.generate_pt_gradient_test(embeddingbag_short_configs, EmbeddingBagBenchmark)


# Hand control to the benchmark runner when this file is executed as a script.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()
示例#5
0
)

# "long"-tagged batchnorm configs: M, N and the device each list two values,
# while K is fixed to 1.
# NOTE(review): cross_product_configs presumably expands every combination of
# the listed values — confirm against operator_benchmark.
batchnorm_configs_long = op_bench.cross_product_configs(
    M=[1, 128],
    N=[8192, 2048],
    K=[1],
    device=['cpu', 'cuda'],
    tags=["long"]
)


class BatchNormBenchmark(op_bench.TorchBenchmarkBase):
    """Benchmark case that calls ``F.batch_norm`` on a random input."""

    def init(self, M, N, K, device):
        """Create the input and the four length-``N`` tensors for batch norm.

        The input is drawn with ``torch.rand`` using sizes ``M, N, K``; its
        ``requires_grad`` flag is whatever ``self.auto_set()`` returns. The
        mean, var, weight and bias tensors are each drawn with
        ``torch.rand(N)``. All tensors are placed on ``device``.
        """
        needs_grad = self.auto_set()
        self.input_one = torch.rand(
            M, N, K,
            device=device,
            requires_grad=needs_grad,
        )
        # Drawn in the order mean, var, weight, bias.
        for attr_name in ("mean", "var", "weight", "bias"):
            setattr(self, attr_name, torch.rand(N, device=device))
        self.set_module_name("batchnorm")

    def forward(self):
        """Return ``F.batch_norm`` of the input with the stored tensors."""
        call_args = (
            self.input_one,
            self.mean,
            self.var,
            self.weight,
            self.bias,
        )
        return F.batch_norm(*call_args)


# Register BatchNormBenchmark over the short configs followed by the long
# configs, with both generators.
# NOTE(review): presumably generate_pt_test covers the forward pass and
# generate_pt_gradient_test the backward pass — confirm in operator_benchmark.
op_bench.generate_pt_test(batchnorm_configs_short + batchnorm_configs_long, BatchNormBenchmark)
op_bench.generate_pt_gradient_test(batchnorm_configs_short + batchnorm_configs_long, BatchNormBenchmark)


# Hand control to the benchmark runner when this file is executed as a script.
if __name__ == "__main__":
    op_bench.benchmark_runner.main()
        input = torch.tensor(numpy.random.randint(0, embeddingbags, input_size), device=device).long()
        self.inputs = {
            "input": input,
            "offset": torch.cat((offsets, torch.tensor([input.size(0)], dtype=torch.long)), 0)
        }
        self.set_module_name('qatEmbeddingBag')

    def forward(self, input, offset):
        """Call the stored embedding module with ``input`` and ``offset``."""
        module = self.embedding
        return module(input, offset)

# EmbeddingBag QAT currently has no support for sparse embeddings, so keep only
# the short configs that do not contain a {'sparse': True} entry.
embeddingbag_short_dense_configs = list(filter(
    lambda config: {'sparse': True} not in config,
    configs.embeddingbag_short_configs,
))

# Register QATEmbeddingBagBenchmark over the dense-only short configs with both
# generators.
# NOTE(review): presumably generate_pt_test covers the forward pass and
# generate_pt_gradient_test the backward pass — confirm in operator_benchmark.
op_bench.generate_pt_test(embeddingbag_short_dense_configs, QATEmbeddingBagBenchmark)
op_bench.generate_pt_gradient_test(embeddingbag_short_dense_configs, QATEmbeddingBagBenchmark)

class QATEmbeddingBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, num_embeddings, embedding_dim, input_size, device):
        qconfig = default_embedding_qat_qconfig
        self.embedding = nnqat.Embedding(
            num_embeddings=num_embeddings,
            embedding_dim=embedding_dim,
            qconfig=qconfig, device=device)
        self.embedding.qconfig = default_embedding_qat_qconfig
        numpy.random.seed((1 << 32) - 1)
        self.input = torch.tensor(numpy.random.randint(0, num_embeddings, input_size),
                                  device=device).long()
        self.inputs = {"input": self.input}
        self.set_module_name('qatEmbedding')
        self.input = torch.rand(N, C, H, W, dtype=torch.float)
        self.scale = torch.tensor([1.])
        self.zero_point = torch.tensor([0.])
        self.input.requires_grad_()
        self.scale.requires_grad_()
        self.zero_point.requires_grad_()

    def forward(self):
        """Call the learnable per-tensor-affine fake-quantize op.

        The op receives the stored input, scale and zero point, followed by
        the stored quantization minimum and maximum.
        """
        op_args = (
            self.input,
            self.scale,
            self.zero_point,
            self.quant_min,
            self.quant_max,
        )
        return torch._fake_quantize_learnable_per_tensor_affine(*op_args)


# Register FakeQuantizeLearnablePerTensorBenchmark over
# fake_quantize_learnable_configs with both generators.
# NOTE(review): presumably generate_pt_test covers the forward pass and
# generate_pt_gradient_test the backward pass — confirm in operator_benchmark.
op_bench.generate_pt_test(fake_quantize_learnable_configs,
                          FakeQuantizeLearnablePerTensorBenchmark)
op_bench.generate_pt_gradient_test(fake_quantize_learnable_configs,
                                   FakeQuantizeLearnablePerTensorBenchmark)


class FakeQuantizeLearnablePerChannelBenchmark(op_bench.TorchBenchmarkBase):
    r"""Benchmarks learnable fake quantize per channel."""
    def init(self, N, C, H, W, nbits):
        torch.manual_seed(TORCH_RANDOM_SEED)
        self.quant_min = 0
        self.quant_max = 2**nbits - 1
        self.quant_range = 2**nbits
        # Axis is chosen with respect to the number of channels: C.
        self.axis = 1
        self.input = torch.rand(N, C, H, W, dtype=torch.float)
        self.scale = torch.tensor([1.] * C)
        self.zero_point = torch.tensor([0.] * C)
        self.input.requires_grad_()