Example #1
from __future__ import unicode_literals


import operator_benchmark as op_bench
import torch


"""Microbenchmarks for quantized unary operators (point-wise and reduction)."""


# Configs for pointwise and reduction unary ops
qunary_ops_configs_short = op_bench.config_list(
    attr_names=['M', 'N'],
    attrs=[
        [512, 512],
    ],
    cross_product_configs={
        'dtype': [torch.quint8],
    },
    tags=['short']
)

qunary_ops_configs_long = op_bench.cross_product_configs(
    M=[256, 1024],
    N=[256, 1024],
    dtype=[torch.quint8, torch.qint8, torch.qint32],
    tags=['long']
)

class QUnaryOpBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, dtype, op_func):
        f_input = torch.rand(M, N)
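        # Completion sketch: quantize the float input and store the op under test
        self.q_input = torch.quantize_per_tensor(f_input, scale=1.0,
                                                 zero_point=0, dtype=dtype)
        self.op_func = op_func

    def forward(self):
        return self.op_func(self.q_input)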
Example #2
import operator_benchmark as op_bench
import torch

'''Microbenchmarks for the quantized interpolate op.

Note: We are not benchmarking `upsample`, as it is being deprecated and calls
`interpolate` anyway.
'''

qinterpolate_long_configs = op_bench.config_list(
    attr_names=['M', 'N', 'K'],
    attrs=[
        [512, 512, 512],
    ],
    cross_product_configs={
        'dtype': [torch.quint8, torch.qint8, torch.qint32],
        'mode': ['nearest', 'bilinear'],
        'scale': [0.5, 1.0, 2.0],
        'contig': [True],  # TODO: Add `False` after #29435
    },
    tags=['long']
)


qinterpolate_short_configs = op_bench.config_list(
    attr_names=['M', 'N', 'K', 'dtype', 'mode', 'scale', 'contig'],
    attrs=[
        [32, 32, 32, torch.quint8, 'nearest', 0.5, True],  # Downsample
        [32, 32, 32, torch.quint8, 'bilinear', 0.5, True],  # Downsample
        [32, 32, 32, torch.quint8, 'nearest', 2.0, True],  # Upsample
        [32, 32, 32, torch.quint8, 'bilinear', 2.0, True],  # Upsample
    ],
    tags=['short'],
)
Example #3
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import operator_benchmark as op_bench
import torch
"""Microbenchmarks for Chunk operator"""

# Configs for PT Chunk operator
chunk_short_configs = op_bench.config_list(
    attr_names=["M", "N", "chunks"],
    attrs=[
        [256, 512, 2],
        [512, 512, 2],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)

chunks_long_configs = op_bench.cross_product_configs(M=[128, 1024],
                                                     N=[128, 1024],
                                                     chunks=[2, 4],
                                                     device=['cpu', 'cuda'],
                                                     tags=['long'])


class ChunkBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, chunks, device):
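        # Completion sketch, following the pattern of the other benchmarks here
        self.input_one = torch.rand(M, N, device=device)
        self.chunks = chunks
        self.set_module_name('chunk')

    def forward(self):
        return torch.chunk(self.input_one, self.chunks)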
Example #4
import operator_benchmark as op_bench
import torch
import random
from typing import List


"""Microbenchmarks for Cat operator"""


# Configs for PT Cat operator
cat_configs_short = op_bench.config_list(
    attr_names=['sizes', 'N', 'dim'],
    attrs=[
        [(1,    1,      1), 2, 0], # noqa
        [(512,  512,    2), 2, 1], # noqa
        [(128, 1024,    2), 2, 1], # noqa
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=['short'],
)

cat_configs_long = op_bench.config_list(
    attr_names=['sizes', 'N', 'dim'],
    attrs=[
        [(2**10,    2**10,      2), 2, 0], # noqa
        [(2**10+1,  2**10-1,    2), 2, 1], # noqa
        [(2**10,    2**10,      2), 2, 2], # noqa

        [[ lambda: random.randint(2**6, 2**7),      2**7-17,    2**6+1], # noqa
            5, 0],
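        # Remaining long-config entries elided in the original
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=['long'],
)

Example #5
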
import operator_benchmark as op_bench
import torch
import numpy


"""Microbenchmarks for index_select operator."""

# An example input from this configuration is M=8, N=8, K=1, dim=1.
index_select_configs_short = op_bench.config_list(
    attr_names=["M", "N", "K", "dim"],
    attrs=[
        [8, 8, 1, 1],
        [256, 512, 1, 1],
        [512, 512, 1, 1],
        [8, 8, 2, 1],
        [256, 512, 2, 1],
        [512, 512, 2, 1],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"]
)


index_select_configs_long = op_bench.cross_product_configs(
    M=[128, 1024],
    N=[128, 1024],
    K=[1, 2],
    dim=[1],
    device=['cpu', 'cuda'],
    tags=["long"]
)
Example #6
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import operator_benchmark as op_bench
import torch
import torch.nn as nn
"""Microbenchmarks for batchnorm operator."""

configs = op_bench.config_list(attrs=[
    [1, 32, 10],
    [4, 256, 100],
    [16, 1024, 256],
],
                               attr_names=["N", "IN", "OUT"],
                               tags=["short"])


class LinearBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, IN, OUT):
        self.input_one = torch.rand(N, IN)
        self.linear = nn.Linear(IN, OUT)
        self.set_module_name("linear")

    def forward(self):
        return self.linear(self.input_one)


op_bench.generate_pt_test(configs, LinearBenchmark)
Example #7
import operator_benchmark as op_bench
import torch
import torch.nn.functional as F


"""Microbenchmarks for batchnorm operator."""

batchnorm_configs_short = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[
        [1, 256, 3136],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"]
)

batchnorm_configs_long = op_bench.cross_product_configs(
    M=[1, 128],
    N=[8192, 2048],
    K=[1],
    device=['cpu', 'cuda'],
    tags=["long"]
)


class BatchNormBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device):
        self.input_one = torch.rand(M, N, K, device=device, requires_grad=self.auto_set())
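        # Completion sketch: running stats and affine parameters for batch_norm
        self.mean = torch.rand(N, device=device)
        self.var = torch.rand(N, device=device)
        self.weight = torch.rand(N, device=device)
        self.bias = torch.rand(N, device=device)
        self.set_module_name("batchnorm")

    def forward(self):
        return F.batch_norm(self.input_one, self.mean, self.var,
                            self.weight, self.bias)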
Example #8
from __future__ import unicode_literals

import operator_benchmark as op_bench
import torch


"""Microbenchmarks for Split operator"""


# Configs for PT Split operator
split_configs_short = op_bench.config_list(
    attr_names=["M", "N", "parts"],
    attrs=[
        [256, 512, 2],
        [512, 512, 2],
    ],
    cross_product_configs={
        'device': ['cpu'],
    },
    tags=["short"],
)

split_configs_long = op_bench.cross_product_configs(
    M=[128, 1024],
    N=[128, 1024],
    parts=[2, 4],
    device=['cpu'],
    tags=['long']
)
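
# Completion sketch (assumed), following the Chunk benchmark in Example #3
class SplitBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, parts, device):
        self.input_one = torch.rand(M, N, device=device)
        # Split along dim 0 into `parts` pieces
        self.split_size = M // parts
        self.set_module_name('split')

    def forward(self):
        return torch.split(self.input_one, self.split_size)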

Example #9

import operator_benchmark as op_bench
import torch
import torch.nn as nn


"""
Microbenchmarks for the softmax operators.
"""


# Configs for softmax ops
softmax_configs_short = op_bench.config_list(
    attr_names=[
        'N', 'C', 'H', 'W'
    ],
    attrs=[
        [4, 3, 256, 256],
        [8, 3, 512, 512],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=['short']
)


softmax_configs_long = op_bench.cross_product_configs(
    N=[8, 16],
    C=[3, 64],
    H=[64, 128],
    W=[64, 128],
    device=['cpu', 'cuda'],
    tags=['long']
)
Example #10

import operator_benchmark as op_bench
import torch
"""Microbenchmarks for add_ operator. Supports both Caffe2/PyTorch."""

# Configs for PT add operator
add_long_configs = op_bench.cross_product_configs(M=[8, 128],
                                                  N=[32, 64],
                                                  K=[256, 512],
                                                  device=['cpu', 'cuda'],
                                                  tags=["long"])

add_short_configs = op_bench.config_list(
    attr_names=["M", "N", "K"],
    attrs=[
        [1, 1, 1],
        [64, 64, 64],
        [64, 64, 128],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)


class AddBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device):
        self.input_one = torch.rand(M,
                                    N,
                                    K,
                                    device=device,
                                    requires_grad=self.auto_set())
        self.input_two = torch.rand(M,
                                    N,
                                    K,
                                    device=device,
                                    requires_grad=self.auto_set())
        self.set_module_name("add")

    def forward(self):
        return torch.add(self.input_one, self.input_two)
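
Example #11

# Reconstructed header (assumed): the original snippet starts mid-dict.
import operator_benchmark as op_bench
import torch
import torch.nn.quantized as nnq

"""Microbenchmarks for quantize/dequantize per tensor."""

# Attr names inferred from the long-config dict below
quantize_configs_short_dict = {
    'attr_names': ['C', 'M', 'N', 'dtype', 'mode'],
    'attrs': [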
        [3, 512, 512, torch.quint8, 'D'],
    ],
    'tags': ['short'],
}

quantize_configs_long_dict = {
    # C is reused for the per-channel benchmarks: avoid a single-channel test
    'C': [3, 5, 8],
    'M': [256, 1024],
    'N': [256, 1024],
    'dtype': [torch.quint8, torch.qint8, torch.qint32],
    'mode': ['D', 'Q'],
    'tags': ['long'],
}

quantize_per_tensor_configs_short = op_bench.config_list(
    **quantize_configs_short_dict)

quantize_per_tensor_configs_long = op_bench.cross_product_configs(
    **quantize_configs_long_dict)


class QuantizePerTensorBenchmark(op_bench.TorchBenchmarkBase):
    r"""Benchmarks both quantization and dequantization."""
    def init(self, C, M, N, dtype, mode):
        assert (mode in ('Q', 'D'))
        self.input = torch.rand(C, M, N)
        self.dtype = dtype
        self.op = nnq.Quantize(scale=1.0, zero_point=0, dtype=dtype)
        self.set_module_name('QuantizePerTensor')

        if mode == 'D':
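            # Benchmark dequantization instead: quantize once up front
            self.input = self.op(self.input)
            self.op = nnq.DeQuantize()
            self.set_module_name('DequantizePerTensor')

    def forward(self):
        return self.op(self.input)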
Example #12
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import operator_benchmark as op_bench
import torch
"""Microbenchmarks for quantized batchnorm operator."""

batchnorm_configs_short = op_bench.config_list(attr_names=["M", "N", "K"],
                                               attrs=[
                                                   [1, 256, 3136],
                                               ],
                                               cross_product_configs={
                                                   'device': ['cpu'],
                                                   'dtype': (torch.qint8, ),
                                               },
                                               tags=["short"])


class QBatchNormBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device, dtype):
        self._init(M, N, K, device)
        x_scale = 0.1
        x_zero_point = 0
        self.q_input_one = torch.quantize_per_tensor(self.input_one,
                                                     scale=x_scale,
                                                     zero_point=x_zero_point,
                                                     dtype=dtype)
        self.mean = torch.rand(N)
        self.var = torch.rand(N)
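        # Completion sketch (assumed): affine parameters; `_init` above is
        # expected to create self.input_one on the given device
        self.weight = torch.rand(N)
        self.bias = torch.rand(N)
        self.eps = 1e-5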
Example #13

import benchmark_caffe2 as op_bench_c2
import operator_benchmark as op_bench

from caffe2.python import core


"""Microbenchmarks for element-wise ReplaceNaN operator."""

# Configs for C2 ReplaceNaN operator
replace_nan_long_configs = op_bench.cross_product_configs(
    M=[32, 64, 128], N=range(32, 128, 32), dtype=["float", "double"], tags=["long"]
)


replace_nan_short_configs = op_bench.config_list(
    attrs=[
        [16, 16, "float"],
        [16, 16, "double"],
        [64, 64, "float"],
        [64, 64, "double"],
    ],
    attr_names=["M", "N", "dtype"],
    tags=["short"],
)


class ReplaceNaNBenchmark(op_bench_c2.Caffe2BenchmarkBase):
    def init(self, M, N, dtype):
        self.input = self.tensor([M, N], dtype)
        self.set_module_name("replace_nan")

    def forward(self):
        op = core.CreateOperator("ReplaceNaN", self.input, self.input, value=1.0)
        return op
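
# Completion (assumed), mirroring the registration step in the other examples
op_bench.generate_c2_test(replace_nan_long_configs + replace_nan_short_configs,
                          ReplaceNaNBenchmark)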
Example #14
import operator_benchmark as op_bench
import torch
"""Microbenchmarks for element-wise Add operator. Supports both Caffe2/PyTorch."""

# Configs for PT add operator
add_long_configs = op_bench.cross_product_configs(
    M=[8, 64, 128],
    N=range(2, 10, 3),
    K=[2**x for x in range(0, 3)],
    tags=["long"])

add_short_configs = op_bench.config_list(
    attrs=[
        [64, 64, 64],
        [64, 64, 128],
    ],
    attr_names=["M", "N", "K"],
    tags=["short"],
)


class AddBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K):
        self.input_one = torch.rand(M, N, K)
        self.input_two = torch.rand(M, N, K)
        self.set_module_name("add")

    def forward(self):
        return torch.add(self.input_one, self.input_two)
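
# Completion, mirroring the test registration in Example #6
op_bench.generate_pt_test(add_long_configs + add_short_configs, AddBenchmark)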

Example #15
import operator_benchmark as op_bench
import torch
from typing import List
"""Microbenchmarks for as_strided operator"""

# Configs for PT as_strided operator
as_strided_configs_short = op_bench.config_list(
    attr_names=["M", "N", "size", "stride", "storage_offset"],
    attrs=[
        [8, 8, (2, 2), (1, 1), 0],
        [256, 256, (32, 32), (1, 1), 0],
        [512, 512, (64, 64), (2, 2), 1],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)

as_strided_configs_long = op_bench.cross_product_configs(
    M=[512],
    N=[1024],
    size=[(16, 16), (128, 128)],
    stride=[(1, 1)],
    storage_offset=[0, 1],
    device=['cpu', 'cuda'],
    tags=['long'])


class As_stridedBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, size, stride, storage_offset, device):
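        # Completion sketch, following the other benchmarks in this suite
        self.input_one = torch.rand(M, N, device=device)
        self.size = size
        self.stride = stride
        self.storage_offset = storage_offset
        self.set_module_name('as_strided')

    def forward(self):
        return torch.as_strided(self.input_one, self.size,
                                self.stride, self.storage_offset)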
Example #16

import operator_benchmark as op_bench
import torch
import torch.nn as nn


"""
Microbenchmarks for the hardsigmoid operator.
"""


# Configs for hardsigmoid ops
hardsigmoid_configs_short = op_bench.config_list(
    attr_names=[
        'N', 'C', 'H', 'W'
    ],
    attrs=[
        [1, 3, 256, 256],
        [4, 3, 256, 256],
    ],
    cross_product_configs={
        'device': ['cpu'],
    },
    tags=['short']
)


hardsigmoid_configs_long = op_bench.cross_product_configs(
    N=[8, 16],
    C=[3],
    H=[256, 512],
    W=[256, 512],
    device=['cpu'],
    tags=['long']
)
Example #17

import operator_benchmark as op_bench
import torch

"""Microbenchmarks for ClipRanges operator."""

# Reconstructed header (assumed): the original snippet starts mid-config.
clip_ranges_long_configs = op_bench.cross_product_configs(
    LENGTH=range(1, 100),
    M=[1],
    N=[2],
    MAX_LENGTH=range(1, 100),
    device=['cpu', 'cuda'],
    dtype=[torch.int32],
    tags=["long"],
)

clip_ranges_short_configs = op_bench.config_list(
    attrs=[
        [6, 1, 2, 1, torch.int32],
        [7, 1, 2, 2, torch.int32],
        [8, 1, 2, 3, torch.int32],
        [9, 1, 2, 4, torch.int32],
        [10, 1, 2, 5, torch.int32],
    ],
    attr_names=["LENGTH", "M", "N", "MAX_LENGTH", "dtype"],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)


class ClipRangesBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, LENGTH, M, N, MAX_LENGTH, device, dtype):
        self.inputs = {
            "input": torch.rand(LENGTH, M, N, device=device).type(dtype),
            "max_length": MAX_LENGTH
        }
        self.set_module_name("clip_ranges")
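    # Completion sketch (assumed): clip_ranges is an internally registered op,
    # reached here through the torch.ops namespace
    def forward(self, input, max_length: int):
        return torch.ops.fb.clip_ranges(input, max_length)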
Example #18
import operator_benchmark as op_bench
import torch

# Configs for pointwise and reduction unary ops
qmethods_configs_short = op_bench.config_list(
    attr_names=['M', 'N'],
    attrs=[
        [32, 32],
    ],
    cross_product_configs={
        'dtype': [torch.quint8],
        'contig': [False, True],
    },
    tags=['short']
)

qmethods_configs_long = op_bench.cross_product_configs(
    M=[256, 1024],
    N=[256, 1024],
    dtype=[torch.qint8, torch.qint32],
    contig=[False, True],
    tags=['long']
)


class _QMethodBenchmarkBase(op_bench.TorchBenchmarkBase):
    def init(self, M, N, dtype, contig):
        f_input = torch.rand(M, N)
        scale = 1.0
        zero_point = 0
        self.q_input = torch.quantize_per_tensor(f_input, scale=scale,
                                                 zero_point=zero_point,
                                                 dtype=dtype)
        if not contig:
            # Non-contiguous case (assumed): permute to break contiguity
            self.q_input = self.q_input.permute(1, 0)
Example #19
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import operator_benchmark as op_bench
import torch
import torch.nn as nn
"""
Microbenchmarks for the softmax operators.
"""

# Configs for softmax ops
softmax_configs_short = op_bench.config_list(attrs=[
    [1, 3, 32, 32],
    [2, 3, 64, 64],
],
                                             attr_names=['N', 'C', 'H', 'W'],
                                             tags=['short'])

softmax_configs_long = op_bench.config_list(attrs=[
    [8, 3, 128, 128],
    [16, 512, 14, 14],
    [16, 256, 28, 28],
],
                                            attr_names=['N', 'C', 'H', 'W'],
                                            tags=['long'])

softmax_ops_list = op_bench.op_list(
    attr_names=['op_name', 'op_func'],
    attrs=[
        ['Softmax', nn.Softmax],
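        # Completion (assumed): close the op list; further entries elided
        ['LogSoftmax', nn.LogSoftmax],
    ],
)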
Example #20
import operator_benchmark as op_bench
import torch
import numpy
"""Microbenchmarks for gather operator."""

# An example input from this configuration is M=256, N=512, dim=0.
gather_configs_short = op_bench.config_list(attr_names=["M", "N", "dim"],
                                            attrs=[
                                                [256, 512, 0],
                                                [512, 512, 1],
                                            ],
                                            cross_product_configs={
                                                'device': ['cpu', 'cuda'],
                                            },
                                            tags=["short"])

gather_configs_long = op_bench.cross_product_configs(M=[128, 1024],
                                                     N=[128, 1024],
                                                     dim=[0, 1],
                                                     device=['cpu', 'cuda'],
                                                     tags=["long"])


class GatherBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, dim, device):
        self.input_one = torch.rand(M, N, device=device)
        self.dim = dim
        min_val = M if dim == 0 else N
        numpy.random.seed((1 << 32) - 1)
        self.index = torch.tensor(numpy.random.randint(0, min_val, (M, N)),
                                  device=device)
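        self.set_module_name('gather')

    def forward(self):
        return torch.gather(self.input_one, self.dim, self.index)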
Example #21
import operator_benchmark as op_bench
import torch

"""
Microbenchmarks for batch matrix mult with einsum and torch.bmm.
"""

batch_mm_configs_short = op_bench.config_list(
    attr_names=["B", "M", "N", "K"],
    attrs=[
        [4, 5, 3, 2],
        [32, 25, 20, 30],
        [128, 100, 120, 110],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)

batch_mm_configs_long = op_bench.config_list(
    attr_names=["B", "M", "N", "K"],
    attrs=[
        [128, 256, 128, 256],
        [512, 1024, 1024, 512],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["long"],
)
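
# Completion sketch (assumed names): einsum batch-matmul benchmark
class BatchMatMulBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, B, M, N, K, device):
        self.input_one = torch.rand(B, M, N, device=device)
        self.input_two = torch.rand(B, N, K, device=device)
        self.set_module_name('einsum_bmm')

    def forward(self):
        # torch.einsum('bij,bjk->bik', ...) is equivalent to torch.bmm
        return torch.einsum('bij,bjk->bik', self.input_one, self.input_two)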
Example #22
import torch

import operator_benchmark as op_bench

# 2D pooling takes an input tensor of rank 3 (CHW) or 4 (NCHW)
qpool2d_long_configs = op_bench.config_list(
    attrs=(
        #  C    H    W   k       s       p
        (1, 3, 3, (3, 3), (1, 1), (0, 0)),  # dummy        # noqa
        (3, 64, 64, (3, 3), (2, 2), (1, 1)),  # dummy        # noqa
        # VGG16 pools with original input shape: (-1, 3, 224, 224)
        (64, 224, 224, (2, 2), (2, 2), (0, 0)),  # MaxPool2d-4  # noqa
        (256, 56, 56, (2, 2), (2, 2), (0, 0)),  # MaxPool2d-16 # noqa
    ),
    attr_names=(
        'C',
        'H',
        'W',  # Input layout
        'k',
        's',
        'p'),  # Pooling parameters
    cross_product_configs={
        'N': (1, 4),
        'contig': (False, True),
        'dtype': (torch.quint8, ),
    },
    tags=('long', ))

qpool2d_short_configs = op_bench.config_list(
    attrs=((1, 3, 3, (3, 3), (1, 1), (0, 0)), ),  # dummy  # noqa
    attr_names=(
        'C',
        'H',
        'W',  # Input layout
        'k',
        's',
        'p'),  # Pooling parameters
    cross_product_configs={
        'N': (1, 4),
        'contig': (False, True),
        'dtype': (torch.quint8, ),
    },
    tags=('short', ))
Example #23

import benchmark_caffe2 as op_bench_c2
import operator_benchmark as op_bench

from caffe2.python import core
"""Microbenchmarks for MatMul operator"""

# Configs for C2 Matmul operator
mm_long_configs = op_bench.cross_product_configs(M=[8, 64, 128],
                                                 N=range(2, 10, 3),
                                                 K=[2**x for x in range(0, 3)],
                                                 trans_a=[True, False],
                                                 trans_b=[True, False],
                                                 tags=["long"])

mm_short_configs = op_bench.config_list(
    attrs=[
        [128, 128, 128, False, True],
        [1024, 1024, 256, True, False],
        [8192, 8192, 1024, True, False],
    ],
    attr_names=["M", "N", "K", "trans_a", "trans_b"],
    tags=["short"],
)


class MatMulBenchmark(op_bench_c2.Caffe2BenchmarkBase):
    def init(self, M, N, K, trans_a, trans_b):
        self.input_one = self.tensor([N, M]) if trans_a else self.tensor(
            [M, N])
        self.input_two = self.tensor([K, N]) if trans_b else self.tensor(
            [N, K])
        self.args = {'trans_a': trans_a, 'trans_b': trans_b}
        self.output = self.tensor([M, K])
        self.set_module_name("matmul")
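
    def forward(self):
        op = core.CreateOperator("MatMul", [self.input_one, self.input_two],
                                 self.output, **self.args)
        return op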
Example #24

import operator_benchmark as op_bench
import torch

"""Microbenchmarks for the interpolate operator."""


class InterpolateBenchmark(op_bench.TorchBenchmarkBase):
    # Reconstructed init (assumed): the original snippet starts mid-class.
    # `input_size`, `output_size`, and `channels_last` come from the configs
    # below; `mode` defaults to 'bilinear' so that align_corners applies.
    def init(self, input_size, output_size, channels_last=False, mode='bilinear'):
        input_image = torch.rand(input_size)
        if channels_last and input_image.dim() == 4:
            input_image = input_image.contiguous(memory_format=torch.channels_last)
        self.inputs = {
            "input_image": input_image,
            "output_size": output_size,
            "mode": mode,
        }
        self.set_module_name("interpolate")

    def forward(self, input_image, output_size, mode):
        return torch.nn.functional.interpolate(input_image,
                                               size=output_size,
                                               mode=mode,
                                               align_corners=False)


config_short = op_bench.config_list(
    attr_names=["input_size", "output_size"],
    attrs=[
        [(1, 3, 60, 40), (24, 24)],
        [(1, 3, 600, 400), (240, 240)],
        [(1, 3, 320, 320), (256, 256)],
    ],
    cross_product_configs={
        'channels_last': [True, False],
    },
    tags=["short"],
)

config_long = op_bench.config_list(
    attr_names=["input_size", "output_size"],
    attrs=[
        [(1, 3, 320, 320), (512, 512)],
        [(1, 3, 500, 500), (256, 256)],
        [(1, 3, 500, 500), (800, 800)],
        [(2, 128, 64, 46), (128, 128)],
    ],
    cross_product_configs={
        'channels_last': [True, False],
    },
    tags=["long"],
)
Example #25
from __future__ import absolute_import, division, print_function, unicode_literals
import operator_benchmark as op_bench
import torch


# Configs for pointwise unary ops
unary_ops_configs = op_bench.config_list(
    attrs=[
        [128, 128],
    ],
    attr_names=["M", "N"],
    tags=["short"]
)


unary_ops_list = op_bench.op_list(
    attr_names=["op_name", "op_func"],
    attrs=[
        ["abs", torch.abs],
        ["acos", torch.acos],
    ],
)


class UnaryOpBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, op_func): 
        self.input_one = torch.rand(M, N)
        self.op_func = op_func

    def forward(self):
        return self.op_func(self.input_one)
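
# Completion (assumed): generate one test per (op, config) pair
op_bench.generate_pt_tests_from_op_list(unary_ops_list,
                                        unary_ops_configs,
                                        UnaryOpBenchmark)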
Example #26
import operator_benchmark as op_bench
import torch
from torch import nn
"""
Microbenchmarks for RNNs.
"""

qrnn_configs = op_bench.config_list(
    attrs=[
        [1, 3, 1],
        [5, 7, 4],
    ],
    # names: input_size, hidden_size, num_layers
    attr_names=["I", "H", "NL"],
    cross_product_configs={
        "B": (True, ),  # Bias always True for quantized
        "D": (False, True),  # Bidirectional
        "dtype": (torch.qint8, )  # Only qint8 dtype works for now
    },
    tags=["short"])


class LSTMBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, I, H, NL, B, D, dtype):
        sequence_len = 128
        batch_size = 16

        # The quantized.dynamic.LSTM has a bug. That's why we create a regular
        # LSTM, and quantize it later. See issue #31192.
        scale = 1.0 / 256
        zero_point = 0
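        # Completion sketch (assumed): build a float LSTM to quantize
        # dynamically afterwards, per the comment above
        self.cell_nn = nn.LSTM(input_size=I, hidden_size=H, num_layers=NL,
                               bias=B, bidirectional=D)

Example #27

# Reconstructed header (assumed): the original snippet starts mid-file.
import operator_benchmark as op_bench
import torch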
import torch.nn.quantized as nnq
"""
Microbenchmarks for Quantized Linear operators.
"""

# Configs for qlinear
qlinear_configs = op_bench.config_list(
    attrs=[
        [1024, 1024, 1024],
        [64, 800, 320],
        [64, 768, 512],
        [16, 256, 512],
        [128, 128, 128],
        [256, 512, 256],
        [6400, 15, 141],
        [6400, 8, 141],
        [16, 211, 2504],
        [16, 369, 1434],
        [1, 1024, 3496],
        [16, 256, 512],
        [1, 1600, 3456],
    ],
    attr_names=["N", "OUT", "IN"],  # M, N, K
    tags=["short"],
)


class QLinearBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, IN, OUT):
        scale = 1.0 / 255
        zero_point = 0
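        # Completion sketch (assumed), mirroring the other quantized benchmarks
        self.input = torch.quantize_per_tensor(torch.rand(N, IN), scale=scale,
                                               zero_point=zero_point,
                                               dtype=torch.quint8)
        self.qlinear = nnq.Linear(IN, OUT)
        self.set_module_name("QLinear")

    def forward(self):
        return self.qlinear(self.input)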
Example #28
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import operator_benchmark as op_bench
import torch
"""Microbenchmarks for MatMul operator"""

# Configs for PT Matmul operator
mm_short_configs = op_bench.config_list(
    attr_names=["M", "N", "K", "trans_a", "trans_b"],
    attrs=[
        [1, 1, 1, True, False],
        [128, 128, 128, True, False],
        [256, 256, 256, False, True],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
    },
    tags=["short"],
)

mm_long_configs = op_bench.cross_product_configs(M=[32],
                                                 N=[512, 128],
                                                 K=[64],
                                                 trans_a=[False, True],
                                                 trans_b=[True, False],
                                                 device=['cpu', 'cuda'],
                                                 tags=["long"])
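
# Completion sketch (assumed), following the Caffe2 version in Example #23
class MatMulBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, trans_a, trans_b, device):
        # Store transposed layouts when requested; transpose back at use time
        self.input_one = torch.rand(N, M, device=device) if trans_a \
            else torch.rand(M, N, device=device)
        self.input_two = torch.rand(K, N, device=device) if trans_b \
            else torch.rand(N, K, device=device)
        self.trans_a, self.trans_b = trans_a, trans_b
        self.set_module_name("matmul")

    def forward(self):
        a = self.input_one.t() if self.trans_a else self.input_one
        b = self.input_two.t() if self.trans_b else self.input_two
        return torch.matmul(a, b)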

Example #29
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import operator_benchmark as op_bench
import torch
import torch.nn as nn
"""Microbenchmarks for Linear operator."""

linear_configs_short = op_bench.config_list(attr_names=["N", "IN", "OUT"],
                                            attrs=[
                                                [4, 256, 128],
                                                [16, 512, 256],
                                            ],
                                            cross_product_configs={
                                                'device': ['cpu', 'cuda'],
                                            },
                                            tags=["short"])

linear_configs_long = op_bench.cross_product_configs(N=[32, 64],
                                                     IN=[128, 512],
                                                     OUT=[64, 128],
                                                     device=['cpu', 'cuda'],
                                                     tags=["long"])


class LinearBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, N, IN, OUT, device):
        self.input_one = torch.rand(N, IN, device=device)
        self.linear = nn.Linear(IN, OUT).to(device=device)
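        self.set_module_name("linear")

    def forward(self):
        return self.linear(self.input_one)

Example #30
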
import operator_benchmark as op_bench
import torch

"""Microbenchmarks for element-wise Add operator. Supports both Caffe2/PyTorch."""

add_short_configs = op_bench.config_list(
    attr_names=['M', 'N', 'K'], 
    attrs=[
        [8, 16, 32],
        [16, 16, 64],
        [64, 64, 128],
    ],
    cross_product_configs={
        'device': ['cpu', 'cuda'],
        'dtype': [torch.float, torch.float64],
    },
    tags=['short'], 
)


class AddBenchmark(op_bench.TorchBenchmarkBase):
    def init(self, M, N, K, device, dtype): 
        self.input_one = torch.rand(M, N, K, device=device, dtype=dtype, requires_grad=True)
        self.input_two = torch.rand(M, N, K, device=device, dtype=dtype)
        self.set_module_name('add')

    def forward(self):
        return torch.add(self.input_one, self.input_two)


op_bench.generate_pt_test(add_short_configs, AddBenchmark)