Example #1
'''VGG11/13/16/19 in PyTorch.'''
import torch.nn as nn
from cnns.nnlib.pytorch_layers.conv_picker import Conv
from cnns.nnlib.utils.arguments import Arguments
from cnns.nnlib.utils.general_utils import ConvType
from cnns.nnlib.utils.general_utils import CompressType
import torch

args = Arguments()
args.conv_type = ConvType.FFT2D
args.compress_rate = 80.0
args.dtype = torch.float
args.preserve_energy = None
args.next_power2 = True
args.is_debug = False
args.compress_type = CompressType.STANDARD


def conv3x3(in_planes, out_planes, compress_rate=args.compress_rate,
            stride=1, padding=1, args=args):
    """3x3 convolution with padding"""
    # The standard spatial convolution this FFT-based layer replaces:
    # return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
    #                  padding=1, bias=False)
    args.compress_rate = compress_rate  # note: mutates the shared module-level args
    return Conv(kernel_sizes=[3], in_channels=in_planes,
                out_channels=[out_planes], strides=[stride],
                padding=[padding], args=args, is_bias=False).get_conv()


cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512,
              512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512,
              'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512,
              512, 512, 'M', 512, 512, 512, 512, 'M'],
}
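
The Conv factory above swaps nn.Conv2d for an FFT-based convolution. As a self-contained illustration of the underlying idea (a sketch, not the cnns library's actual implementation), the convolution theorem lets a no-padding spatial cross-correlation be computed as a pointwise product with the conjugated kernel spectrum, using only the public torch.fft API:

# Illustrative sketch only: FFT-based "valid" cross-correlation for a single
# channel. The real cnns Conv layer additionally handles batching, channels,
# striding, and frequency-domain compression (compress_rate).
import torch
import torch.nn.functional as F

def fft_correlate2d(x, w):
    """Valid-mode 2D cross-correlation of x (H, W) with kernel w (K, K)."""
    H, W = x.shape
    K = w.shape[0]
    # Zero-pad the kernel to the input size so the two spectra align.
    w_pad = torch.zeros(H, W, dtype=x.dtype)
    w_pad[:K, :K] = w
    X = torch.fft.rfft2(x)
    Wf = torch.fft.rfft2(w_pad)
    # Conjugating the kernel spectrum turns circular convolution into
    # circular correlation (the "conjugate" and "correlation" steps).
    y = torch.fft.irfft2(X * Wf.conj(), s=(H, W))
    # Crop to the valid region, matching a spatial conv with no padding.
    return y[:H - K + 1, :W - K + 1]

x, w = torch.randn(8, 8), torch.randn(3, 3)
ref = F.conv2d(x[None, None], w[None, None]).squeeze()
assert torch.allclose(fft_correlate2d(x, w), ref, atol=1e-4)

The pad, (r)fft, conjugate, correlation (pointwise product), and i(r)fft steps here correspond one-to-one to the stage timings printed in Example #2 below.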
Example #2
    # Note: this snippet is a test method; it assumes module-level imports of
    # time, torch, resnet18, and the enums NetworkType, TensorType, ConvType,
    # ConvExecType, and CompressType (ConvType and CompressType are imported
    # from cnns.nnlib.utils.general_utils in Example #1).
    def test_forward_pass_resnet18(self):
        """
        total time for (ConvType.STANDARD2D-ConvExecType.SERIAL): 6.813918352127075
        total time for (ConvType.FFT2D-ConvExecType.CUDA): 53.35197567939758
        total time for (ConvType.FFT2D-ConvExecType.SGEMM): 55.51149845123291

        total time for (ConvType.STANDARD2D-ConvExecType.SERIAL): 6.736859083175659
        total time for (ConvType.FFT2D-ConvExecType.CUDA): 53.84979581832886
        total time for (ConvType.FFT2D-ConvExecType.SGEMM): 56.26755166053772

        global init time:  0.24471688270568848
        global pad time:  4.250756025314331
        (r)fft time:  8.754997730255127
        conjugate time:  3.734828233718872
        correlation time:  25.324009656906128
        restore time (de-compress/concat output):  0.021800994873046875
        i(r)fft time:  8.525353193283081
        total time for (ConvType.FFT2D-ConvExecType.SGEMM): 56.27733850479126
        GPU mem: 2903

        global init time:  0.2371835708618164
        global pad time:  4.492943286895752
        (r)fft time:  9.08437442779541
        conjugate time:  3.8394811153411865
        correlation time:  25.043412446975708
        restore time (de-compress/concat output):  0.021334409713745117
        i(r)fft time:  5.491833925247192
        total time for (ConvType.FFT2D-ConvExecType.CUDA): 53.804604053497314
        GPU mem: 2679
        """
        if not torch.cuda.is_available():
            self.skipTest("CUDA device is not available.")

        device = torch.device("cuda")
        print("\ndevice used: ", str(device))

        C = 3
        # dtype = torch.float
        # random mini batch imitating cifar-10
        # N, H, W = 128, 32, 32
        # inputs = torch.randn(N, C, H, W, dtype=dtype, device=device,
        #                      requires_grad=True)
        args = Arguments()
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

        args.sample_count_limit = 10000
        args.min_batch_size = 32
        args.dataset_name = "cifar10"
        args.test_batch_size = args.min_batch_size
        args.network_type = NetworkType.ResNet18
        from cnns.nnlib.datasets.cifar import get_cifar
        train_loader, test_loader, _, _ = get_cifar(
            args=args, dataset_name=args.dataset_name)

        repetition = 1

        args.in_channels = C
        args.compress_rate = None
        args.preserve_energy = 100
        args.is_debug = True
        args.next_power2 = True
        args.compress_type = CompressType.STANDARD
        args.tensor_type = TensorType.FLOAT32
        args.num_classes = 10
        args.dtype = torch.float32
        conv_exec_types = [
            # (ConvType.STANDARD2D, ConvExecType.SERIAL),
            # (ConvType.FFT2D, ConvExecType.CUDA),
            (ConvType.FFT2D, ConvExecType.SGEMM),
            # (ConvType.FFT2D, ConvExecType.CUDA_SHARED_LOG),
            # (ConvType.FFT2D, ConvExecType.CUDA_DEEP),
            # (ConvType.FFT2D, ConvExecType.SERIAL),
            # (ConvType.FFT2D, ConvExecType.BATCH),
        ]

        for conv_type, conv_exec_type in conv_exec_types:
            args.conv_type = conv_type
            args.conv_exec_type = conv_exec_type
            model = resnet18(args=args)
            model.to(device)
            model.eval()
            start_eval = time.time()
            with torch.no_grad():  # inference only; no autograd bookkeeping
                for _ in range(repetition):
                    for inputs, _ in train_loader:
                        inputs = inputs.to(device)
                        outputs = model(inputs)
            # CUDA kernels launch asynchronously; wait for them to finish
            # before reading the wall clock.
            torch.cuda.synchronize()
            total_time = time.time() - start_eval
            print(f"total time for ({conv_type}-{conv_exec_type}):"
                  f" {total_time}")