Python count_ops示例，pthflops.count_ops Python示例

示例#1

0

显示文件

文件： test_ops.py 项目： lijiunderstand/pytorch-estimate-flops

    def test_overall(self):
        """Compare the op count reported for ``resnet18`` with a fixed reference."""
        reference = 1826843136
        sample = torch.rand(1, 3, 224, 224)
        model = resnet18()

        # print_readable=False keeps the call from emitting the readable report.
        measured = count_ops(model, sample, print_readable=False)
        assert reference == measured

示例#2

0

显示文件

文件： post_training.py 项目： MLRichter/size_matters_paper

    def __call__(self, trainer: Trainer):
        """Profile ``trainer.model`` and write the figures to a JSON file.

        Runs ``count_ops`` and ``get_model_complexity_info`` on the model with
        a 3-channel square input, derives a total training cost, and stores
        the raw and string-formatted values in ``computational_info.json``
        inside the directory of ``trainer._save_path``.
        """
        side = trainer.data_bundle.output_resolution
        resolution = (3, side, side)
        dummy_batch = torch.rand(1, *resolution).to(trainer.device)

        ops, _ = count_ops(trainer.model, dummy_batch)
        macs, params = get_model_complexity_info(
            trainer.model,
            resolution,
            as_strings=False,
            print_per_layer_stat=True,
            verbose=True,
        )

        # NOTE(review): the sample count is dataset length *times* batch size;
        # confirm this is the intended quantity.
        n_samples = len(trainer.data_bundle.train_dataset) * trainer.batch_size
        total_train_flops = macs * 2 * 3 * n_samples * trainer.epochs

        to_text = self._flops_to_string
        results = {
            "flops": ops,
            "macs": macs,
            "params": params,
            "total flops": total_train_flops,
            "str flops": to_text(ops, "FLOPS"),
            "str macs": to_text(macs, "MAC"),
            "str params": to_text(params, "Params"),
            "str total flops": to_text(total_train_flops, "FLOPS"),
        }

        target_dir = os.path.dirname(trainer._save_path)
        savefile = os.path.join(target_dir, "computational_info.json")
        with open(savefile, "w+") as fp:
            json.dump(results, fp)

示例#3

0

显示文件

文件： test_ops.py 项目： senysenyseny16/pytorch-estimate-flops

 def test_overall(self):
     """Check the ``resnet18`` op count against a reference within 1e6 ops."""
     input = torch.rand(1, 3, 224, 224)
     net = resnet18()
     estimated, _ = count_ops(net,
                              input,
                              print_readable=False,
                              verbose=False)
     expected = 1826843136
     # The second positional argument of pytest.approx is the *relative*
     # tolerance; passing 1000000 there made the check accept almost any
     # value. The tolerance is an absolute number of operations.
     assert expected == pytest.approx(estimated, abs=1000000)

示例#4

0

显示文件

文件： nas_binary.py 项目： mujjingun/COMBO

    def __init__(self, data_type, device=None):
        """Configure the binary NAS problem for one dataset.

        Sets the search-space sizes, the dataset-dependent input dimensions,
        the op count of the most complex candidate network, and per-variable
        adjacency / spectral data.

        Args:
            data_type: One of 'MNIST', 'FashionMNIST' or 'CIFAR10'.
            device: GPU index. Overwritten with 0 when exactly one GPU is
                reported, and with None when CUDA is unavailable.
        """
        assert data_type in ['MNIST', 'FashionMNIST', 'CIFAR10']
        self.data_type = data_type
        self.n_nodes = 7
        # Number of node pairs among n_nodes nodes.
        self.n_edges = int(self.n_nodes * (self.n_nodes - 1) / 2)
        # Edge variables plus 2 * (n_nodes - 2) further variables; the latter
        # matches the length of the all-ones vector passed to NASBinaryCNN below.
        self.n_variables = int(self.n_edges + (self.n_nodes - 2) * 2)
        self.device = device
        self.n_repeat = 4
        if torch.cuda.is_available():
            if len(GPUtil.getGPUs()) == 1:
                self.device = 0
            else:
                # NOTE(review): with the default device=None this comparison
                # raises TypeError rather than AssertionError — confirm that
                # callers always pass an index on multi-GPU hosts.
                assert 0 <= self.device < len(GPUtil.getGPUs())
        else:
            self.device = None

        self.batch_size = 100
        # Input dimensions and base channel count depend on the dataset.
        if self.data_type == 'MNIST':
            self.n_ch_in, self.h_in, self.w_in = MNIST_N_CH_IN, MNIST_H_IN, MNIST_W_IN
            self.n_ch_base = 8
            self.n_epochs = 20
        elif self.data_type == 'FashionMNIST':
            self.n_ch_in, self.h_in, self.w_in = FashionMNIST_N_CH_IN, FashionMNIST_H_IN, FashionMNIST_W_IN
            self.n_ch_base = 8
            self.n_epochs = 20
        elif self.data_type == 'CIFAR10':
            self.n_ch_in, self.h_in, self.w_in = CIFAR10_N_CH_IN, CIFAR10_H_IN, CIFAR10_W_IN
            self.n_ch_base = 16
            self.n_epochs = 20

        # Every variable takes one of two values.
        self.n_vertices = np.array([2] * self.n_variables)

        # Candidate built from an all-ones node vector and a full strictly
        # upper-triangular adjacency matrix; its op count is stored as max_flops.
        most_complex_model = NASBinaryCNN(
            data_type,
            np.ones(2 * (self.n_nodes - 2)),
            np.triu(np.ones((self.n_nodes, self.n_nodes)), 1),
            n_ch_in=self.n_ch_in,
            h_in=self.h_in,
            w_in=self.w_in,
            n_ch_base=self.n_ch_base)

        self.suggested_init = init_architectures()
        # Dummy single-sample input created from the model's first parameter,
        # so it shares that parameter's dtype and device.
        dummy_input = next(most_complex_model.parameters()).new_ones(
            1, self.n_ch_in, self.h_in, self.w_in)
        self.max_flops = count_ops(most_complex_model, dummy_input)[0]

        self.adjacency_mat = []
        self.fourier_freq = []
        self.fourier_basis = []
        for i in range(self.n_variables):
            # 2x2 matrix with ones on the sub- and super-diagonal.
            adjmat = torch.diag(torch.ones(1), -1) + torch.diag(
                torch.ones(1), 1)
            self.adjacency_mat.append(adjmat)
            # Diagonal matrix of column sums minus the adjacency matrix.
            laplacian = torch.diag(torch.sum(adjmat, dim=0)) - adjmat
            # NOTE(review): torch.symeig is reported as deprecated in newer
            # PyTorch releases in favour of torch.linalg.eigh — confirm the
            # torch version this project targets.
            eigval, eigvec = torch.symeig(laplacian, eigenvectors=True)
            self.fourier_freq.append(eigval)
            self.fourier_basis.append(eigvec)

示例#5

0

显示文件

文件： branchyNet.py 项目： pachecobeto95/CARPEX

def countFlop(model, input_size):
    """Return the cumulative share of total ops after each traced layer.

    Args:
        model: Network handed to ``count_ops``.
        input_size: Indexable whose first three entries are used as the
            dimensions following a leading batch dimension of 1.

    Returns:
        tuple: ``(by_index, by_name)``; both map to the running fraction of
        the total op count, keyed by position in the per-layer list and by
        the second-to-last ``/``-separated component of the layer name.
    """
    dims = (input_size[0], input_size[1], input_size[2])
    dummy = torch.rand(1, *dims)
    total_ops, per_layer = count_ops(model, dummy, print_readable=False, verbose=True)

    # Start every index at zero, as the loop below overwrites each entry.
    by_index = dict.fromkeys(range(len(per_layer)), 0)
    by_name = {}

    running_share = 0
    for position, entry in enumerate(per_layer):
        running_share += entry[1] / total_ops
        by_index[position] = running_share
        scope_name = entry[0].split("/")[-2]
        by_name[scope_name] = running_share

    return by_index, by_name

示例#6

0

显示文件

文件： mobileNet.py 项目： pachecobeto95/distortion_robust_dnns_with_early_exit

    def countFlops(self):
        """Count ops for each module in ``self.model.features``.

        A random single-image batch of shape ``(1, 3, img_dim, img_dim)`` is
        pushed through the feature modules one at a time; each module is
        profiled on the output of the previous one.

        Returns:
            tuple: ``(flops_count_dict, flops_acc_dict, total_flops)`` where
            the dicts are keyed by 1-based module position and hold the
            per-module op count and the running total respectively.
        """
        x = torch.rand(1, 3, self.img_dim, self.img_dim).to(self.device)
        flops_count_dict = {}
        flops_acc_dict = {}
        total_flops = 0
        for i, layer in enumerate(self.model.features, 1):
            # Only the op count is needed; the second return value is unused.
            ops, _ = count_ops(layer,
                               x,
                               print_readable=False,
                               verbose=False)
            # Advance the activation so the next module sees its real input.
            x = layer(x)
            flops_count_dict[i] = ops
            total_flops += ops
            flops_acc_dict[i] = total_flops

        return flops_count_dict, flops_acc_dict, total_flops

示例#7

0

显示文件

文件： branchyNet.py 项目： pachecobeto95/CARPEX

    def countFlop(self, input_size):
        """Return the cumulative share of total ops after each layer of ``self.model``.

        Args:
            input_size: Indexable whose first three entries are used as the
                dimensions following a leading batch dimension of 1.

        Returns:
            tuple: ``(by_index, by_name)``; both map to the running fraction
            of the total op count, keyed by position in the per-layer list
            and by the second-to-last ``/``-separated component of the layer
            name.
        """
        dims = (input_size[0], input_size[1], input_size[2])
        dummy = torch.rand(1, *dims)
        total_ops, per_layer = count_ops(
            self.model,
            dummy,
            print_readable=False,
            verbose=True,
        )

        # Start every index at zero, as the loop below overwrites each entry.
        by_index = dict.fromkeys(range(len(per_layer)), 0)
        by_name = {}

        running_share = 0
        for position, entry in enumerate(per_layer):
            running_share += entry[1] / total_ops
            by_index[position] = running_share
            scope_name = entry[0].split("/")[-2]
            by_name[scope_name] = running_share

        return by_index, by_name

示例#8

0

显示文件

	net_config_ = args_.net_config
	n_nodes_ = args_.n_nodes
	n_epochs_ = args_.n_epochs
	n_ch_in_ = args_.n_ch_in
	h_in_ = args_.h_in
	w_in_ = args_.w_in
	n_ch_base_ = args_.n_ch_base
	device_ = args_.device

	n_edges_ = int(n_nodes_ * (n_nodes_ - 1) / 2)
	n_variables_ = int(n_edges_ + (n_nodes_ - 2) * 2)
	assert len(net_config_) == n_variables_
	node_type_, adj_mat_ = array2network(np.array([int(net_config_[i:i+1]) for i in range(n_variables_)]), n_nodes_)

	if adj_mat_ is None:
		eval_acc_ = 0.1
		flops_ = -1
	else:
		model_ = NASBinaryCNN(data_type_, node_type_, adj_mat_,
		                      n_ch_in=n_ch_in_, h_in=h_in_, w_in=w_in_, n_ch_base=n_ch_base_)
		if data_type_ == 'MNIST':
			train_loader_, valid_loader_, _ = load_mnist(batch_size=args_.batch_size, shuffle=True, random_seed=0)
		elif data_type_ == 'FashionMNIST':
			train_loader_, valid_loader_, _ = load_fashionmnist(batch_size=args_.batch_size, shuffle=True, random_seed=0)
		elif data_type_ == 'CIFAR10':
			train_loader_, valid_loader_, _ = load_cifar10(batch_size=args_.batch_size, shuffle=True, random_seed=0)
		eval_acc_ = train(model_, n_epochs_, train_loader_, valid_loader_, device_, display=False)
		dummy_input_ = next(model_.parameters()).new_ones(1, n_ch_in_, h_in_, w_in_)
		flops_ = count_ops(model_, dummy_input_, print_readable=False)

	print(f'eval_acc:{eval_acc_:.4f} flops:{flops_[0]}')

示例#9

0

显示文件

文件： VGG11_bn.py 项目： bawanag/Comparison-of-Different-Deep-Learning-technologies

import torch
import torch.nn as nn
from torchsummary import summary
from torchvision import models
from pthflops import count_ops
# Run on the GPU when CUDA is usable, otherwise on the CPU.
cuda_ok = torch.cuda.is_available()
device = torch.device("cuda" if cuda_ok else "cpu")
net = models.vgg11_bn()
net = net.to(device)

# Layer summary for a 3x224x224 input.
summary(net, (3, 224, 224))

# One random image, moved to the same device as the network, for op counting.
inp = torch.rand(1, 3, 224, 224).to(device)
count_ops(net, inp)

示例#10

0

显示文件

文件： calc_flops.py 项目： samuelyu2002/PFLD

import torch
from models.pfld_vovnet import vovnet_pfld
from models.pfld import PFLDInference

from pthflops import count_ops

# Use the first GPU when CUDA is available; otherwise fall back to the CPU so
# the script also runs on machines without a GPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = PFLDInference().to(device)
# One random 3x112x112 image on the same device as the model.
inp = torch.rand(1, 3, 112, 112).to(device)
count_ops(model, inp)

示例#11

0

显示文件

        out4_feature = self.scala4(feature_list[3]).view(x.size(0), -1)

        teacher_feature = out4_feature.detach()
        feature_loss = ((teacher_feature - out3_feature)**2 + (teacher_feature - out2_feature)**2 +\
                        (teacher_feature - out1_feature)**2).sum()

        out1 = self.fc1(out1_feature)
        out2 = self.fc2(out2_feature)
        out3 = self.fc3(out3_feature)
        out4 = self.fc4(out4_feature)

        return [out4, out3, out2, out1], feature_loss
        # None is prepared for Hint Learning


def resnet_small(pretrained=False, **kwargs):
    """Build a ``ResNet`` of ``BasicBlock`` units with layer config ``[1, 1, 1, 1]``.

    Args:
        pretrained (bool): Accepted but not used by this function; no
            weights are loaded here.
        **kwargs: Forwarded unchanged to ``ResNet``.
    """
    layer_config = [1, 1, 1, 1]
    return ResNet(BasicBlock, layer_config, **kwargs)


if __name__ == '__main__':
    # Build the 10-class small ResNet and print its op count for one
    # random 3x32x32 image.
    cuda_ok = torch.cuda.is_available()
    device = torch.device("cuda" if cuda_ok else "cpu")
    net = resnet_small(num_classes=10)
    net = net.to(device)
    inp = torch.rand(1, 3, 32, 32).to(device)
    ops, _ = count_ops(
        net,
        inp,
        print_readable=False,
        verbose=False,
    )
    print(ops)

示例#12

0

显示文件

def main(opt):
    """Train a RegNetY model and save a checkpoint after every epoch.

    Args:
        opt: Options object. This function reads ``batch_size``,
            ``data_path``, ``log_path``, ``saved_path``, the RegNetY
            hyper-parameters, ``lr``, ``momentum``, ``weight_decay`` and
            ``epochs`` from it.
    """
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    # The training batch size is scaled by the number of GPUs.
    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "num_workers": 12
    }

    # NOTE(review): integer division gives a batch size of 0 when
    # opt.batch_size is below 10 — confirm the expected range.
    test_params = {
        "batch_size": opt.batch_size // 10,
        "shuffle": False,
        "drop_last": False,
        "num_workers": 12
    }

    training_set = Imagenet(root_dir=opt.data_path, mode="train")
    training_generator = DataLoader(training_set, **training_params)

    test_set = Imagenet(root_dir=opt.data_path, mode="val")
    test_generator = DataLoader(test_set, **test_params)

    # Any existing log directory is deleted and recreated empty.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    model = RegNetY(opt.initial_width, opt.slope, opt.quantized_param,
                    opt.network_depth, opt.bottleneck_ratio, opt.group_width,
                    opt.stride, opt.se_ratio)

    dummy_input = torch.randn((1, 3, TRAIN_IMAGE_SIZE, TRAIN_IMAGE_SIZE))
    writer.add_graph(model, dummy_input)
    # Calculate model FLOPS and number of parameters
    # The return value of count_ops is discarded — presumably the call is
    # made for its printed output.
    count_ops(model, dummy_input, verbose=False)
    summary(model, (3, TRAIN_IMAGE_SIZE, TRAIN_IMAGE_SIZE), device="cpu")

    # The model is wrapped and moved to the GPU only after profiling on CPU.
    if torch.cuda.is_available():
        model = nn.DataParallel(model)
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(),
                    lr=opt.lr,
                    momentum=opt.momentum,
                    weight_decay=opt.weight_decay,
                    nesterov=True)
    best_acc1 = 0
    # NOTE(review): train mode is set once here; whether train()/validate()
    # switch modes themselves is not visible from this function.
    model.train()

    for epoch in range(opt.epochs):
        adjust_learning_rate(optimizer, epoch, opt.lr)
        train(training_generator, model, criterion, optimizer, epoch, writer)
        acc1 = validate(test_generator, model, criterion, epoch, writer)

        # Track the best validation result seen so far.
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            }, is_best, opt.saved_path)

示例#13

0

显示文件

def main():
    """Train or test the configured model, keeping the best checkpoint.

    Settings are read from the module-level ``configs`` object. In test mode
    the saved weights are loaded, top-1 / top-5 accuracy is printed and the
    function returns. Otherwise the model is trained with SGD, validated
    after every epoch, and the best state dict is written to ``model_path``.
    """

    print('Dataset is loading ...........')
    train_loader, val_loader, train_set, validation_set = loadCifa100()
    print('Make checkpoint folder')
    checkpoint = os.path.join(configs.checkpoint, configs.model + "_" + configs.attention)
    if not os.path.exists(checkpoint):
        os.makedirs(checkpoint)
    model_path = os.path.join(checkpoint,configs.attention+'_'+'best_model.pt')
    print('Load model')
    model = get_model(configs.model, configs.norm,configs.attention)
    print('\tModel loaded: ' + configs.model )
    print('\tAttention type: ' + configs.attention )
    print("\tNumber of parameters: ", sum([param.nelement() for param in model.parameters()]))
    if configs.test:
        print("Run model in test mode")
        if os.path.exists(model_path):
            model.load_state_dict(torch.load(model_path))
        else:
            raise Exception('Cannot find model', model_path)

    if configs.gpu:
        if torch.cuda.device_count() > 1:
            print("Using", torch.cuda.device_count(), "GPUs!")
            model = nn.DataParallel(model)
        model.cuda()
        cudnn.benchmark = True

    if configs.test:
        print('Testing...')
        model.eval()
        top1, top5 = getAccuracy(model,val_loader,validation_set)
        print('Accuracy on Top 1 accuracy: %.2f' % top1)
        print('Accuracy on Top 5 accuracy: %.2f' % top5)
        return
    # Change to True if you want to calculate FLOPS
    # NOTE(review): if count_ops returns a tuple here, '%d' % FLOPS would
    # raise TypeError — confirm against the installed pthflops version.
    # The file handle is also not closed if an exception occurs before
    # f.close().
    if False:
        from pthflops import count_ops
        f = open("flops.txt",'a+')
        inp = torch.rand(2,3,32,32).cuda()
        FLOPS = count_ops(model,inp)
        print('\tFLOPS: %d' % FLOPS)
        f.write('%d\n' % FLOPS)
        f.close()
        return


    # Tensor board
    tb = SummaryWriter(checkpoint)

    # Optimization
    optimizer = optim.SGD(model.parameters(), lr=configs.lr, momentum=0.9, weight_decay=configs.weight_decay,nesterov=True)
    scheduler = lr_scheduler.MultiStepLR(optimizer, configs.schedule, gamma=0.2)
    criterion = nn.CrossEntropyLoss()
    best_val_acc = -1
    # NOTE(review): `args` is not defined in this function while every other
    # setting is read from `configs` — confirm it exists at module level.
    for epoch in range(args.num_epochs):
        # Train process
        learning_rate = optimizer.param_groups[0]['lr']
        print('Start training epoch {}. Learning rate {}'.format(epoch, learning_rate))
        model.train()
        # Floor division: a trailing partial batch is not counted in the average.
        num_batches = len(train_set) // configs.batch_size
        running_loss = 0
        for i, (inputs, labels) in enumerate(tqdm(train_loader)):
            if configs.gpu:
                inputs, labels = (Variable(inputs.cuda()),Variable(labels.cuda()))
            labels = labels.squeeze()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.data.item()
            loss.backward()
            optimizer.step()
            del inputs, labels
        # The learning-rate schedule advances once per epoch.
        scheduler.step()
        train_loss = running_loss / num_batches
        print('\tTraining loss %f' % train_loss)

        model.eval()
        val_acc = 0
        num_batches = len(validation_set) // configs.batch_size + 1
        running_loss = 0
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(val_loader):
                if configs.gpu:
                    inputs, labels = (Variable(inputs.cuda()),Variable(labels.cuda()))
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.data.item()
                outputs, labels = outputs.data, labels.data
                # Top-1 prediction per sample, compared against the labels.
                _, preds = outputs.topk(1, 1, True, True)
                preds = preds.t()
                corrects = preds.eq(labels.view(1, -1).expand_as(preds))
                val_acc += torch.sum(corrects)
                del inputs, labels
        # val_acc is a tensor only after at least one validation batch; with
        # an empty val_loader it is still the int 0 and .item() would fail.
        val_acc = val_acc.item() / len(validation_set) * 100
        val_loss = running_loss / num_batches
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # NOTE(review): the model is wrapped in DataParallel only when
            # configs.gpu is also set, but this branch checks the device
            # count alone — confirm the two conditions always coincide.
            if torch.cuda.device_count() > 1:
                torch.save(model.module.state_dict(), model_path)
            else:
                torch.save(model.state_dict(), model_path)
        print('\tValidation loss %f' % (running_loss / num_batches))
        print('\tValidation acc', val_acc)
        print()

        # update tensorboard
        tb.add_scalar('Learning rate', learning_rate, epoch)
        tb.add_scalar('Train loss', train_loss, epoch)
        tb.add_scalar('Val loss', val_loss, epoch)
        tb.add_scalar('Val top1 acc', val_acc, epoch)

    print('Best validation acc %.2f' % best_val_acc)

示例#14

0

显示文件

文件： train.py 项目： yhsmiley/regnet

def main(opt):
    """Train a RegNetY model on an image-folder dataset with weighted sampling.

    Builds class-balanced training and plain validation loaders from
    ``opt.data_path``, profiles the model, optionally enables apex mixed
    precision and restores a checkpoint, then trains for ``opt.epochs``
    epochs, saving a checkpoint every epoch and an extra one every tenth
    epoch.

    Args:
        opt: Options object. This function reads ``batch_size``,
            ``data_path``, ``fixres``, ``log_path``, ``saved_path``, the
            RegNetY hyper-parameters, ``lr``, ``momentum``, ``weight_decay``,
            ``apex``, ``restore_model`` and ``epochs`` from it.
    """
    # NOTE(review): there is no CUDA availability check; with zero GPUs the
    # training batch size below becomes 0.
    num_gpus = torch.cuda.device_count()
    torch.cuda.manual_seed(123)

    cudnn.enabled = True
    cudnn.benchmark = True

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "drop_last": True,
        "num_workers": 6
    }

    test_params = {
        "batch_size": opt.batch_size // 10,
        "shuffle": False,
        "drop_last": False,
        "num_workers": 6
    }

    # training_set = Imagenet(root_dir=opt.data_path, mode="train")
    # training_generator = DataLoader(training_set, collate_fn=collate_fn, **training_params)

    # test_set = Imagenet(root_dir=opt.data_path, mode="val")
    # test_generator = DataLoader(test_set, collate_fn=collate_fn, **test_params)

    if opt.fixres:
        transformations = get_transforms_fixres(kind='full',
                                                crop=True,
                                                finetune=True)
    else:
        transformations = get_transforms()

    # training dataloader
    train_set = ImageFolder(root=os.path.join(opt.data_path, 'train'),
                            transform=transformations['train'])
    # for weighted sampling
    # Count images per class index, skipping any target equal to the number
    # of classes.
    class_count = dict(
        Counter(target for target in train_set.targets
                if target != len(train_set.classes)))
    # Order the counts by class index so position i corresponds to class i.
    class_count = dict(sorted(class_count.items()))
    class_count = list(class_count.values())
    # Weight of a class is the dataset size divided by its image count.
    class_weights = [len(train_set) / cls_count for cls_count in class_count]
    class_weights = torch.FloatTensor(class_weights)
    print('class weights: {}'.format(class_weights))
    # Look up each image's class weight to get one sampling weight per image.
    image_weights = class_weights[train_set.targets]
    train_sampler = WeightedRandomSampler(image_weights, len(image_weights))
    training_generator = DataLoader(train_set,
                                    collate_fn=collate_fn,
                                    sampler=train_sampler,
                                    **training_params)

    # validation dataloader
    test_set = ImageFolder(root=os.path.join(opt.data_path, 'val'),
                           transform=transformations['val'])
    test_generator = DataLoader(test_set, collate_fn=collate_fn, **test_params)

    # Any existing log directory is deleted and recreated empty.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    model = RegNetY(opt.initial_width, opt.slope, opt.quantized_param,
                    opt.network_depth, opt.bottleneck_ratio, opt.group_width,
                    opt.stride, opt.se_ratio)

    dummy_input = torch.randn((1, 3, TRAIN_IMAGE_SIZE, TRAIN_IMAGE_SIZE))
    writer.add_graph(model, dummy_input)
    # Calculate model FLOPS and number of parameters
    # The return value of count_ops is discarded — presumably the call is
    # made for its printed output.
    count_ops(model, dummy_input, verbose=False)
    summary(model, (3, TRAIN_IMAGE_SIZE, TRAIN_IMAGE_SIZE), device="cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(),
                    lr=opt.lr,
                    momentum=opt.momentum,
                    weight_decay=opt.weight_decay,
                    nesterov=True)
    best_acc1 = 0

    model = model.cuda()

    # amp.initialize runs on the bare model, before the DataParallel wrap.
    if opt.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O2')

    model = nn.DataParallel(model)

    restore_epoch = 0
    if opt.restore_model:
        checkpoint = torch.load(opt.restore_model)
        # checkpoint = rename_state_dict(checkpoint)
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint['optimizer'])
        restore_epoch = checkpoint['epoch']
        # NOTE(review): "best_acc1" is saved in every checkpoint but is not
        # restored here, so best_acc1 restarts from 0 after a resume.
        if opt.apex:
            amp.load_state_dict(checkpoint['amp'])

    for epoch in range(opt.epochs):
        # Continue epoch numbering from the restored checkpoint.
        epoch = epoch + restore_epoch
        adjust_learning_rate(optimizer, epoch, opt.lr)
        train(training_generator, model, criterion, optimizer, epoch, writer,
              opt)
        acc1 = validate(test_generator, model, criterion, epoch, writer)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Per-epoch checkpoint; the apex variant also stores the amp state
        # and uses its own filename.
        if opt.apex:
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "state_dict": model.state_dict(),
                    "best_acc1": best_acc1,
                    "optimizer": optimizer.state_dict(),
                    "amp": amp.state_dict(),
                },
                is_best,
                opt.saved_path,
                filename="apex_checkpoint.pth.tar")
        else:
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "state_dict": model.state_dict(),
                    "best_acc1": best_acc1,
                    "optimizer": optimizer.state_dict(),
                }, is_best, opt.saved_path)

        # Every tenth epoch an additional checkpoint is written under ckpt/
        # with is_best forced to False.
        if (epoch + 1) % 10 == 0:
            if opt.apex:
                save_checkpoint(
                    {
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                        "best_acc1": best_acc1,
                        "optimizer": optimizer.state_dict(),
                        "amp": amp.state_dict(),
                    },
                    False,
                    opt.saved_path,
                    filename="ckpt/apex_checkpoint_epoch{}.pth.tar".format(
                        epoch + 1))
            else:
                save_checkpoint(
                    {
                        "epoch": epoch + 1,
                        "state_dict": model.state_dict(),
                        "best_acc1": best_acc1,
                        "optimizer": optimizer.state_dict(),
                    },
                    False,
                    opt.saved_path,
                    filename="ckpt/checkpoint_epoch{}.pth.tar".format(epoch +
                                                                      1))

示例#15

0

显示文件

文件： dan_resnet.py 项目： lancercat/OSOCR

        x = self.layer5(x)
        all_inters["l5"] = x.detach().cpu();

        if not self.compress_layer:
            out_features.append(x)
        else:
            if x.size()[2:] != tmp_shape:
                tmp_shape = x.size()[2:]
                out_features.append(x)
            x = self.layer6(x)
            out_features.append(x)
        return out_features,all_inters

def resnet45(strides, compress_layer,oupch=512,inpch=1):
    """Build a ``dan_ResNet`` of ``BasicBlock`` units with layers [3, 4, 6, 6, 3] and ``frac=1``."""
    return dan_ResNet(
        BasicBlock,
        [3, 4, 6, 6, 3],
        strides,
        compress_layer,
        oupch=oupch,
        inpch=inpch,
        frac=1,
    )
def resnet45_thicc(strides, compress_layer,oupch=512,inpch=1):
    """Build a ``dan_ResNet`` of ``BasicBlock`` units with layers [3, 4, 6, 6, 3] and ``frac=1.5``."""
    return dan_ResNet(
        BasicBlock,
        [3, 4, 6, 6, 3],
        strides,
        compress_layer,
        oupch=oupch,
        inpch=inpch,
        frac=1.5,
    )
if __name__ == '__main__':
    import torch
    import pthflops

    # Build resnet45 with six stride pairs and no compress layer, then print
    # the first value returned by count_ops for a 1x1x32x128 random input.
    strides = [(1, 1), (2, 2), (1, 1), (2, 2), (1, 1), (1, 1)]
    net = resnet45(strides, None)

    a = torch.rand([1, 1, 32, 128])
    macs, params = pthflops.count_ops(net, a)
    print(macs)

示例#16

0

显示文件

文件： utils.py 项目： wsgharvey/ps-nogs

def get_flops(*args):
    """Return the first element of ``count_ops(*args)`` with printing and verbosity off."""
    result = count_ops(*args, print_readable=False, verbose=False)
    return result[0]