def main(args):
    """Prune a model with the configured pruner, report the FLOPs/params
    reduction, then fine-tune or just evaluate the pruned model.

    args: nested config object with MODEL / OPTIM / DATASET / Prune sections.
    """
    # Config validation via `assert` is stripped under `python -O`;
    # NOTE(review): an explicit raise would be safer.
    assert args.Prune.pruner != ''
    # NOTE(review): `cfg` is a name from outside this function while the rest
    # of the body uses `args` -- confirm they refer to the same config.
    # eval() on config-supplied strings is an injection risk if the config
    # file is untrusted.
    model = eval(cfg.MODEL.modeltype)(cfg=args.MODEL).cuda().eval()
    newmodel = eval(cfg.MODEL.modeltype)(cfg=args.MODEL).cuda().eval()
    optimizer = optim.Adam(model.parameters(), lr=args.OPTIM.lr_initial)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.OPTIM.milestones, gamma=0.1)
    # Trainer class is looked up by dataset name ("Trainer_<dataset>").
    _Trainer = eval('Trainer_{}'.format(args.DATASET.dataset))(
        args=args, model=model, optimizer=optimizer, lrscheduler=scheduler)
    # Pruner class is looked up by name; presumably it fills `newmodel` with
    # the pruned weights (confirmed only by the FLOPs comparison below).
    pruner = eval(args.Prune.pruner)(_Trainer, newmodel, cfg=args)
    pruner.prune(ckpt=None)
    ##---------count op
    # Compare thop FLOPs/params of the original vs. pruned model.
    input = torch.randn(1, 3, 512, 512).cuda()
    flops, params = profile(model, inputs=(input, ), verbose=False)
    flops, params = clever_format([flops, params], "%.3f")
    flopsnew, paramsnew = profile(newmodel, inputs=(input, ), verbose=False)
    flopsnew, paramsnew = clever_format([flopsnew, paramsnew], "%.3f")
    print("flops:{}->{}, params: {}->{}".format(flops, flopsnew, params,
                                                paramsnew))
    if not args.Prune.do_test:
        ## For AutoSlim, specify the ckpt
        if args.Prune.pruner == 'AutoSlimPruner':
            bestfinetune = pruner.finetune(load_last=False, ckpt='logs/265.pth')
        else:
            bestfinetune = pruner.finetune(load_last=False)
        print("finetuned map:{}".format(bestfinetune))
    else:
        ## For AutoSlim, specify the ckpt
        if args.Prune.pruner == 'AutoSlimPruner':
            bestfinetune = pruner.test(ckpt='logs/265.pth', )
        else:
            bestfinetune = pruner.test()
        print("finetuned map:{}".format(bestfinetune))
def main(args):
    """Prune with SlimmingPruner, compare FLOPs/params and mAP before and
    after pruning, then fine-tune the pruned model.

    args: nested config with devices / MODEL / OPTIM / DATASET / Prune
    sections.
    """
    # Restrict visible GPUs before any .cuda() call below.
    gpus = [str(g) for g in args.devices]
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(gpus)
    # NOTE(review): `cfg` comes from outside this function while the rest
    # uses `args` -- confirm they refer to the same config. eval() on config
    # strings is an injection risk if the config is untrusted.
    model = eval(cfg.MODEL.modeltype)(cfg=args.MODEL).cuda().eval()
    newmodel = eval(cfg.MODEL.modeltype)(cfg=args.MODEL).cuda().eval()
    optimizer = optim.Adam(model.parameters(), lr=args.OPTIM.lr_initial)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=args.OPTIM.milestones, gamma=0.1)
    # Trainer class is looked up by dataset name ("Trainer_<dataset>").
    _Trainer = eval('Trainer_{}'.format(args.DATASET.dataset))(
        args=args, model=model, optimizer=optimizer, lrscheduler=scheduler)
    pruner = SlimmingPruner(_Trainer, newmodel, cfg=args.Prune)
    # pruner=l1normPruner(_Trainer,newmodel,pruneratio=0.)
    pruner.prune()
    ##---------count op
    # Compare thop FLOPs/params of the original vs. pruned model.
    input = torch.randn(1, 3, 512, 512).cuda()
    flops, params = profile(model, inputs=(input, ), verbose=False)
    flops, params = clever_format([flops, params], "%.3f")
    flopsnew, paramsnew = profile(newmodel, inputs=(input, ), verbose=False)
    flopsnew, paramsnew = clever_format([flopsnew, paramsnew], "%.3f")
    print("flops:{}->{}, params: {}->{}".format(flops, flopsnew, params,
                                                paramsnew))
    # Quick validation of both models, limited to 10 iterations each.
    resultold = pruner.test(newmodel=False, validiter=10)
    resultnew = pruner.test(newmodel=True, validiter=10)
    print("original map:{},pruned map:{}".format(resultold, resultnew))
    bestfinetune = pruner.finetune()
    print("finetuned map:{}".format(bestfinetune))
def build_model(self):
    """Build dataloaders, generators, discriminators, losses and optimizers.

    Side effects: sets self.trainA/.trainB/.label/.testA/.testB (and their
    loaders), self.gen2B/.gen2A, self.disA/.disB, self.L1_loss/.MSE_loss,
    and self.G_optim/.D_optim.  Also prints parameter counts and thop MACs
    for disA and gen2B; both are doubled (macs*2 / params*2) --
    NOTE(review): presumably to account for the symmetric A/B pair, confirm.
    """
    """ DataLoader """
    train_transform = transforms.Compose([
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    test_transform = transforms.Compose([
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    # Labels are only resized and tensorised (no normalisation).
    label_transform = transforms.Compose([
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToTensor()])
    self.trainA = ImageFolder(os.path.join(self.dataset, 'trainA'),
                              train_transform, extend_paths=True,
                              return_paths=False)
    self.trainB = ImageFolder(os.path.join(self.dataset, 'trainB'),
                              train_transform, extend_paths=True,
                              return_paths=False)
    # loader="gray": labels are loaded as single-channel images.
    self.label = ImageFolder(os.path.join(self.dataset, 'label'),
                             label_transform, extend_paths=True,
                             loader="gray")
    # Test sets return paths so outputs can be saved per input file.
    self.testA = ImageFolder(os.path.join(self.dataset, 'testA'),
                             test_transform, extend_paths=True,
                             return_paths=True)
    self.testB = ImageFolder(os.path.join(self.dataset, 'testB'),
                             test_transform, extend_paths=True,
                             return_paths=True)
    self.trainA_loader = DataLoader(self.trainA, batch_size=self.batch_size,
                                    shuffle=True, pin_memory=True)
    self.trainB_loader = DataLoader(self.trainB, batch_size=self.batch_size,
                                    shuffle=True, pin_memory=True)
    self.label_loader = DataLoader(self.label, batch_size=self.batch_size,
                                   shuffle=True, pin_memory=True)
    self.testA_loader = DataLoader(self.testA, batch_size=1, shuffle=False,
                                   pin_memory=True)
    self.testB_loader = DataLoader(self.testB, batch_size=1, shuffle=False,
                                   pin_memory=True)

    """ Define Generator, Discriminator """
    self.gen2B = networks.NiceResnetGenerator(input_nc=self.img_ch,
                                              output_nc=self.img_ch,
                                              ngf=self.ch,
                                              n_blocks=self.n_res,
                                              img_size=self.img_size,
                                              light=self.light).to(self.device)
    self.gen2A = networks.NiceResnetGenerator(input_nc=self.img_ch,
                                              output_nc=self.img_ch,
                                              ngf=self.ch,
                                              n_blocks=self.n_res,
                                              img_size=self.img_size,
                                              light=self.light).to(self.device)
    self.disA = networks.NiceDiscriminator(input_nc=self.img_ch,
                                           ndf=self.ch,
                                           n_layers=self.n_dis).to(self.device)
    self.disB = networks.NiceDiscriminator(input_nc=self.img_ch,
                                           ndf=self.ch,
                                           n_layers=self.n_dis).to(self.device)
    print('-----------------------------------------------')
    input = torch.randn([1, self.img_ch, self.img_size,
                         self.img_size]).to(self.device)
    macs, params = profile(self.disA, inputs=(input, ))
    macs, params = clever_format([macs*2, params*2], "%.3f")
    print('[Network %s] Total number of parameters: ' % 'disA', params)
    print('[Network %s] Total number of FLOPs: ' % 'disA', macs)
    print('-----------------------------------------------')
    # gen2B is profiled on the 5th output of disA (named real_A_ae; the name
    # suggests an auto-encoder feature map), not on the raw image.
    _, _, _, _, real_A_ae = self.disA(input)
    macs, params = profile(self.gen2B, inputs=(real_A_ae, ))
    macs, params = clever_format([macs*2, params*2], "%.3f")
    print('[Network %s] Total number of parameters: ' % 'gen2B', params)
    print('[Network %s] Total number of FLOPs: ' % 'gen2B', macs)
    print('-----------------------------------------------')

    """ Define Loss """
    self.L1_loss = nn.L1Loss().to(self.device)
    self.MSE_loss = nn.MSELoss().to(self.device)

    """ Trainer """
    # One optimizer over both generators, one over both discriminators.
    self.G_optim = torch.optim.Adam(itertools.chain(self.gen2B.parameters(),
                                                    self.gen2A.parameters()),
                                    lr=self.lr, betas=(0.5, 0.999),
                                    weight_decay=self.weight_decay)
    self.D_optim = torch.optim.Adam(itertools.chain(self.disA.parameters(),
                                                    self.disB.parameters()),
                                    lr=self.lr, betas=(0.5, 0.999),
                                    weight_decay=self.weight_decay)
def profile_model(cfg, train_file, save_prefix, use_fl):
    """Profile one training batch with thop and extrapolate the total
    MACs/FLOPs over the whole training schedule; print and save the result.
    """
    model = Classifier(cfg)
    train_loader = DataLoader(ImageDataset(train_file, cfg, mode='train'),
                              batch_size=cfg.train_batch_size,
                              num_workers=4,
                              drop_last=True,
                              shuffle=False)
    device = torch.device("cpu")
    # Counting rules for layers thop does not know out of the box.
    custom_ops = {
        ExpPool: count_exp_pool,
        LinearPool: count_lin_pool,
        LogSumExpPool: count_log_sum_exp_pool,
        torch.nn.modules.activation.Sigmoid: count_sig,
    }
    # Profile a single representative batch, then stop.
    for batch in train_loader:
        macs, params = profile(model,
                               inputs=(batch[0].to(device), ),
                               custom_ops=custom_ops)
        break

    steps_per_epoch = len(train_loader)
    if use_fl:
        # Federated setting: local epochs repeated over communication rounds.
        total_batches = steps_per_epoch * cfg.local_epoch * cfg.epoch
    else:
        total_batches = steps_per_epoch * cfg.epoch

    # When comparing MACs /FLOPs, we want the number to be implementation-agnostic and as general as possible.
    # The THOP library therefore only considers the number of multiplications and ignore all other operations.
    total_macs = macs * total_batches
    total_flops_approx = 2 * total_macs
    total_macs_formatted, _ = clever_format([total_macs, params], "%.5f")
    total_flops_approx_formatted, _ = clever_format(
        [total_flops_approx, params], "%.5f")
    print(f"Total MACs: {total_macs_formatted}")
    print(f"Approximate Total FLOPs: {total_flops_approx_formatted}")
    # Persist the same two lines to disk.
    with open(save_prefix, "w") as f:
        f.write(f"Total MACs: {total_macs_formatted}\n")
        f.write(f"Approximate Total FLOPs: {total_flops_approx_formatted}")
def main():
    """Parse the option YAML, seed the RNGs, build the model, and report
    thop MACs/params of the generator on a fixed dummy input."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--opt', type=str, help='Path to option YAML file.')
    cli = parser.parse_args()
    # Convert to NoneDict, which returns None for missing keys.
    opt = option.dict_to_nonedict(option.parse(cli.opt, is_train=True))

    # Honour the configured seed; draw a fresh one otherwise.
    seed = opt['train']['manual_seed']
    seed = random.randint(1, 10000) if seed is None else seed
    print('Random seed: {}'.format(seed))
    util.set_random_seed(seed)
    torch.backends.cudnn.benchmark = False

    model = create_model(opt)

    # Op counting on a fixed 1x3x320x180 zero tensor.
    print('Start counting')
    var_L = torch.zeros(1, 3, 320, 180).cuda()
    print('netG')
    macs, params = profile(model.netG, inputs=(var_L, ))
    macs, params = clever_format([macs, params], "%.5f")
    print('macs:{},params:{}'.format(macs, params))
def get_model_config():
    """Print thop FLOPs/params for each attention-variant backbone."""
    names = ['resnet', 'senet', 'cbam', 'eca-net', 'sknet',
             'triplet-attention', 'resnest']
    # Instantiate every model first, then profile them one by one.
    nets = [get_model(name) for name in names]
    for net in nets:
        dummy = torch.randn(1, 3, 224, 224)
        flops, params = profile(net, inputs=(dummy,), verbose=False)
        flops, params = clever_format([flops, params], "%.3f")
        print(flops, params)
def compute_flops(m, name):
    """Print thop FLOPs and parameter count of model *m*, labelled *name*."""
    from thop import profile, clever_format
    dummy = torch.randn(1, 3, 224, 224)
    flops, params = profile(m, inputs=(dummy, ))
    # clever_format falls back to its default format when none is given.
    flops, params = clever_format([flops, params])
    print(name, flops, params)
    # resnet110 12.508G 1.731M
def get_macs(model, input_tensor):
    """Print total MACs and parameter count of *model* for *input_tensor*."""
    # No custom counting rules for now; re-enable get_custom_ops() if any
    # special layers need their own handlers.
    macs, params = profile(model,
                           inputs=(input_tensor, ),
                           custom_ops=None,
                           verbose=True)
    macs, params = clever_format([macs, params], "%.3f")
    print('Total MACs: {}'.format(macs))
    print('Total PARAMs: {}'.format(params))
def count():
    """Load a saved DeepLab checkpoint and print its thop FLOPs/params for a
    4-channel 256x256 input."""
    ckpt = '/home/grey/datasets/rssrai/results/deeplab-base/deeplab-resnet50_False_False.pth'
    model = torch.load(ckpt)
    dummy = torch.randn(1, 4, 256, 256).cuda()
    flops, params = profile(model, inputs=(dummy, ), verbose=True)
    flops, params = clever_format([flops, params], "%.3f")
    print(flops, params)
def ffn_params():
    """Report MACs and parameter count of a small embedding + FFN model."""
    vocab_size = 5000
    hidden_size = 512
    layers = [
        nn.Embedding(vocab_size, hidden_size),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, vocab_size),
    ]
    model = nn.Sequential(*layers)
    # A single dummy sequence of 200 token ids.
    tokens = torch.ones([1, 200]).long()
    macs, params = profile(model, inputs=[tokens])
    macs, params = clever_format([macs, params], "%.3f")
    print('计算量:{}, 参数量:{}'.format(macs, params))
def prune(config):
    """Run network-slimming pruning, compare FLOPs/params before and after,
    then evaluate and fine-tune the pruned model."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def build_model():
        # Factory for a fresh DataParallel-wrapped detector; the pruner uses
        # it to create both the original and the slimmed copy.
        return nn.DataParallel(DetectionModel(config.model.cfg_path).to(device),
                               device_ids=config.system.gpus)

    sp = SlimmingPruner(build_model, config)
    sp.prune()

    dummy = torch.randn(1, 3, 512, 512).to(device)
    flops, params = profile(sp.model.module, inputs=(dummy, ), verbose=False)
    flops, params = clever_format([flops, params], "%.3f")
    flopsnew, paramsnew = profile(sp.new_model.module, inputs=(dummy, ),
                                  verbose=False)
    flopsnew, paramsnew = clever_format([flopsnew, paramsnew], "%.3f")
    print("flops:{}->{}, params: {}->{}".format(flops, flopsnew, params,
                                                paramsnew))
    sp.test()
    sp.finetune()
def count_your_model(model, x, y):
    """thop template: profile *model* on a fixed 1x3x224x224 input and return
    the formatted (flops, params) pair.

    NOTE(review): this looks like the thop README skeleton fused into one
    function. A real custom-op rule registered in ``custom_ops`` receives
    ``(module, inputs, outputs)`` and should update ``module.total_ops``;
    registering this driver itself for ``YourModule`` would recurse if the
    model contains such a module. ``x`` and ``y`` are kept only for
    signature compatibility.
    """
    dummy = torch.randn(1, 3, 224, 224)
    flops, params = profile(model, inputs=(dummy, ),
                            custom_ops={YourModule: count_your_model})
    # Improve readability of the raw counts.
    from thop import clever_format
    flops, params = clever_format([flops, params], "%.3f")
    # Fix: the original computed the values but never returned them.
    return flops, params
def model_summary(_, args):
    """Build the model from cfg/weights on CPU and print its MACs/params."""
    model = tools.build_model(args.cfg, args.weight, device='cpu',
                              dataparallel=False)[0]
    dummy = torch.randn(1, 3, 512, 512)
    macs, n_params = profile(model, inputs=(dummy, ), verbose=False)
    macs, n_params = clever_format([macs, n_params], "%.3f")
    print('MACs: {}, params: {}'.format(macs, n_params))
def profile_model(model, config):
    """Return formatted (flops, params) of *model* for the configured input
    size, printing them as a side effect."""
    img_dims = config['img_dims']
    dummy = torch.randn(1, 3, img_dims[0], img_dims[1])
    # Dropout2d is mapped to None in custom_ops -- presumably to have thop
    # treat it as a zero-op layer; confirm against thop docs.
    flops, params = profile(model,
                            verbose=False,
                            inputs=(dummy, ),
                            custom_ops={torch.nn.Dropout2d: None})
    flops, params = clever_format([flops, params], "%.4f")
    print('{}_{}: {} flops, {} params'.format(config['model_name'], img_dims,
                                              flops, params))
    return flops, params
def main():
    """Evaluate a trained genotype-based network on the ImageNet validation
    set, optionally reporting parameter count and FLOPs via thop.

    Relies on module-level names: `args`, `params`, `genotypes`, `Network`,
    `utils`, `dset`, `data_transforms_imagenet_valid`, `infer`.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.enabled = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    # Look up the architecture genotype by name in the genotypes module.
    genotype = eval("genotypes.%s" % args.arch)
    logging.info(genotype)
    dataset = params.datasets['ImageNet']
    network_params = {
        'C': args.init_channels,
        'num_classes': dataset.num_classes,
        'layers': args.layers,
        'genotype': genotype,
    }
    model = Network(**network_params)
    if args.calc_flops:
        from thop import profile, clever_format
        # Profile before weights are loaded and before moving to the GPU.
        input = torch.randn(1, dataset.num_channels, dataset.hw[0],
                            dataset.hw[1])
        flops, num_params = profile(model, inputs=(input, ))
        flops, num_params = clever_format([flops, num_params], "%.2f")
    utils.load(model, args.model_path)
    model = model.cuda()
    val_transform = data_transforms_imagenet_valid()
    validdir = os.path.join(args.data, 'val')
    valid_data = dset.ImageFolder(validdir, val_transform)
    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=0)
    # Inference only: no gradients needed.
    with torch.no_grad():
        val_acc, infer_time = infer(valid_queue, model, args.report_freq)
    if args.calc_flops:
        logging.info(
            'Validation Accuracy: %.2f%% | Number of parameters: %s | Inference time: %2.2fms | Flops: %s',
            val_acc, num_params, infer_time * 1000, flops)
    else:
        logging.info('Validation Accuracy: %.2f%% | Inference time: %2.2fms',
                     val_acc, infer_time * 1000)
def compute_flops_and_params(config, model):
    """Return formatted (flops, params) for a square dummy input, profiling
    on the same device as *model*'s parameters."""
    side = config.input_image_size
    dummy = torch.randn(1, 3, side, side)
    # Move the dummy input to the GPU when the model lives there.
    if next(model.parameters()).is_cuda:
        dummy = dummy.cuda()
    flops, params = profile(model, inputs=(dummy, ), verbose=False)
    flops, params = clever_format([flops, params], '%.3f')
    return flops, params
def rnn_params():
    """Report MACs and parameter count of a 3-layer GRU model."""
    vocab_size = 5000
    max_len = 200
    hidden_size = 512
    net = simple_gru(vocab_size, hidden_size, num_layers=3)
    # One dummy sequence of max_len token ids.
    tokens = torch.ones([1, max_len]).long()
    macs, params = profile(net, inputs=[tokens])
    macs, params = clever_format([macs, params], "%.3f")
    print('计算量:{}, 参数量:{}'.format(macs, params))
def main():
    """Build the configured architecture and print its thop MACs/params."""
    args = get_args()
    # The architecture module is chosen by name under the `arch` package.
    arch = importlib.import_module('arch.' + args.arch)
    model = arch.config_network(args.config_file)
    print(model)
    dummy = torch.rand(1, 3, args.crop_size, args.crop_size)
    macs, params = profile(model, inputs=(dummy, ))
    macs, params = clever_format([macs, params], "%.3f")
    print(f"macs: {macs}, params: {params}")
def get_flops_params(net, net_name):
    """Return formatted (flops, params) of *net*, profiled with thop at the
    canonical input resolution for *net_name*.

    Supported names: 'vgg16' (32x32 input) and 'resnet34' (224x224 input).

    Raises:
        ValueError: if *net_name* has no registered input size.
    """
    # Canonical dummy-input shape per supported backbone.
    input_shapes = {
        'vgg16': (1, 3, 32, 32),
        'resnet34': (1, 3, 224, 224),
    }
    if net_name not in input_shapes:
        # Fix: the original printed a message and called exit(0) -- a success
        # exit code on an error path. Raise so callers can handle it.
        raise ValueError("The net is not provided: {}".format(net_name))
    x = torch.randn(*input_shapes[net_name])
    macs, params = profile(model=net, inputs=(x, ))
    flops, params = clever_format([macs, params], "%.3f")
    print("flops and params: ", flops, params)
    return flops, params
def CalParams(model, input_tensor):
    """Compute and print FLOPs and parameter count of *model* via
    [THOP](https://github.com/Lyken17/pytorch-OpCounter).

    Requires ``profile`` and ``clever_format`` from ``thop`` in scope.

    :param model: network to measure
    :param input_tensor: representative input batch
    :return: None (results are printed)
    """
    raw_flops, raw_params = profile(model, inputs=(input_tensor,))
    flops, params = clever_format([raw_flops, raw_params], "%.3f")
    print('[Statistics Information]\nFLOPs: {}\nParams: {}'.format(flops, params))
def test():
    """Smoke-test MobileNetV2: run a forward pass on a random CIFAR-sized
    batch, print the output shape and the model, then report its thop FLOPs.

    Fixes: removed the unused 1x3x224x224 dummy tensor (profiling uses the
    same 2x3x32x32 batch as the forward pass) and the unused resnet50 import.
    """
    net = MobileNetV2()
    x = torch.randn(2, 3, 32, 32)
    y = net(x)
    print(y.size())
    print(net)
    from thop import profile, clever_format
    flops, params = profile(net, inputs=(x, ))
    flops, params = clever_format([flops, params], "%.3f")
    print(flops)
def get_parameter(model):
    """Print the raw parameter count of *model*, then its thop FLOPs/params
    for a random 1x3x32x32 CUDA input."""
    # Total number of elements across every parameter tensor.
    total = sum(p.numel() for p in model.parameters())
    print("all params:" + str(total))
    # Swap in your own model / input shape as needed.
    dummy = torch.randn(1, 3, 32, 32).cuda()
    flops, params = profile(model, inputs=(dummy,))
    flops, params = clever_format([flops, params], "%.3f")
    print("flops:", flops, "params:", params)
def __init__(self, flag=False):
    """Build the network named in system_configs, wrap it for multi-GPU
    training, print parameter/MAC statistics, and create the optimizer.

    flag: forwarded to the model constructor; its semantics are defined by
    the imported model module.
    """
    super(NetworkFactory, self).__init__()
    # Import the model module selected by the snapshot name.
    module_file = "models.{}".format(system_configs.snapshot_name)
    # print("module_file: {}".format(module_file))
    # models.CornerNet
    nnet_module = importlib.import_module(module_file)
    self.model = DummyModule(nnet_module.model(flag=flag))
    self.loss = nnet_module.loss()
    self.network = Network(self.model, self.loss)
    # DataParallel with explicit per-GPU chunk sizes from the config.
    self.network = DataParallel(self.network,
                                chunk_sizes=system_configs.chunk_sizes)
    self.flag = flag
    # Count total parameters (product of each tensor's dims, summed).
    total_params = 0
    for params in self.model.parameters():
        num_params = 1
        for x in params.size():
            num_params *= x
        total_params += num_params
    print("Total parameters: {}".format(total_params))
    # Count MACs when input is 360 x 640 x 3; the model takes an image
    # tensor plus a second tensor of the same shape (named "mask" here --
    # NOTE(review): confirm the expected second input against the model).
    input_test = torch.randn(1, 3, 360, 640).cuda()
    input_mask = torch.randn(1, 3, 360, 640).cuda()
    macs, params, = profile(self.model, inputs=(input_test, input_mask),
                            verbose=False)
    macs, _ = clever_format([macs, params], "%.3f")
    print('MACs: {}'.format(macs))
    # Optimizer over trainable parameters only.
    if system_configs.opt_algo == "adam":
        self.optimizer = torch.optim.Adam(
            filter(lambda p: p.requires_grad, self.model.parameters()))
    elif system_configs.opt_algo == "sgd":
        self.optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                                self.model.parameters()),
                                         lr=system_configs.learning_rate,
                                         momentum=0.9,
                                         weight_decay=0.0001)
    elif system_configs.opt_algo == 'adamW':
        self.optimizer = torch.optim.AdamW(filter(
            lambda p: p.requires_grad, self.model.parameters()),
                                           lr=system_configs.learning_rate,
                                           weight_decay=1e-4)
    else:
        raise ValueError("unknown optimizer")
def main():
    """Profile PatchNetG with custom thop rules for its Fourier-feature and
    soft-select pooling layers."""
    patchnet = PatchNetG()
    patchnet.create_architecture()
    patchnet.eval()
    patchnet.cuda()
    # Two dummy inputs of the shapes the network expects.
    inp_x = torch.randn(1, 3, 119, 119).cuda()
    inp_z = torch.randn(1, 3, 64, 64).cuda()
    rules = {
        FourierFeature: count_fourierfeat,
        MaxPoolSoftSelect: count_poolsoftselect,
    }
    flops, params = profile(patchnet, inputs=(inp_x, inp_z), custom_ops=rules)
    flops, params = clever_format([flops, params], "%.3f")
    print(flops, params)
def param(size=128):
    """Load the best DCGAN generator checkpoint for *size* and return its
    formatted (flops, params) as measured by thop on one latent vector.

    Fixes: reuse the computed `path` instead of duplicating the checkpoint
    path literal, and replace the long-deprecated
    `Variable(..., volatile=True)` with `torch.no_grad()`.
    """
    G = generator(size)
    G.weight_init(mean=0.0, std=0.02)
    G.cuda()
    path = "MNIST_DCGAN_results/iter/best_state_" + str(size) + ".pkl"
    checkpoint = torch.load(path)
    G.load_state_dict(checkpoint['G'])
    # One latent vector shaped (1, 100, 1, 1).
    z_ = torch.randn((1, 100)).view(-1, 100, 1, 1).cuda()
    with torch.no_grad():
        flops, params = profile(G, inputs=(z_, ))
    flops, params = clever_format([flops, params], "%.3f")
    print("flops=" + str(flops))
    print("params=" + str(params))
    return flops, params
def main():
    """Run a DPTNet forward pass on a random 1x32000 waveform and report its
    output shape and thop parameter count."""
    model = DPTNet_base(enc_dim=256, feature_dim=64, hidden_dim=128,
                        layer=6, segment_size=250, nspk=2, win_len=2)
    mixture = torch.rand(1, 32000)
    separated = model(mixture)
    flops, params = profile(model, inputs=(mixture, ), verbose=False)
    flops, params = clever_format([flops, params], "%.3f")
    print('output shape:', separated.shape)
    print('model size:', params)
def main():
    """Instantiate the requested encoding model and print thop MACs/params."""
    args = get_args()
    model_kwargs = {}
    if args.rectify:
        # Optional rectified-convolution variant flags.
        model_kwargs['rectified_conv'] = True
        model_kwargs['rectify_avg'] = args.rectify_avg
    model = encoding.models.get_model(args.model, **model_kwargs)
    print(model)
    dummy = torch.rand(1, 3, args.crop_size, args.crop_size)
    macs, params = profile(model, inputs=(dummy, ))
    macs, params = clever_format([macs, params], "%.3f")
    print(f"macs: {macs}, params: {params}")
def test(self):
    """Load generator weights and profile the first test batch with thop.

    NOTE(review): the bare `return` after printing flops/params makes
    everything below it unreachable (leftover label-generation code); the
    dead section also references `labels_predict`, which is only assigned in
    a commented-out line and would raise NameError if revived.
    """
    transform = transformer(True, True, True, False, self.imsize)
    test_paths = make_dataset(self.test_image_path)
    make_folder(self.test_label_path, '')
    make_folder(self.test_color_label_path, '')
    print(self.model_save_path, self.model_name)
    self.G.load_state_dict(
        torch.load(os.path.join(self.model_save_path, self.model_name)))
    self.G.eval()
    batch_num = int(self.test_size / self.batch_size)
    for i in range(batch_num):
        print(i)
        # Assemble one batch of transformed test images.
        imgs = []
        for j in range(self.batch_size):
            path = test_paths[i * self.batch_size + j]
            img = transform(Image.open(path))
            imgs.append(img)
        imgs = torch.stack(imgs)
        imgs = imgs.cuda()
        flops, params = profile(
            self.G,
            inputs=(imgs, ),
        )
        flops, params = clever_format([flops, params], "%.3f")
        print(flops)
        print(params)
        # Early return: only the first batch is profiled.
        return
        # ---- unreachable below this point ----
        imgs = torch.stack(imgs)
        imgs = imgs.cuda()
        # labels_predict = self.G(imgs)
        labels_predict_plain = generate_label_plain(
            labels_predict, self.imsize)
        labels_predict_color = generate_label(labels_predict, self.imsize)
        for k in range(self.batch_size):
            cv2.imwrite(
                os.path.join(self.test_label_path,
                             str(i * self.batch_size + k) + '.png'),
                labels_predict_plain[k])
            save_image(
                labels_predict_color[k],
                os.path.join(self.test_color_label_path,
                             str(i * self.batch_size + k) + '.png'))
def statistics():
    """Load the model named on the CLI and print its thop MACs/params for a
    random 4x3x32x32 batch."""
    arg = arg_parser()
    name = arg.model
    dev = torch.device(arg.device)
    print("Using device: %s." % dev)
    net = load_model(name)().cuda(dev)
    print("\n", net, "\n")
    batch = torch.randn([4, 3, 32, 32]).cuda(dev)
    print("Testing tensor shape: {}.".format(batch.shape))
    macs, params = profile(net, inputs=[batch])
    macs, params = clever_format([macs, params], "%.2f")
    center_print("Statistics Information")
    print("Model: {}".format(name))
    print("MACs(G): {}".format(macs))
    print("Params(M): {}".format(params))
    center_print("Ends")
def generate_model(macs_thres=15e9, time_thres=(0, 100), gen_func=detnet_600m):
    """Yield DataParallel-wrapped networks from *gen_func* whose thop MACs
    stay below *macs_thres* and whose average batch time (as measured by
    tools.compute_time) falls inside the *time_thres* window."""
    while True:
        candidate = gen_func().cuda()
        dummy = torch.randn(1, 3, 512, 512).cuda()
        raw_macs, raw_params = profile(candidate, inputs=(dummy, ),
                                       verbose=False)
        # Reject networks that are too heavy.
        if raw_macs > macs_thres:
            continue
        avg_time = tools.compute_time(candidate, batch_size=16)
        # Reject networks outside the timing window.
        if avg_time > time_thres[1] or avg_time < time_thres[0]:
            continue
        # Stash the raw measurements on the network for later inspection.
        candidate.attr = {
            'MACs': raw_macs,
            'params': raw_params,
            'avg_time': avg_time,
        }
        print(candidate.cfg)
        fmt_macs, fmt_params = clever_format([raw_macs, raw_params], "%.3f")
        print('MACs: {}, params: {}, {:.2f} ms'.format(fmt_macs, fmt_params,
                                                       avg_time))
        yield nn.DataParallel(candidate)