def test_overall(self):
    """Check the op count reported for ResNet-18 on one 3x224x224 random image."""
    reference_ops = 1826843136
    # Draw the sample before building the network, as the original did,
    # so the global RNG is consumed in the same order.
    sample = torch.rand(1, 3, 224, 224)
    network = resnet18()
    measured = count_ops(network, sample, print_readable=False)
    assert reference_ops == measured
def __call__(self, trainer: Trainer):
    """Profile ``trainer.model`` and write the figures to ``computational_info.json``.

    Runs ``count_ops`` on a random single-image batch and
    ``get_model_complexity_info`` at the same resolution, derives a total
    training figure from the MAC count, and dumps both the raw numbers and
    their ``_flops_to_string`` renderings as JSON in the directory that
    contains ``trainer._save_path``.

    Args:
        trainer: Supplies ``model``, ``device``, ``data_bundle``,
            ``batch_size``, ``epochs`` and ``_save_path``.
    """
    side = trainer.data_bundle.output_resolution
    resolution = (3, side, side)

    probe = torch.rand(1, *resolution).to(trainer.device)
    ops, _ = count_ops(trainer.model, probe)
    macs, params = get_model_complexity_info(
        trainer.model,
        resolution,
        as_strings=False,
        print_per_layer_stat=True,
        verbose=True,
    )

    # NOTE(review): the dataset length is multiplied by the batch size here;
    # confirm len(train_dataset) counts batches rather than samples.
    n_samples = len(trainer.data_bundle.train_dataset) * trainer.batch_size
    # NOTE(review): the 2 * 3 factor presumably converts MACs to FLOPs and
    # accounts for the backward pass - confirm.
    total_train_flops = macs * 2 * 3 * n_samples * trainer.epochs

    # (value, unit label) per reported quantity, in output order.
    quantities = {
        "flops": (ops, "FLOPS"),
        "macs": (macs, "MAC"),
        "params": (params, "Params"),
        "total flops": (total_train_flops, "FLOPS"),
    }
    results = {key: value for key, (value, _unit) in quantities.items()}
    for key, (value, unit) in quantities.items():
        results["str " + key] = self._flops_to_string(value, unit)

    out_dir = os.path.dirname(trainer._save_path)
    savefile = os.path.join(out_dir, "computational_info.json")
    with open(savefile, "w+") as fp:
        json.dump(results, fp)
def test_overall(self):
    """Check the total op count for ResNet-18 on one 3x224x224 random image.

    The estimate must lie within one million operations of the reference
    value.
    """
    sample = torch.rand(1, 3, 224, 224)
    net = resnet18()
    estimated, _per_layer = count_ops(net, sample, print_readable=False, verbose=False)
    expected = 1826843136
    # The second positional argument of pytest.approx is the *relative*
    # tolerance; passing 1000000 there accepted almost any estimate.
    # The slack is meant as an absolute number of operations.
    assert estimated == pytest.approx(expected, abs=1000000)
def __init__(self, data_type, device=None):
    """Set up the binary architecture-search problem for ``data_type``.

    Fixes the search-space sizes, resolves the device, picks the per-dataset
    input shape and base channel width, measures the op count of the densest
    architecture, and stores for every variable the adjacency matrix of a
    two-vertex graph together with the eigendecomposition of its Laplacian.

    Args:
        data_type: One of ``'MNIST'``, ``'FashionMNIST'`` or ``'CIFAR10'``.
        device: GPU index. Overridden with ``None`` when CUDA is unavailable
            and with ``0`` when exactly one GPU is reported; otherwise it
            must be a valid index into the reported GPUs.
    """
    assert data_type in ['MNIST', 'FashionMNIST', 'CIFAR10']
    self.data_type = data_type

    # Search-space dimensions: one variable per possible edge plus two per
    # node other than the first and last.
    self.n_nodes = 7
    self.n_edges = int(self.n_nodes * (self.n_nodes - 1) / 2)
    self.n_variables = int(self.n_edges + (self.n_nodes - 2) * 2)

    self.device = device
    self.n_repeat = 4
    if not torch.cuda.is_available():
        self.device = None
    elif len(GPUtil.getGPUs()) == 1:
        self.device = 0
    else:
        assert 0 <= self.device < len(GPUtil.getGPUs())

    self.batch_size = 100
    if self.data_type == 'MNIST':
        self.n_ch_in, self.h_in, self.w_in = MNIST_N_CH_IN, MNIST_H_IN, MNIST_W_IN
        self.n_ch_base, self.n_epochs = 8, 20
    elif self.data_type == 'FashionMNIST':
        self.n_ch_in, self.h_in, self.w_in = FashionMNIST_N_CH_IN, FashionMNIST_H_IN, FashionMNIST_W_IN
        self.n_ch_base, self.n_epochs = 8, 20
    elif self.data_type == 'CIFAR10':
        self.n_ch_in, self.h_in, self.w_in = CIFAR10_N_CH_IN, CIFAR10_H_IN, CIFAR10_W_IN
        self.n_ch_base, self.n_epochs = 16, 20

    # Every variable is binary.
    self.n_vertices = np.full(self.n_variables, 2)

    # Densest architecture: all node-type bits set, full upper-triangular
    # adjacency. Its op count is kept as max_flops.
    all_node_bits = np.ones(2 * (self.n_nodes - 2))
    full_adjacency = np.triu(np.ones((self.n_nodes, self.n_nodes)), 1)
    most_complex_model = NASBinaryCNN(
        data_type,
        all_node_bits,
        full_adjacency,
        n_ch_in=self.n_ch_in,
        h_in=self.h_in,
        w_in=self.w_in,
        n_ch_base=self.n_ch_base,
    )
    self.suggested_init = init_architectures()
    reference_param = next(most_complex_model.parameters())
    dummy_input = reference_param.new_ones(1, self.n_ch_in, self.h_in, self.w_in)
    self.max_flops = count_ops(most_complex_model, dummy_input)[0]

    self.adjacency_mat = []
    self.fourier_freq = []
    self.fourier_basis = []
    for _ in range(self.n_variables):
        # Two vertices joined by one edge; a fresh tensor per variable.
        adjmat = torch.diag(torch.ones(1), -1) + torch.diag(torch.ones(1), 1)
        laplacian = torch.diag(adjmat.sum(dim=0)) - adjmat
        # NOTE(review): torch.symeig is deprecated in recent PyTorch releases;
        # confirm the pinned torch version still provides it.
        eigval, eigvec = torch.symeig(laplacian, eigenvectors=True)
        self.adjacency_mat.append(adjmat)
        self.fourier_freq.append(eigval)
        self.fourier_basis.append(eigvec)
def countFlop(model, input_size):
    """Return the cumulative share of operations after each counted layer.

    Args:
        model: Network handed to ``count_ops``.
        input_size: Indexable with at least three entries; they become the
            dimensions that follow a leading batch dimension of 1.

    Returns:
        Two dicts holding the running fraction of ``ops`` consumed so far:
        one keyed by position in ``count_ops``'s per-layer list, one keyed
        by the second-to-last ``/``-separated component of each layer name.
    """
    probe = torch.rand(1, input_size[0], input_size[1], input_size[2])
    ops, all_data = count_ops(model, probe, print_readable=False, verbose=True)

    flop_idx_dict = dict.fromkeys(range(len(all_data)), 0)
    flop_layer_dict = {}
    running_share = 0
    for position, entry in enumerate(all_data):
        running_share += entry[1] / ops
        flop_idx_dict[position] = running_share
        # entry[0] is a '/'-separated path; key on its second-to-last part.
        layer_key = entry[0].rsplit("/", 2)[-2]
        flop_layer_dict[layer_key] = running_share
    return flop_idx_dict, flop_layer_dict
def countFlops(self):
    """Count operations layer by layer through ``self.model.features``.

    A random 1x3x``img_dim``x``img_dim`` tensor is pushed through the
    feature layers one at a time; each layer is profiled on the activation
    it actually receives.

    Returns:
        ``(per_layer, cumulative, total)``. Both dicts are keyed by 1-based
        layer index; cumulative values are raw op counts, not normalised by
        the total.
    """
    activation = torch.rand(1, 3, self.img_dim, self.img_dim).to(self.device)
    flops_count_dict, flops_acc_dict = {}, {}
    total_flops = 0
    for index, layer in enumerate(self.model.features, start=1):
        layer_ops, _ = count_ops(layer, activation, print_readable=False, verbose=False)
        # Feed the next layer with this layer's output.
        activation = layer(activation)
        total_flops += layer_ops
        flops_count_dict[index] = layer_ops
        flops_acc_dict[index] = total_flops
    return flops_count_dict, flops_acc_dict, total_flops
def countFlop(self, input_size):
    """Return the cumulative share of operations after each layer of ``self.model``.

    Args:
        input_size: Indexable with at least three entries; they become the
            dimensions that follow a leading batch dimension of 1
            (presumably channels, then the two spatial sizes - confirm).

    Returns:
        Two dicts holding the running fraction of ``ops`` consumed so far:
        one keyed by position in ``count_ops``'s per-layer list, one keyed
        by the second-to-last ``/``-separated component of each layer name.
    """
    probe = torch.rand(1, input_size[0], input_size[1], input_size[2])
    ops, all_data = count_ops(self.model, probe, print_readable=False, verbose=True)

    flop_idx_dict = dict.fromkeys(range(len(all_data)), 0)
    flop_layer_dict = {}
    running_share = 0
    for position, entry in enumerate(all_data):
        running_share += entry[1] / ops
        flop_idx_dict[position] = running_share
        # entry[0] is a '/'-separated path; key on its second-to-last part.
        layer_key = entry[0].rsplit("/", 2)[-2]
        flop_layer_dict[layer_key] = running_share
    return flop_idx_dict, flop_layer_dict
# Unpack the parsed arguments describing the architecture and training budget.
net_config_ = args_.net_config
n_nodes_ = args_.n_nodes
n_epochs_ = args_.n_epochs
n_ch_in_ = args_.n_ch_in
h_in_ = args_.h_in
w_in_ = args_.w_in
n_ch_base_ = args_.n_ch_base
device_ = args_.device

# One variable per possible edge plus two per node other than the first and last.
n_edges_ = int(n_nodes_ * (n_nodes_ - 1) / 2)
n_variables_ = int(n_edges_ + (n_nodes_ - 2) * 2)
assert len(net_config_) == n_variables_

# Each position of net_config_ is read as one integer variable.
node_type_, adj_mat_ = array2network(
    np.array([int(net_config_[i:i + 1]) for i in range(n_variables_)]), n_nodes_)

if adj_mat_ is None:
    # No network could be built from this configuration: report fixed values.
    eval_acc_ = 0.1
    flops_ = -1
else:
    model_ = NASBinaryCNN(data_type_, node_type_, adj_mat_,
                          n_ch_in=n_ch_in_, h_in=h_in_, w_in=w_in_, n_ch_base=n_ch_base_)
    if data_type_ == 'MNIST':
        train_loader_, valid_loader_, _ = load_mnist(
            batch_size=args_.batch_size, shuffle=True, random_seed=0)
    elif data_type_ == 'FashionMNIST':
        train_loader_, valid_loader_, _ = load_fashionmnist(
            batch_size=args_.batch_size, shuffle=True, random_seed=0)
    elif data_type_ == 'CIFAR10':
        train_loader_, valid_loader_, _ = load_cifar10(
            batch_size=args_.batch_size, shuffle=True, random_seed=0)
    eval_acc_ = train(model_, n_epochs_, train_loader_, valid_loader_, device_, display=False)
    dummy_input_ = next(model_.parameters()).new_ones(1, n_ch_in_, h_in_, w_in_)
    # Keep only the first element of count_ops's result so flops_ is a plain
    # number in both branches; the report below previously indexed flops_[0],
    # which raised TypeError for the -1 placeholder.
    flops_ = count_ops(model_, dummy_input_, print_readable=False)[0]

print(f'eval_acc:{eval_acc_:.4f} flops:{flops_}')
import torch
import torch.nn as nn
from torchsummary import summary
from torchvision import models
from pthflops import count_ops

# Profile torchvision's VGG-11 (batch-norm variant) on one 224x224 RGB image:
# first the layer summary, then the operation count.
input_shape = (3, 224, 224)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

net = models.vgg11_bn().to(device)
summary(net, input_shape)

inp = torch.rand(1, *input_shape).to(device)
count_ops(net, inp)
import torch
from models.pfld_vovnet import vovnet_pfld
from models.pfld import PFLDInference
from pthflops import count_ops

# Count the operations of PFLDInference on one 112x112 RGB image.
# Use the first GPU when present and fall back to the CPU otherwise, so the
# script no longer crashes on hosts without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = PFLDInference().to(device)
inp = torch.rand(1, 3, 112, 112).to(device)
count_ops(model, inp)
# NOTE(review): the statements up to the `return` are the tail of a method
# whose header lies above this excerpt; their indentation was lost in the
# source and must match the enclosing method when merged.
# Flatten the fourth branch's feature map to one vector per sample.
out4_feature = self.scala4(feature_list[3]).view(x.size(0), -1)
# Detached copy of the fourth-branch feature, used as the target below so no
# gradient flows back through it.
teacher_feature = out4_feature.detach()
# Summed squared distance between that target and the three other branch features.
feature_loss = ((teacher_feature - out3_feature)**2 + (teacher_feature - out2_feature)**2 +\
                (teacher_feature - out1_feature)**2).sum()
# One classifier head per branch.
out1 = self.fc1(out1_feature)
out2 = self.fc2(out2_feature)
out3 = self.fc3(out3_feature)
out4 = self.fc4(out4_feature)
# Outputs are ordered from the fourth branch down to the first.
return [out4, out3, out2, out1], feature_loss  # None is prepared for Hint Learning


def resnet_small(pretrained=False, **kwargs):
    """Constructs a ResNet-small model.

    The network is a ``ResNet`` built from ``BasicBlock`` with one block in
    each of its four stages.

    Args:
        pretrained (bool): Accepted but not used by this function; no
            weights are loaded here.
        **kwargs: Forwarded unchanged to the ``ResNet`` constructor.

    Returns:
        The constructed ``ResNet`` instance.
    """
    model = ResNet(BasicBlock, [1, 1, 1, 1], **kwargs)
    return model


if __name__ == '__main__':
    # Smoke run: print the op count of a 10-class model on one 32x32 RGB image.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = resnet_small(num_classes=10).to(device)
    inp = torch.rand(1,3,32,32).to(device)
    ops, _ = count_ops(net, inp, print_readable=False, verbose=False)
    print(ops)
def main(opt):
    """Train a RegNetY on ImageNet and checkpoint after every epoch.

    Seeds the RNG, builds the train/val loaders, recreates the log directory,
    logs the model graph, reports op count and layer summary, then runs
    ``opt.epochs`` rounds of train/validate, tracking the best validation
    accuracy.

    Args:
        opt: Parsed options. Reads ``batch_size``, ``data_path``,
            ``log_path``, ``saved_path``, the RegNetY hyper-parameters,
            ``lr``, ``momentum``, ``weight_decay`` and ``epochs``.
    """
    cuda_ready = torch.cuda.is_available()
    num_gpus = 1
    if cuda_ready:
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    # The training batch scales with the GPU count; validation uses a tenth
    # of the per-GPU batch size.
    training_params = dict(
        batch_size=opt.batch_size * num_gpus,
        shuffle=True,
        drop_last=True,
        num_workers=12,
    )
    test_params = dict(
        batch_size=opt.batch_size // 10,
        shuffle=False,
        drop_last=False,
        num_workers=12,
    )

    training_set = Imagenet(root_dir=opt.data_path, mode="train")
    training_generator = DataLoader(training_set, **training_params)
    test_set = Imagenet(root_dir=opt.data_path, mode="val")
    test_generator = DataLoader(test_set, **test_params)

    # Start from an empty log directory; keep an existing checkpoint directory.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)
    writer = SummaryWriter(opt.log_path)

    model = RegNetY(
        opt.initial_width,
        opt.slope,
        opt.quantized_param,
        opt.network_depth,
        opt.bottleneck_ratio,
        opt.group_width,
        opt.stride,
        opt.se_ratio,
    )
    image_shape = (3, TRAIN_IMAGE_SIZE, TRAIN_IMAGE_SIZE)
    dummy_input = torch.randn((1, *image_shape))
    writer.add_graph(model, dummy_input)

    # Calculate model FLOPS and number of parameters
    count_ops(model, dummy_input, verbose=False)
    summary(model, image_shape, device="cpu")

    if cuda_ready:
        model = nn.DataParallel(model).cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(
        model.parameters(),
        lr=opt.lr,
        momentum=opt.momentum,
        weight_decay=opt.weight_decay,
        nesterov=True,
    )

    best_acc1 = 0
    model.train()
    for epoch in range(opt.epochs):
        adjust_learning_rate(optimizer, epoch, opt.lr)
        train(training_generator, model, criterion, optimizer, epoch, writer)
        acc1 = validate(test_generator, model, criterion, epoch, writer)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        snapshot = {
            "epoch": epoch + 1,
            "state_dict": model.state_dict(),
            "best_acc1": best_acc1,
            "optimizer": optimizer.state_dict(),
        }
        save_checkpoint(snapshot, is_best, opt.saved_path)
def main():
    """Train the configured model, or only evaluate it when ``configs.test`` is set.

    In test mode the best saved weights are loaded, top-1/top-5 accuracy is
    printed and the function returns. Otherwise the model is trained with
    SGD and a multi-step LR schedule; after each epoch it is validated, the
    weights with the best top-1 accuracy are written to ``model_path`` and
    the metrics are logged to TensorBoard.

    Raises:
        Exception: In test mode, when no saved model exists at ``model_path``.
    """
    print('Dataset is loading ...........')
    train_loader, val_loader, train_set, validation_set = loadCifa100()

    print('Make checkpoint folder')
    checkpoint = os.path.join(configs.checkpoint, configs.model + "_" + configs.attention)
    if not os.path.exists(checkpoint):
        os.makedirs(checkpoint)
    model_path = os.path.join(checkpoint, configs.attention + '_' + 'best_model.pt')

    print('Load model')
    model = get_model(configs.model, configs.norm, configs.attention)
    print('\tModel loaded: ' + configs.model )
    print('\tAttention type: ' + configs.attention )
    print("\tNumber of parameters: ", sum([param.nelement() for param in model.parameters()]))

    if configs.test:
        print("Run model in test mode")
        if os.path.exists(model_path):
            # Weights are loaded before any DataParallel wrapping; they are
            # also saved unwrapped below.
            model.load_state_dict(torch.load(model_path))
        else:
            raise Exception('Cannot find model', model_path)

    if configs.gpu:
        if torch.cuda.device_count() > 1:
            print("Using", torch.cuda.device_count(), "GPUs!")
            model = nn.DataParallel(model)
        model.cuda()
        cudnn.benchmark = True

    if configs.test:
        print('Testing...')
        model.eval()
        top1, top5 = getAccuracy(model, val_loader, validation_set)
        print('Accuracy on Top 1 accuracy: %.2f' % top1)
        print('Accuracy on Top 5 accuracy: %.2f' % top5)
        return

    # Change to True if you want to calculate FLOPS
    if False:
        from pthflops import count_ops
        inp = torch.rand(2, 3, 32, 32).cuda()
        counted = count_ops(model, inp)
        # NOTE(review): count_ops may return a (total, per-layer) tuple, which
        # '%d' cannot format - confirm against the installed pthflops version.
        FLOPS = counted[0] if isinstance(counted, tuple) else counted
        print('\tFLOPS: %d' % FLOPS)
        # Context manager so the handle is closed even if the write fails.
        with open("flops.txt", 'a+') as f:
            f.write('%d\n' % FLOPS)
        return

    # Tensor board
    tb = SummaryWriter(checkpoint)

    # Optimization
    optimizer = optim.SGD(model.parameters(), lr=configs.lr, momentum=0.9,
                          weight_decay=configs.weight_decay, nesterov=True)
    scheduler = lr_scheduler.MultiStepLR(optimizer, configs.schedule, gamma=0.2)
    criterion = nn.CrossEntropyLoss()

    best_val_acc = -1
    # NOTE(review): `args` is not defined in this function while every other
    # setting comes from `configs` - confirm a module-level `args` exists.
    for epoch in range(args.num_epochs):
        # Train process
        learning_rate = optimizer.param_groups[0]['lr']
        print('Start training epoch {}. Learning rate {}'.format(epoch, learning_rate))
        model.train()
        num_batches = len(train_set) // configs.batch_size
        running_loss = 0
        for inputs, labels in tqdm(train_loader):
            if configs.gpu:
                inputs, labels = (Variable(inputs.cuda()), Variable(labels.cuda()))
            labels = labels.squeeze()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            loss.backward()
            optimizer.step()
            del inputs, labels
        # The LR schedule advances once per epoch.
        scheduler.step()
        train_loss = running_loss / num_batches
        print('\tTraining loss %f' % train_loss)

        # Validation process
        model.eval()
        num_correct = 0
        num_batches = len(validation_set) // configs.batch_size + 1
        running_loss = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                if configs.gpu:
                    inputs, labels = (Variable(inputs.cuda()), Variable(labels.cuda()))
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item()
                # Top-1 prediction per sample compared against the labels.
                _, preds = outputs.topk(1, 1, True, True)
                preds = preds.t()
                corrects = preds.eq(labels.view(1, -1).expand_as(preds))
                # Accumulate as a Python int so an empty loader cannot leave a
                # plain int on which .item() would fail.
                num_correct += torch.sum(corrects).item()
                del inputs, labels
        val_acc = num_correct / len(validation_set) * 100
        val_loss = running_loss / num_batches

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            # Unwrap only when the model really was wrapped: checking the GPU
            # count alone raised AttributeError when configs.gpu is off on a
            # multi-GPU host.
            if isinstance(model, nn.DataParallel):
                torch.save(model.module.state_dict(), model_path)
            else:
                torch.save(model.state_dict(), model_path)

        print('\tValidation loss %f' % val_loss)
        print('\tValidation acc', val_acc)
        print()

        # update tensorboard
        tb.add_scalar('Learning rate', learning_rate, epoch)
        tb.add_scalar('Train loss', train_loss, epoch)
        tb.add_scalar('Val loss', val_loss, epoch)
        tb.add_scalar('Val top1 acc', val_acc, epoch)

    print('Best validation acc %.2f' % best_val_acc)
def main(opt):
    """Train a RegNetY on an ImageFolder dataset with class-balanced sampling.

    Builds weighted-sampling train and plain validation loaders, logs the
    model graph, reports op count and layer summary, optionally enables apex
    mixed precision, optionally resumes from ``opt.restore_model``, then
    trains for ``opt.epochs`` epochs. A checkpoint is written every epoch,
    plus a numbered copy under ``ckpt/`` every tenth epoch.

    Args:
        opt: Parsed options. Reads ``batch_size``, ``data_path``, ``fixres``,
            ``log_path``, ``saved_path``, the RegNetY hyper-parameters,
            ``lr``, ``momentum``, ``weight_decay``, ``apex``,
            ``restore_model`` and ``epochs``.
    """
    num_gpus = torch.cuda.device_count()
    torch.cuda.manual_seed(123)
    cudnn.enabled = True
    cudnn.benchmark = True

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "drop_last": True,
        "num_workers": 6,
    }
    test_params = {
        "batch_size": opt.batch_size // 10,
        "shuffle": False,
        "drop_last": False,
        "num_workers": 6,
    }

    if opt.fixres:
        transformations = get_transforms_fixres(kind='full', crop=True, finetune=True)
    else:
        transformations = get_transforms()

    # training dataloader
    train_set = ImageFolder(root=os.path.join(opt.data_path, 'train'),
                            transform=transformations['train'])

    # for weighted sampling: per-class counts in ascending label order,
    # turned into weights inversely proportional to class frequency
    label_counts = Counter(
        target for target in train_set.targets if target != len(train_set.classes))
    class_count = [label_counts[label] for label in sorted(label_counts)]
    class_weights = torch.FloatTensor(
        [len(train_set) / cls_count for cls_count in class_count])
    print('class weights: {}'.format(class_weights))
    # One sampling weight per image, looked up by its label.
    image_weights = class_weights[train_set.targets]
    train_sampler = WeightedRandomSampler(image_weights, len(image_weights))
    training_generator = DataLoader(train_set, collate_fn=collate_fn,
                                    sampler=train_sampler, **training_params)

    # validation dataloader
    test_set = ImageFolder(root=os.path.join(opt.data_path, 'val'),
                           transform=transformations['val'])
    test_generator = DataLoader(test_set, collate_fn=collate_fn, **test_params)

    # Start from an empty log directory; keep an existing checkpoint directory.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)
    writer = SummaryWriter(opt.log_path)

    model = RegNetY(opt.initial_width, opt.slope, opt.quantized_param, opt.network_depth,
                    opt.bottleneck_ratio, opt.group_width, opt.stride, opt.se_ratio)
    dummy_input = torch.randn((1, 3, TRAIN_IMAGE_SIZE, TRAIN_IMAGE_SIZE))
    writer.add_graph(model, dummy_input)

    # Calculate model FLOPS and number of parameters
    count_ops(model, dummy_input, verbose=False)
    summary(model, (3, TRAIN_IMAGE_SIZE, TRAIN_IMAGE_SIZE), device="cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=opt.lr, momentum=opt.momentum,
                    weight_decay=opt.weight_decay, nesterov=True)
    best_acc1 = 0

    model = model.cuda()
    if opt.apex:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O2')
    model = nn.DataParallel(model)

    restore_epoch = 0
    if opt.restore_model:
        # NOTE: torch.load unpickles the file - only restore trusted checkpoints.
        checkpoint = torch.load(opt.restore_model)
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint['optimizer'])
        restore_epoch = checkpoint['epoch']
        # Carry the best accuracy over as well; resetting it to 0 made the
        # first resumed epoch overwrite the best checkpoint.
        best_acc1 = checkpoint.get("best_acc1", best_acc1)
        if opt.apex:
            amp.load_state_dict(checkpoint['amp'])

    for epoch in range(opt.epochs):
        epoch = epoch + restore_epoch
        adjust_learning_rate(optimizer, epoch, opt.lr)
        train(training_generator, model, criterion, optimizer, epoch, writer, opt)
        acc1 = validate(test_generator, model, criterion, epoch, writer)

        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Build the checkpoint once; the apex variant also stores amp's state.
        state = {
            "epoch": epoch + 1,
            "state_dict": model.state_dict(),
            "best_acc1": best_acc1,
            "optimizer": optimizer.state_dict(),
        }
        if opt.apex:
            state["amp"] = amp.state_dict()

        # Rolling checkpoint; the non-apex run keeps save_checkpoint's default filename.
        if opt.apex:
            save_checkpoint(state, is_best, opt.saved_path,
                            filename="apex_checkpoint.pth.tar")
        else:
            save_checkpoint(state, is_best, opt.saved_path)

        # Numbered snapshot every tenth epoch.
        if (epoch + 1) % 10 == 0:
            if opt.apex:
                snapshot_name = "ckpt/apex_checkpoint_epoch{}.pth.tar".format(epoch + 1)
            else:
                snapshot_name = "ckpt/checkpoint_epoch{}.pth.tar".format(epoch + 1)
            save_checkpoint(state, False, opt.saved_path, filename=snapshot_name)
# NOTE(review): the statements up to the `return` are the tail of a forward
# method whose header lies above this excerpt; indentation and nesting were
# reconstructed from a whitespace-collapsed source - confirm before merging.
x = self.layer5(x)
# Keep a detached CPU copy of the layer-5 activation under the key "l5".
all_inters["l5"] = x.detach().cpu();
if not self.compress_layer:
    out_features.append(x)
else:
    # Record the feature only when its spatial size differs from tmp_shape.
    if x.size()[2:] != tmp_shape:
        tmp_shape = x.size()[2:]
        out_features.append(x)
    x = self.layer6(x)
    out_features.append(x)
# Returns the collected feature maps together with the saved intermediates.
return out_features,all_inters


def resnet45(strides, compress_layer,oupch=512,inpch=1):
    """Build a ``dan_ResNet`` of ``BasicBlock``s with depths [3, 4, 6, 6, 3] and ``frac=1``.

    ``strides``, ``compress_layer``, ``oupch`` and ``inpch`` are passed to
    the ``dan_ResNet`` constructor unchanged.
    """
    model = dan_ResNet(BasicBlock, [3, 4, 6, 6, 3], strides, compress_layer,oupch=oupch,inpch=inpch,frac=1)
    return model


def resnet45_thicc(strides, compress_layer,oupch=512,inpch=1):
    """Build the same ``dan_ResNet`` as ``resnet45`` but with ``frac=1.5``.

    ``strides``, ``compress_layer``, ``oupch`` and ``inpch`` are passed to
    the ``dan_ResNet`` constructor unchanged.
    """
    model = dan_ResNet(BasicBlock, [3, 4, 6, 6, 3], strides, compress_layer,oupch=oupch,inpch=inpch,frac=1.5)
    return model


if __name__ == '__main__':
    # Smoke run: count operations of resnet45 on one 1x32x128 input.
    import torch;
    import pthflops
    strides=[(1,1), (2,2), (1,1), (2,2), (1,1), (1,1)]
    net=resnet45(strides,None);
    a=torch.rand([1,1,32,128]);
    # NOTE(review): the second value is whatever count_ops returns alongside
    # the total; the name `params` may not describe it - confirm.
    macs, params = pthflops.count_ops(net, a)
    print(macs);
    pass;
def get_flops(*args):
    """Return the first element of ``count_ops``'s result for the given arguments.

    All positional arguments are forwarded to ``count_ops``, which is called
    with ``print_readable=False`` and ``verbose=False``.
    """
    counted = count_ops(*args, print_readable=False, verbose=False)
    return counted[0]