class ModelsHandler:
    """Own an online/target ``ConvNet`` pair and the operations on them.

    The body relies on ``field`` and ``__post_init__``, i.e. on the dataclass
    machinery; the ``@dataclass`` decorator is presumably applied directly
    above this definition (not visible here - confirm).
    """

    # First constructor argument handed to ``ConvNet``.
    input_shape: tuple
    # Number of discrete actions; also the width of the one-hot action mask.
    num_actions: int
    # Learning rate forwarded to ``ConvNet``.
    lr: float = field(default=0.001)

    def __post_init__(self):
        """Select the device and build both networks on it."""
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = ConvNet(self.input_shape, self.num_actions,
                             self.lr).to(self.device)
        self.tgt_model = ConvNet(self.input_shape, self.num_actions,
                                 self.lr).to(self.device)
        self.model_update_count = 0
        self.current_loss = 0

    def train_step(self, rb: ReplayBuffer, sample_size=300, gamma=1.0):
        """Run one optimisation step of the online model on a sampled batch.

        Args:
            rb: Buffer whose ``sample(n)`` returns transitions exposing
                ``state_tensor``, ``next_state_tensor``, ``not_done_tensor``,
                ``action`` and ``reward_tensor``.
            sample_size: Number of transitions requested from ``rb``.
            gamma: Discount applied to the bootstrapped next-state value.
                The default of ``1.0`` reproduces the previous, undiscounted
                target exactly.

        Returns:
            The mean squared TD error of the batch as a Python float.
        """
        batch = rb.sample(sample_size)
        states = torch.stack(
            [trans.state_tensor for trans in batch]).to(self.device)
        next_states = torch.stack(
            [trans.next_state_tensor for trans in batch]).to(self.device)
        not_done = torch.Tensor(
            [trans.not_done_tensor for trans in batch]).to(self.device)
        actions = [trans.action for trans in batch]
        rewards = torch.stack(
            [trans.reward_tensor for trans in batch]).to(self.device)

        # The bootstrap value comes from the target network and must not
        # contribute gradients.
        with torch.no_grad():
            next_q = self.tgt_model(next_states).max(-1).values

        self.model.optimizer.zero_grad()
        q_all = self.model(states)
        action_mask = torch.nn.functional.one_hot(
            torch.LongTensor(actions), self.num_actions).to(self.device)
        # Masking and summing over the action axis keeps only the Q-value of
        # the action that was actually taken.
        q_taken = torch.sum(q_all * action_mask, -1)

        # ``not_done`` zeroes the bootstrap term for terminal transitions.
        td_target = rewards + gamma * (not_done * next_q)
        loss = ((td_target - q_taken) ** 2).mean()
        loss.backward()
        self.model.optimizer.step()
        return loss.item()

    def update_target_model(self):
        """Copy the online weights into the target model and count the sync."""
        # load_state_dict copies values into the target's own tensors, so no
        # intermediate deep copy of the state dict is required.
        self.tgt_model.load_state_dict(self.model.state_dict())
        self.model_update_count += 1

    def save_target_model(self):
        """Serialise a model to a timestamped file in the temp dir and hand it to wandb.

        The directory is ``$TMPDIR`` when set, otherwise ``/tmp``.
        """
        # NOTE(review): despite the method name, the object written is
        # ``self.model`` (the online network), not ``self.tgt_model`` -
        # confirm which one is intended.
        file_name = f"{datetime.now().strftime('%H:%M:%S')}.pth"
        temp_dir = os.environ.get('TMPDIR', '/tmp')
        file_name = os.path.join(temp_dir, file_name)
        torch.save(self.model, file_name)
        wandb.save(file_name)
# Disabled steps kept from the mask-based pruning variant of this script:
#net.set_masks(masks)
#print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(new_net, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(new_net.parameters(),
                                lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(new_net, criterion, optimizer, param, loader_train)

# Check accuracy after retraining
print("--- After retraining ---")
test(new_net, loader_test)

# Save the layer configuration together with the weights, then rebuild a
# fresh network from that checkpoint and evaluate it.
#torch.save(net.state_dict(), 'models/convnet_pruned.pkl')
import os

# Build the path once so the save and the load can never drift apart, and
# make sure the directory exists so the retrained weights are not lost to a
# missing-folder error.
checkpoint_path = os.path.join('models', 'conv-pruned1.pth.tar')
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
torch.save({
    'cfg': cfg,
    'state_dict': new_net.state_dict()
}, checkpoint_path)

checkpoint = torch.load(checkpoint_path)
net2 = ConvNet(checkpoint['cfg'])
net2.load_state_dict(checkpoint['state_dict'])

# Total number of parameter elements in the rebuilt network.
print(sum(param.nelement() for param in net2.parameters()))
test(net2, loader_test)
# Load the pretrained model
net = ConvNet()
# map_location='cpu' lets a checkpoint written on a GPU machine load on a
# CPU-only host; the network is moved to the GPU below when one is present.
net.load_state_dict(
    torch.load('models/convnet_pretrained.pkl', map_location='cpu'))
if torch.cuda.is_available():
    print('CUDA ensabled.')
    net.cuda()
print("--- Pretrained network loaded ---")
test(net, loader_test)

# prune the weights
masks = filter_prune(net, param['pruning_perc'])
net.set_masks(masks)
print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(net, loader_test)

# Retraining
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters(),
                                lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(net, criterion, optimizer, param, loader_train)

# Check accuracy and nonzeros weights in each layer
print("--- After retraining ---")
test(net, loader_test)
prune_rate(net)

# Save the weights (state dict only) of the pruned, retrained network
torch.save(net.state_dict(), 'models/convnet_pruned.pkl')
def main():
    """Train the backbone + projection model against semantic proxies.

    Configuration is read from module-level names (``args``, ``df_train``,
    ``df_gal``). The function builds the data loaders, the model, the train
    and test proxy sets and a two-group SGD optimizer, then runs the epoch
    loop, saves a checkpoint on the last epoch and finishes with one more
    evaluation on the test loader.
    """
    # data normalization
    input_size = 224
    target_size = (input_size, input_size)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # data loaders
    kwargs = {'num_workers': 8, 'pin_memory': True} if args.cuda else {}

    if args.da:
        # Custom augmentation runs first, on the raw input.
        train_steps = [
            random_transform,
            transforms.ToPILImage(),
            transforms.Resize(target_size),
            transforms.ToTensor(),
            normalize,
        ]
    else:
        train_steps = [
            transforms.ToPILImage(),
            transforms.Resize(target_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]
    train_transforms = transforms.Compose(train_steps)

    test_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(target_size),
        transforms.ToTensor(),
        normalize,
    ])

    # ``DataLoader`` here is the project's dataset class; the torch loader is
    # always referenced by its fully qualified name.
    train_set = DataLoader(df_train, train_transforms,
                           root=args.data_dir, mode=args.mode)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=args.batch_size, shuffle=True, **kwargs)

    test_set = DataLoader(df_gal, test_transforms,
                          root=args.data_dir, mode=args.mode)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=args.batch_size, shuffle=False, **kwargs)

    # instantiate the models
    output_shape, backbone = get_backbone(args)
    embed = LinearProjection(output_shape, args.dim_embed)
    model = ConvNet(backbone, embed)

    # instantiate the proxies
    fsem = get_semantic_fname(args.word)
    path_semantic = os.path.join('aux', 'Semantic', args.dataset, fsem)
    train_proxies = get_proxies(path_semantic,
                                df_train['cat'].cat.categories)
    test_proxies = get_proxies(path_semantic, df_gal['cat'].cat.categories)

    train_proxynet = ProxyNet(args.n_classes, args.dim_embed,
                              proxies=torch.from_numpy(train_proxies))
    test_proxynet = ProxyNet(args.n_classes_gal, args.dim_embed,
                             proxies=torch.from_numpy(test_proxies))

    # criterion
    criterion = ProxyLoss(args.temperature)

    if args.multi_gpu:
        model = nn.DataParallel(model)

    if args.cuda:
        for module in (backbone, embed, model, train_proxynet,
                       test_proxynet):
            module.cuda()

    # Two parameter groups: the backbone trains at a scaled learning rate,
    # the projection head at the base rate. Only parameters owned by child
    # modules are collected.
    low_layers = [p for child in backbone.children()
                  for p in child.parameters()]
    upper_layers = [p for child in embed.children()
                    for p in child.parameters()]
    parameters_set = [
        {'params': low_layers, 'lr': args.lr * args.factor_lower},
        {'params': upper_layers, 'lr': args.lr * 1.},
    ]

    optimizer = optim.SGD(parameters_set, lr=args.lr, momentum=0.9,
                          nesterov=True, weight_decay=args.wd)

    n_parameters = sum(p.data.nelement() for p in model.parameters())
    print(' + Number of params: {}'.format(n_parameters))

    scheduler = CosineAnnealingLR(optimizer,
                                  args.epochs * len(train_loader),
                                  eta_min=3e-6)

    print('Starting training...')
    last_epoch = args.epochs
    for epoch in range(args.start_epoch, last_epoch + 1):
        # update learning rate
        # NOTE(review): the scheduler is stepped once per epoch here, is also
        # passed into train(), and its horizon is epochs * len(train_loader)
        # - confirm the intended stepping cadence.
        scheduler.step()

        # train for one epoch
        train(train_loader, model, train_proxynet.proxies.weight, criterion,
              optimizer, epoch, scheduler)

        evaluate(test_loader, model, test_proxynet.proxies.weight, criterion)

        # saving
        if epoch == last_epoch:
            save_checkpoint({'epoch': epoch,
                             'state_dict': model.state_dict()})

    print('\nResults on test set (end of training)')
    write_logs('\nResults on test set (end of training)')
    evaluate(test_loader, model, test_proxynet.proxies.weight, criterion)
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import copy
import time
from models import ConvNet, nCrossEntropyLoss
from config import DefaultConfig
from data.dataset import data_loader, data, dataset_size
from utils.utils import equal

# Model, optimizer and loss used by the training loop below.
net = ConvNet()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
loss_func = nCrossEntropyLoss()

# Snapshot of the initial weights and the accuracy to compare against.
best_model_wts = copy.deepcopy(net.state_dict())
best_acc = 0.0

# Wall-clock start time.
since = time.time()
for epoch in range(DefaultConfig.EPOCH):
    # Per-epoch accumulators, reset at the start of every epoch.
    running_loss = 0.0
    running_corrects = 0

    for step, (inputs, label) in enumerate(data_loader):
        # Zero-filled LongTensor of shape (BATCH_SIZE, 1)
        pred = torch.LongTensor(DefaultConfig.BATCH_SIZE, 1).zero_()
        inputs = Variable(inputs)  # (bs, 3, 60, 160)
        label = Variable(label)  # (bs, 4)
        # Zero the gradients
        optimizer.zero_grad()
def main(args):
    """Distributed entry point: train, validate and/or quantization-train a ConvNet.

    Joins the NCCL process group, merges the JSON file at ``args.config``
    with the command-line arguments, builds the data loaders, model and Adam
    optimizer, optionally restores a checkpoint, and then runs whichever of
    the ``training`` / ``validation`` / ``quantization`` phases the
    configuration enables.

    Args:
        args: Parsed command-line namespace. ``args.config`` is the path of
            the JSON configuration; every attribute of ``args`` overrides
            the same-named JSON entry.
    """
    init_process_group(backend='nccl')

    with open(args.config) as file:
        config = json.load(file)
    config.update(vars(args))
    config = apply_dict(Dict, config)

    backends.cudnn.benchmark = True
    backends.cudnn.fastest = True

    # One process per GPU: map the global rank onto a local device index.
    cuda.set_device(distributed.get_rank() % cuda.device_count())

    train_dataset = ImageDataset(
        root=config.train_root,
        meta=config.train_meta,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, ) * 3, (0.5, ) * 3)
        ]))
    val_dataset = ImageDataset(
        root=config.val_root,
        meta=config.val_meta,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, ) * 3, (0.5, ) * 3)
        ]))

    train_sampler = utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = utils.data.distributed.DistributedSampler(val_dataset)

    train_data_loader = utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=config.local_batch_size,
        sampler=train_sampler,
        num_workers=config.num_workers,
        pin_memory=True)
    val_data_loader = utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=config.local_batch_size,
        sampler=val_sampler,
        num_workers=config.num_workers,
        pin_memory=True)

    model = ConvNet(
        conv_params=[
            Dict(in_channels=3, out_channels=32, kernel_size=5, padding=2,
                 stride=2, bias=False),
            Dict(in_channels=32, out_channels=64, kernel_size=5, padding=2,
                 stride=2, bias=False),
        ],
        linear_params=[
            Dict(in_channels=3136, out_channels=1024, kernel_size=1,
                 bias=False),
            Dict(in_channels=1024, out_channels=10, kernel_size=1,
                 bias=True),
        ])
    # Move the model to the GPU before the optimizer is built and before any
    # optimizer state is restored. Optimizer.load_state_dict casts the loaded
    # state to the device the parameters live on at that moment; restoring
    # while the model was still on the CPU left the Adam buffers on the CPU
    # and the parameters on CUDA once training started.
    model.cuda()

    # Scale the learning rate linearly with the global batch size.
    config.global_batch_size = (config.local_batch_size *
                                distributed.get_world_size())
    config.optimizer.lr *= config.global_batch_size / config.global_batch_denom
    optimizer = optim.Adam(model.parameters(), **config.optimizer)

    epoch = 0
    global_step = 0
    if config.checkpoint:
        # Load onto the CPU first so that every rank does not materialise
        # the checkpoint on the device it was saved from; load_state_dict
        # then copies the values onto this rank's own device.
        checkpoint = Dict(torch.load(config.checkpoint, map_location='cpu'))
        model.load_state_dict(checkpoint.model_state_dict)
        optimizer.load_state_dict(checkpoint.optimizer_state_dict)
        epoch = checkpoint.last_epoch + 1
        global_step = checkpoint.global_step

    # The helpers below read ``epoch`` from this scope, so the phase loops
    # must keep using that exact name as their loop variable.

    def train(data_loader):
        """Run one pass over ``data_loader``, updating the model every batch."""
        nonlocal global_step
        model.train()
        for images, labels in data_loader:
            images = images.cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            logits = model(images)
            loss = nn.functional.cross_entropy(logits, labels)
            # NOTE(review): retain_graph=True is kept as found; it may be
            # required by the quantization/statistics wrappers - confirm
            # before removing.
            loss.backward(retain_graph=True)
            # Synchronise gradients across ranks before the update.
            average_gradients(model.parameters())
            optimizer.step()
            predictions = logits.topk(k=1, dim=1)[1].squeeze()
            accuracy = torch.mean((predictions == labels).float())
            # Average the reported metrics across ranks.
            average_tensors([loss, accuracy])
            global_step += 1
            dprint(f'[training] epoch: {epoch} global_step: {global_step} '
                   f'loss: {loss:.4f} accuracy: {accuracy:.4f}')

    @torch.no_grad()
    def validate(data_loader):
        """Evaluate on ``data_loader`` and print the mean loss and accuracy."""
        model.eval()
        losses = []
        accuracies = []
        for images, labels in data_loader:
            images = images.cuda()
            labels = labels.cuda()
            logits = model(images)
            loss = nn.functional.cross_entropy(logits, labels)
            predictions = logits.topk(k=1, dim=1)[1].squeeze()
            accuracy = torch.mean((predictions == labels).float())
            average_tensors([loss, accuracy])
            losses.append(loss)
            accuracies.append(accuracy)
        # Unweighted mean over batches.
        loss = torch.mean(torch.stack(losses)).item()
        accuracy = torch.mean(torch.stack(accuracies)).item()
        dprint(f'[validation] epoch: {epoch} global_step: {global_step} '
               f'loss: {loss:.4f} accuracy: {accuracy:.4f}')

    @torch.no_grad()
    def feed(data_loader):
        """Forward every batch through the model in eval mode, discarding outputs."""
        model.eval()
        for images, _ in data_loader:
            images = images.cuda()
            logits = model(images)

    def save():
        """Write model/optimizer state to ``checkpoints/epoch_<epoch>`` on rank 0 only."""
        if not distributed.get_rank():
            os.makedirs('checkpoints', exist_ok=True)
            torch.save(
                dict(model_state_dict=model.state_dict(),
                     optimizer_state_dict=optimizer.state_dict(),
                     last_epoch=epoch,
                     global_step=global_step),
                os.path.join('checkpoints', f'epoch_{epoch}'))

    if config.training:
        # Make every rank start from the same weights.
        broadcast_tensors(model.state_dict().values())
        for epoch in range(epoch, config.num_training_epochs):
            train_sampler.set_epoch(epoch)
            train(train_data_loader)
            validate(val_data_loader)
            save()

    if config.validation:
        broadcast_tensors(model.state_dict().values())
        validate(val_data_loader)

    if config.quantization:
        broadcast_tensors(model.state_dict().values())
        with QuantizationEnabler(model):
            with BatchStatsUser(model):
                for epoch in range(epoch, config.num_quantization_epochs):
                    train_sampler.set_epoch(epoch)
                    train(train_data_loader)
                    validate(val_data_loader)
                    save()
            # NOTE(review): ``epoch`` carries over from the loop above, so
            # this range starts at the last epoch that loop ran - confirm
            # that is the intended schedule.
            with AverageStatsUser(model):
                for epoch in range(epoch, config.num_quantization_epochs):
                    train_sampler.set_epoch(epoch)
                    train(train_data_loader)
                    validate(val_data_loader)
                    save()