# NOTE(review): this chunk begins inside a call whose opening lies outside the
# visible source; the next two lines are the tail of its argument list
# (presumably the construction of `swag_model` -- confirm against the file).
'max_rank': 20, 'pca_rank': args.rank, },
*model_cfg.args, **model_cfg.kwargs)
swag_model.to(args.device)

# Restore the saved state into swag_model. strict=False is passed, presumably
# to tolerate key mismatches between checkpoint and model -- confirm against
# swag_model's load_state_dict.
print('Loading: %s' % args.checkpoint)
ckpt = torch.load(args.checkpoint)
swag_model.load_state_dict(ckpt['state_dict'], strict=False)

# Call set_swa() and then print the result of evaluating swag_model on the
# training loader with the cross-entropy criterion.
swag_model.set_swa()
print("SWA:", utils.eval(loaders["train"], swag_model, criterion=losses.cross_entropy))

# get_space() is unpacked into three values here; `var` is not used in the
# visible code, only `mean` and `cov_factor` are.
mean, var, cov_factor = swag_model.get_space()
subspace = Subspace(mean, cov_factor)
# Diagnostic output: norms of cov_factor taken along dim 1.
print(torch.norm(cov_factor, dim=1))

# The flow is sized by the first dimension of cov_factor.
# NOTE(review): the flow is given the current CUDA device while swag_model was
# moved to args.device -- confirm the two agree when args.device is not CUDA.
nvp_flow = construct_flow(cov_factor.shape[0], device=torch.cuda.current_device())

# prior_log_sigma = log(prior_std) + log(temperature) / 2, which equals
# log(prior_std * sqrt(temperature)).
vi_model = VINFModel(base=model_cfg.base, subspace=subspace, flow=nvp_flow,
                     prior_log_sigma=math.log(args.prior_std) + math.log(args.temperature) / 2,
                     num_classes=num_classes, *model_cfg.args, **model_cfg.kwargs)
# Collect the late-training checkpoints from args.dir into swag_model.
#
# os.listdir() returns entries in arbitrary order, so the matching files are
# sorted by checkpoint number (file name as tie-break) before being collected.
# This makes the sequence passed to collect_model() deterministic across
# filesystems and runs. Only files whose name contains "checkpoint" and whose
# checkpoint number is above 160 are used; the name test runs first so
# checkpoint_num() is never called on unrelated files.
checkpoint_files = sorted(
    (
        name
        for name in os.listdir(args.dir)
        if "checkpoint" in name and checkpoint_num(name) > 160
    ),
    key=lambda name: (checkpoint_num(name), name),
)

for file in checkpoint_files:
    path = os.path.join(args.dir, file)
    print('Loading %s' % path)
    checkpoint = torch.load(path)
    # Load the stored weights into the working model, then hand that model
    # to swag_model so it can be collected.
    model.load_state_dict(checkpoint['state_dict'])
    swag_model.collect_model(model)

# NOTE: an alternative path (disabled in the original) restored swag_model
# directly from args.checkpoint with load_state_dict(..., strict=False)
# instead of re-collecting the individual checkpoints.

swag_model.set_swa()

# get_space() is unpacked into three values; `var` is not used in the visible
# code. `mean` and `subspace` are moved to the GPU for the projected model.
mean, var, subspace = swag_model.get_space()
mean = mean.cuda()
subspace = subspace.cuda()

# One trainable coefficient per row of `subspace`, stored as a column vector
# with the same dtype and device as the subspace.
proj_params = torch.zeros(
    subspace.size(0),
    1,
    dtype=subspace.dtype,
    device=subspace.device,
    requires_grad=True,
)
print(proj_params.device, subspace.device)

# The projected model works on a deep copy, so `model` itself is not the
# object handed to ProjectedModel. `mean` is passed as a column (unsqueeze(1)).
proj_model = ProjectedModel(
    model=copy.deepcopy(model).cuda(),
    mean=mean.unsqueeze(1),
    projection=subspace,
    proj_params=proj_params,
)
# NOTE(review): this chunk opens with the closing arguments of a call started
# outside the visible source (`optimizer`, used below, is presumably its
# result -- confirm). Block nesting below was reconstructed from a
# whitespace-collapsed source -- confirm against the original file.
momentum=0.9, weight_decay=1e-4)
loader = generate_dataloaders(N=10)
# Filled with the SWAG model's state during training; remains None unless the
# `epoch == 4` branch below runs (i.e. requires num_epochs > 4).
state_dict = None
for epoch in range(num_epochs):
    model.train()
    for x, y in loader:
        model.zero_grad()
        pred = model(x)
        # Summed squared error between predictions and targets.
        loss = ((pred - y)**2.0).sum()
        loss.backward()
        optimizer.step()
    # Hand the current model to the SWAG model once per epoch.
    small_swag_model.collect_model(model)
    # Snapshot the SWAG state at the fifth epoch (epochs count from 0) so it
    # can be re-loaded at the end of this block.
    if epoch == 4:
        state_dict = small_swag_model.state_dict()
small_swag_model.fit()
# Exercise sampling and the forward pass without tracking gradients.
with torch.no_grad():
    # Column of inputs -6, -5, ..., 5.
    x = torch.arange(-6., 6., 1.0).unsqueeze(1)
    for i in range(10):
        small_swag_model.sample(0.5)
        small_swag_model(x)
# The tuple unpacking doubles as an arity check: get_space() must yield two
# values without the covariance factor and three with it.
_, _ = small_swag_model.get_space(export_cov_factor=False)
_, _, _ = small_swag_model.get_space(export_cov_factor=True)
# Re-load the snapshot taken during training into the SWAG model.
small_swag_model.load_state_dict(state_dict)