def compute_hessian_eigenthings(model, dataloader, loss, num_eigenthings=10,
                                full_dataset=True, mode='power_iter',
                                use_gpu=True, max_samples=512,
                                hvp_operator_class=HVPOperatorParams,
                                **kwargs):
    """
    Computes the top `num_eigenthings` eigenvalues and eigenvecs
    for the hessian of the given model by using subsampled power iteration
    with deflation and the hessian-vector product

    Parameters
    ---------------

    model : Module
        pytorch model for this network
    dataloader : torch.data.DataLoader
        dataloader with x,y pairs for which we compute the loss.
    loss : torch.nn.modules.Loss | torch.nn.functional criterion
        loss function to differentiate through
    num_eigenthings : int
        number of eigenvalues/eigenvecs to compute. computed in order of
        decreasing eigenvalue magnitude.
    full_dataset : boolean
        if true, each power iteration call evaluates the gradient over the
        whole dataset.
    mode : str ['power_iter', 'lanczos']
        which backend to use to compute the top eigenvalues.
    use_gpu:
        if true, attempt to use cuda for all lin alg computations
    max_samples:
        the maximum number of samples that can fit on-memory. used
        to accumulate gradients for large batches.
    hvp_operator_class : callable
        factory for the hessian-vector-product operator. it is called as
        `hvp_operator_class(model, dataloader, loss, use_gpu=...,
        full_dataset=..., max_samples=...)` and the result is handed to the
        selected backend.
    **kwargs:
        contains additional parameters passed onto lanczos or power_iter.

    Returns
    ---------------
    (eigenvals, eigenvecs) : tuple
        the pair produced by the selected backend, returned unchanged.

    Raises
    ---------------
    ValueError
        if `mode` is neither 'power_iter' nor 'lanczos'.
    """
    # Fail fast: reject an unknown backend before paying for the construction
    # of the HVP operator.
    if mode not in ('power_iter', 'lanczos'):
        raise ValueError(
            "Unsupported mode %s (must be power_iter or lanczos)" % mode)

    hvp_operator = hvp_operator_class(model, dataloader, loss,
                                      use_gpu=use_gpu,
                                      full_dataset=full_dataset,
                                      max_samples=max_samples)

    if mode == 'power_iter':
        eigenvals, eigenvecs = deflated_power_iteration(hvp_operator,
                                                        num_eigenthings,
                                                        use_gpu=use_gpu,
                                                        **kwargs)
    else:  # mode == 'lanczos', guaranteed by the check above
        eigenvals, eigenvecs = lanczos(hvp_operator,
                                       num_eigenthings,
                                       use_gpu=use_gpu,
                                       **kwargs)
    return eigenvals, eigenvecs
# Script fragment: for a range of rows in `codes_all`, build a Hessian-vector
# product operator around that latent code and run Lanczos on it.

# Row range to process: when `args.idx_rg` holds exactly two entries they are
# used as [start, end), with `end` clamped to the number of rows in
# `codes_all`; otherwise every row is processed.
if len(args.idx_rg) == 2:
    id_str, id_end = args.idx_rg[0], args.idx_rg[1]
    id_end = min(id_end, codes_all.shape[0])
else:
    print("doing it all! ")
    id_str, id_end = 0, codes_all.shape[0]
t0 = time()
for imgi in range(id_str, id_end): #range(pasu_codes.shape[0] - 1, 0, -1):
    code = codes_all[imgi, :]
    # Add a leading axis before converting to a tensor
    # (assumes codes_all is 2-D, so feat is (1, code_len) -- TODO confirm).
    feat = torch.from_numpy(code[np.newaxis, :])
    feat.requires_grad_(False)
    if hessian_method == "BackwardIter":
        metricHVP = GANHVPOperator(G, feat, model_squ)
        eigvals, eigvects = lanczos( metricHVP, num_eigenthings=800, use_gpu=True) # takes 113 sec on K20x cluster,
        eigvects = eigvects.T # note the output shape from lanczos is different from that of linalg.eigh, row is eigvec
        # the spectrum has a close correspondence with the full Hessian, since they use the same graph.
    elif hessian_method == "ForwardIter":
        metricHVP = GANForwardMetricHVPOperator(G, feat, model_squ, preprocess=lambda img: img, EPS=args.EPS) #1E-3,)
        # NOTE(review): the call below is cut off in the visible source; its
        # remaining arguments and closing parenthesis are not shown here.
        eigvals, eigvects = lanczos( metricHVP, num_eigenthings=800, use_gpu=True, max_steps=200, tol=1e-6,
def compute_hessian_eigenthings(model, dataloader, loss, num_eigenthings=10,
                                full_dataset=True, mode="power_iter",
                                use_gpu=True, fp16=False,
                                max_possible_gpu_samples=2**16, **kwargs):
    """
    Computes the top `num_eigenthings` eigenvalues and eigenvecs
    for the hessian of the given model by using subsampled power iteration
    with deflation and the hessian-vector product

    Parameters
    ---------------

    model : Module
        pytorch model for this network
    dataloader : torch.data.DataLoader
        dataloader with x,y pairs for which we compute the loss.
    loss : torch.nn.modules.Loss | torch.nn.functional criterion
        loss function to differentiate through
    num_eigenthings : int
        number of eigenvalues/eigenvecs to compute. computed in order of
        decreasing eigenvalue magnitude.
    full_dataset : boolean
        if true, each power iteration call evaluates the gradient over the
        whole dataset.
        (if False, you might want to check if the eigenvalue estimate variance
        depends on batch size)
    mode : str ['power_iter', 'lanczos']
        which backend algorithm to use to compute the top eigenvalues.
    use_gpu:
        if true, attempt to use cuda for all lin alg computations
    fp16: bool
        if true, store and do math with eigenvectors, gradients, etc. in fp16.
        (you should test if this is numerically stable for your application)
        this flag is forwarded to the selected backend.
    max_possible_gpu_samples:
        the maximum number of samples that can fit on-memory. used
        to accumulate gradients for large batches.
        (note: if smaller than dataloader batch size, this can have odd
        interactions with batch norm statistics)
    **kwargs:
        contains additional parameters passed onto lanczos or power_iter.

    Returns
    ---------------
    (eigenvals, eigenvecs) : tuple
        the pair produced by the selected backend, returned unchanged.

    Raises
    ---------------
    ValueError
        if `mode` is neither 'power_iter' nor 'lanczos'.
    """
    # Fail fast: reject an unknown backend before paying for the construction
    # of the HVP operator.
    if mode not in ("power_iter", "lanczos"):
        raise ValueError(
            "Unsupported mode %s (must be power_iter or lanczos)" % mode)

    hvp_operator = HVPOperator(
        model,
        dataloader,
        loss,
        use_gpu=use_gpu,
        full_dataset=full_dataset,
        max_possible_gpu_samples=max_possible_gpu_samples,
    )

    if mode == "power_iter":
        eigenvals, eigenvecs = deflated_power_iteration(hvp_operator,
                                                        num_eigenthings,
                                                        use_gpu=use_gpu,
                                                        fp16=fp16,
                                                        **kwargs)
    else:  # mode == "lanczos", guaranteed by the check above
        eigenvals, eigenvecs = lanczos(hvp_operator,
                                       num_eigenthings,
                                       use_gpu=use_gpu,
                                       fp16=fp16,
                                       **kwargs)
    return eigenvals, eigenvecs
# Script fragment: repeat a forward-mode metric-Hessian eigen-decomposition at
# random latent codes for several values of the HVP `EPS` setting and save the
# result of each run under `savedir`.
savedir = r"E:\Cluster_Backup\StyleGAN2\Cats_forw"
for triali in range(10):
    for HVP_eps in [1E-1, 5E-2, 2E-2, 1E-2, 5E-3, 2E-3]:
        # Random integer in [0, 10000) used as a tag in the output file name.
        RND = np.random.randint(10000)
        T0 = time()
        # NOTE(review): the trailing `.cuda()` looks redundant, the tensor is
        # already created with device="cuda".
        ref_z = torch.randn(1, latent, device="cuda").cuda()
        SGhvp = GANForwardMetricHVPOperator(
            G, ref_z, ImDist, preprocess=lambda img: img, EPS=HVP_eps,
        )
        eigenvals, eigenvecs = lanczos(
            SGhvp, num_eigenthings=250, max_steps=200, tol=1e-5,
        )
        print(time() - T0, " sec")
        # 10 eigvect takes 12 sec
        # 50 eigvect takes 40 sec 40.1 sec
        # 200 eigvect, 100 steps takes 163 sec
        #%
        # Transpose so that `sort_idx` can index columns below
        # (presumably lanczos returns eigenvectors as rows -- confirm).
        eigenvecs = eigenvecs.T
        # Order by ascending absolute eigenvalue; the sorted values keep
        # their sign.
        sort_idx = np.argsort(np.abs(eigenvals))
        eigabs_sort = eigenvals[sort_idx]
        eigvect_sort = eigenvecs[:, sort_idx]
        #%
        # NOTE(review): the call below is cut off in the visible source; its
        # remaining keyword arguments and closing parenthesis are not shown.
        np.savez(
            join(savedir, "Hess_trunc%.1f_eps%.E_%03d.npz" % (truncation, HVP_eps, RND)),
            eigvals=eigenvals,
# for param in alexnet.parameters(): # param.requires_grad_(False) #%% Load the pasupathy codes from scipy.io import loadmat code_path = r"E:\OneDrive - Washington University in St. Louis\ref_img_fit\Pasupathy\pasu_fit_code.mat" out_dir = r"E:\OneDrive - Washington University in St. Louis\ref_img_fit\Pasupathy\Nullspace" data = loadmat(code_path) pasu_codes = data['pasu_code'] #%% Compute the Hessian around a certain Pasupathy image. t0 = time() for imgi, code in enumerate(pasu_codes[:, :]): feat = torch.from_numpy(code[np.newaxis, :]) feat.requires_grad_(False) metricHVP = GANHVPOperator(G, feat, model_squ) eigvals, eigvects = lanczos(metricHVP, num_eigenthings=800, use_gpu=True) print("Finish computing img %d %.2f sec passed, max %.2e min %.2e 10th %.1e 50th %.e 100th %.1e" % (imgi, time() - t0, max(np.abs(eigvals)), min(np.abs(eigvals)), eigvals[-10], eigvals[-50], eigvals[-100])) np.savez(join(out_dir, "pasu_%03d.npz" % imgi), eigvals=eigvals, eigvects=eigvects, code=code) #%% imgi, imgj = 0, 1 with np.load(join(out_dir, "pasu_%03d.npz" % imgi)) as data: basisi = data["eigvects"] eigvi = data["eigvals"] codei = data["code"] with np.load(join(out_dir, "pasu_%03d.npz" % imgj)) as data: basisj = data["eigvects"] eigvj = data["eigvals"] codej = data["code"]
def hessian_compute(G, feat, ImDist, hessian_method="BackwardIter", cutoff=None,
                    preprocess=lambda img: img, EPS=1E-2, device="cuda"):
    """Higher level API for GAN hessian compute.

    Parameters:
        G: GAN, usually wrapped up by a custom class. Equipped with a
            `visualize` function that takes a torch vector and outputs a
            torch image.
        feat: a latent code as input to the GAN.
        ImDist: the image distance function. Supports
            `dsim = ImDist(img1, img2)`: takes in 2 torch images and outputs a
            scalar distance. Passes gradient. If it has a callable `to`
            attribute, `ImDist.to(device)` is called first.
        hessian_method: Currently, "BP" "ForwardIter" "BackwardIter" are
            supported.
        cutoff: For the iterative methods "ForwardIter" and "BackwardIter",
            this specifies how many eigenvectors to compute. Defaults to
            `feat.numel() // 2 - 1`. Not used by "BP".
        preprocess: or post processing, is the operation on the image
            generated by the GAN. Defaults to an identity map.
            `lambda img: F.interpolate(img, (256, 256), mode='bilinear', align_corners=True)`
            is a common choice.
        EPS: forwarded as `EPS` to `GANForwardMetricHVPOperator`; only used by
            "ForwardIter".
        device: device that `ImDist` is moved to and on which the "BP" branch
            places the latent vectors.
            NOTE(review): the iterative branches call `lanczos` with
            `use_gpu=True` regardless of this value.

    Returns:
        eigvals, eigvects, H
        For the iterative methods, `eigvects` is the transposed `lanczos`
        output (eigenvectors as columns) and `H` is the reconstruction
        `eigvects @ diag(eigvals) @ eigvects.T` from the computed eigenpairs.
        For "BP", `H` is the result of `get_full_hessian` and
        `eigvals, eigvects` come from `np.linalg.eigh(H)`.

    Raises:
        NotImplementedError: if `hessian_method` is not one of the supported
            names.
    """
    if cutoff is None:
        cutoff = feat.numel() // 2 - 1
    # Plain functions used as a distance have no `.to`; only move objects
    # that actually expose it.
    if callable(getattr(ImDist, "to", None)):
        ImDist.to(device)

    if hessian_method in ("BackwardIter", "ForwardIter"):
        if hessian_method == "BackwardIter":
            # takes 113 sec on K20x cluster.
            # the spectrum has a close correspondence with the full Hessian,
            # since they use the same graph.
            metricHVP = GANHVPOperator(G, feat, ImDist, preprocess=preprocess)
            lanczos_kwargs = {}
        else:
            # EPS=1E-2, max_steps=20 takes 84 sec on K20x cluster.
            # The hessian is not so close
            metricHVP = GANForwardMetricHVPOperator(
                G, feat, ImDist, preprocess=preprocess, EPS=EPS)  # 1E-3,)
            lanczos_kwargs = {"max_steps": 200, "tol": 1e-6}
        eigvals, eigvects = lanczos(
            metricHVP, num_eigenthings=cutoff, use_gpu=True, **lanczos_kwargs)
        # note the output shape from lanczos is different from that of
        # linalg.eigh, row is eigvec
        eigvects = eigvects.T
        # Same product as eigvects @ np.diag(eigvals) @ eigvects.T: scaling
        # column j by eigvals[j] via broadcasting avoids building a dense
        # diagonal matrix and one full matrix product.
        H = (eigvects * eigvals) @ eigvects.T
    elif hessian_method == "BP":  # 240 sec on cluster
        ref_vect = feat.detach().clone().float().to(device)
        # ref_vect is already float, so no second cast is needed here.
        mov_vect = ref_vect.detach().clone().requires_grad_(True)
        imgs1 = G.visualize(ref_vect)
        imgs2 = G.visualize(mov_vect)
        dsim = ImDist(preprocess(imgs1), preprocess(imgs2))
        # 122 sec for a 256d hessian, 240 sec on cluster for 4096d hessian
        H = get_full_hessian(dsim, mov_vect)
        eigvals, eigvects = np.linalg.eigh(H)
    else:
        raise NotImplementedError(
            "Unsupported hessian_method %r "
            "(must be 'BP', 'ForwardIter' or 'BackwardIter')" % (hessian_method,))
    return eigvals, eigvects, H
# Script cells: take 200 Adam steps on the latent `feat` using `objective`,
# then run Lanczos on an HVP operator built at the resulting code, and finally
# a generalized Lanczos solve against a second (metric) operator.
feat.requires_grad_(True)
optimizer = optim.Adam([feat], lr=5e-2)
for step in range(200):
    optimizer.zero_grad()
    obj = objective(preprocess(G.visualize(feat)))
    obj.backward()
    optimizer.step()
    # Log every 10th step; the printed index is the 0-based `step`.
    if np.mod((step + 1), 10) == 0:
        print("step %d: %.2f" % (step, obj.item()))
#%%
# Freeze the latent again before building the HVP operator.
feat.requires_grad_(False)
activHVP = GANForwardHVPOperator(G, feat, objective, preprocess=preprocess)
# Apply the operator once to a random 4096-d vector; the result is discarded
# (presumably a smoke test -- confirm).
activHVP.apply(1 * torch.randn((4096)).requires_grad_(False).cuda())
#%%
t0 = time()
eigvals, eigvects = lanczos(activHVP, num_eigenthings=500, use_gpu=True)
print(time() - t0)  # 40 sec
# Reverse the order of the eigenvalues and, matching it, the order of the rows
# of eigvects (presumably rows are eigenvectors -- confirm).
eigvals = eigvals[::-1]
eigvects = eigvects[::-1, :]
#%%
# Keep this result under separate names; `eigvals`/`eigvects` are rebound by
# the generalized solve below.
eigvals_u = eigvals
eigvects_u = eigvects
#%%
feat.requires_grad_(False)
metricHVP = GANHVPOperator(G, feat, model_squ)
t0 = time()
eigvals, eigvects = lanczos_generalized(activHVP, metric_operator=metricHVP, num_eigenthings=2, use_gpu=True, tol=1e-2)