def _compute(submit_config, config, model=None, force_recompute=False):
    """Return the path of the cached decomposition, computing it if needed.

    The dump is stored under ``<run_dir>/cache/components``. Its file name
    encodes the model, output class, layer, estimator parameter string,
    sample count, the ``_w`` latent-space flag and, when ``config.seed`` is
    truthy, the seed.

    Args:
        submit_config: Object exposing ``run_dir`` (base directory).
        config: Run configuration. Reads ``n``, ``use_w``, ``model``,
            ``estimator``, ``components``, ``sparsity``, ``output_class``,
            ``layer`` and ``seed``.
        model: Optional ``InstrumentedModel`` forwarded to ``compute``.
            ``None`` means no model is passed along.
        force_recompute: Recompute even when the dump file already exists.

    Returns:
        ``pathlib.Path`` of the ``.npz`` dump file.

    Raises:
        RuntimeError: If ``config.n`` is ``None``, if ``model`` is given but
            is not an ``InstrumentedModel``, or if ``config.use_w`` is set
            for a model whose name does not contain ``'StyleGAN'``.
    """
    basedir = Path(submit_config.run_dir)

    if config.n is None:
        raise RuntimeError('Must specify number of samples with -n=XXX')

    # Compare against None explicitly: ``compute`` only treats None as
    # "no model", so a falsy non-InstrumentedModel object must be rejected
    # here instead of slipping through a truthiness test.
    if model is not None and not isinstance(model, InstrumentedModel):
        raise RuntimeError('Passed model has to be wrapped in "InstrumentedModel"')

    if config.use_w and 'StyleGAN' not in config.model:
        raise RuntimeError(f'Cannot change latent space of non-StyleGAN model {config.model}')

    transformer = get_estimator(config.estimator, config.components, config.sparsity)
    dump_name = "{}-{}_{}_{}_n{}{}{}.npz".format(
        config.model.lower(),
        config.output_class.replace(' ', '_'),
        config.layer.lower(),
        transformer.get_param_str(),
        config.n,
        '_w' if config.use_w else '',
        # A falsy seed (None or 0) adds no suffix to the file name.
        f'_seed{config.seed}' if config.seed else '',
    )

    dump_path = basedir / 'cache' / 'components' / dump_name

    if not dump_path.is_file() or force_recompute:
        print('Not cached')
        t_start = datetime.datetime.now()
        compute(config, dump_path, model)
        print('Total time:', datetime.datetime.now() - t_start)

    return dump_path
def __init__(self, action_space, cmdl):
    """Set up the NEC agent: feature extractor, DNDs, memory and optimizer.

    Args:
        action_space: Forwarded unchanged to ``BaseAgent.__init__``.
        cmdl: Configuration object. Reads ``slow_lr``, ``fast_lr``,
            ``dnd`` (with ``linear_projection``, ``size``, ``knn_no``),
            ``estimator``, ``rescale``, ``experience_replay`` and
            ``n_horizon``.
    """
    BaseAgent.__init__(self, action_space)
    self.name = "NEC_agent"
    self.cmdl = cmdl
    self.dtype = TorchTypes()
    self.slow_lr = slow_lr = cmdl.slow_lr
    self.fast_lr = fast_lr = cmdl.fast_lr
    dnd = cmdl.dnd

    # Feature extractor and embedding size
    FeatureExtractor = get_estimator(cmdl.estimator)
    state_dim = (1, 84) if cmdl.rescale else (1, 24)
    # Any falsy setting (False, None, 0) means "no linear projection".
    # Previously only a literal False was handled, so None/0 left
    # ``self.conv`` unassigned and the next line raised AttributeError.
    projection = dnd.linear_projection or None
    self.conv = FeatureExtractor(state_dim, projection)
    embedding_size = self.conv.get_embedding_size()

    # DNDs, Memory, N-step buffer
    # One DND per action; ``self.action_no`` is presumably set by
    # BaseAgent.__init__ -- TODO confirm.
    self.dnds = [
        DND(dnd.size, embedding_size, dnd.knn_no)
        for _ in range(self.action_no)
    ]
    self.replay_memory = ReplayMemory(capacity=cmdl.experience_replay)
    self.N_step = self.cmdl.n_horizon
    self.N_buff = []

    # The slow learning rate drives the Adam optimizer of the feature
    # extractor; the fast one is handed to ``update_rule``.
    self.optimizer = torch.optim.Adam(self.conv.parameters(), lr=slow_lr)
    self.optimizer.zero_grad()
    self.update_q = update_rule(fast_lr)

    # Temp data, flags, stats, misc
    self._key_tmp = None
    self.knn_ready = False
    self.initial_val = 0.1
    self.max_q = -math.inf
Z_stdev = data['lat_stdev'] n_comp = X_comp.shape[0] data.close() # Transfer components to device tensors = SimpleNamespace( X_comp=torch.from_numpy(X_comp).to(device).float(), #-1, 1, C, H, W X_global_mean=torch.from_numpy(X_global_mean).to( device).float(), # 1, C, H, W X_stdev=torch.from_numpy(X_stdev).to(device).float(), Z_comp=torch.from_numpy(Z_comp).to(device).float(), Z_stdev=torch.from_numpy(Z_stdev).to(device).float(), Z_global_mean=torch.from_numpy(Z_global_mean).to(device).float(), ) transformer = get_estimator(args.estimator, n_comp, args.sparsity) tr_param_str = transformer.get_param_str() # Compute max batch size given VRAM usage max_batch = args.batch_size or (get_max_batch_size(inst, device) if has_gpu else 1) print('Batch size:', max_batch) def show(): if args.batch_mode: plt.close('all') else: plt.show() print(f'[{timestamp()}] Creating visualizations')
def compute(config, dump_name, instrumented_model):
    """Fit the configured estimator on sampled features and save the result.

    Samples latents from the model, collects either the latents themselves
    or the retained features of ``config.layer``, fits the estimator returned
    by ``get_estimator`` (incrementally when it supports batching, otherwise
    on all samples at once), derives matching latent-space directions and
    writes everything to ``dump_name`` with ``np.savez_compressed``.

    Side effects visible in this function: assigns the module-level ``B``,
    overwrites ``config.components``, reseeds the torch and numpy RNGs and
    enables ``torch.backends.cudnn.benchmark``.

    Args:
        config: Run configuration. Reads ``layer``, ``model``,
            ``output_class``, ``use_w``, ``components``, ``estimator``,
            ``sparsity``, ``batch_size``, ``n`` and ``seed``.
        dump_name: ``pathlib.Path`` of the output ``.npz`` file.
        instrumented_model: Existing instrumented model to reuse, or
            ``None`` to create one here (and close it again at the end).

    Raises:
        RuntimeError: If the ICA estimator is requested with more samples
            than the 32-bit LAPACK limit computed below allows.
        SystemExit: After a ``KeyboardInterrupt``; immediately when the
            estimator cannot be fitted in batches, otherwise after the
            partial state has been saved.
    """
    global B

    def timestamp():
        return datetime.datetime.now().strftime("%d.%m %H:%M")

    print(f'[{timestamp()}] Computing', dump_name.name)

    # Ensure reproducibility
    torch.manual_seed(0)  # also sets cuda seeds
    np.random.seed(0)

    # Speed up backend
    torch.backends.cudnn.benchmark = True

    has_gpu = torch.cuda.is_available()
    device = torch.device('cuda' if has_gpu else 'cpu')
    layer_key = config.layer

    if instrumented_model is None:
        inst = get_instrumented_model(config.model, config.output_class, layer_key, device)
        model = inst.model
    else:
        print('Reusing InstrumentedModel instance')
        inst = instrumented_model
        model = inst.model
        inst.remove_edits()
        model.set_output_class(config.output_class)

    # Regress back to w space
    if config.use_w:
        print('Using W latent space')
        model.use_w()

    # Run a single sample through the model to learn the feature shape
    inst.retain_layer(layer_key)
    model.partial_forward(model.sample_latent(1), layer_key)
    sample_shape = inst.retained_features()[layer_key].shape
    sample_dims = np.prod(sample_shape)
    print('Feature shape:', sample_shape)

    input_shape = inst.model.get_latent_shape()
    input_dims = inst.model.get_latent_dims()

    # Cannot extract more components than there are feature dimensions
    config.components = min(config.components, sample_dims)
    transformer = get_estimator(config.estimator, config.components, config.sparsity)

    X = None
    X_global_mean = None

    # Figure out batch size if not provided
    B = config.batch_size or get_max_batch_size(inst, device, layer_key)

    # Divisible by B (ignored in output name)
    N = config.n // B * B

    # Compute maximum batch size based on RAM + pagefile budget
    target_bytes = 20 * 1_000_000_000  # GB
    feat_size_bytes = sample_dims * np.dtype('float64').itemsize
    N_limit_RAM = np.floor_divide(target_bytes, feat_size_bytes)
    if not transformer.batch_support and N > N_limit_RAM:
        print('WARNING: estimator does not support batching, ' \
            'given config will use {:.1f} GB memory.'.format(feat_size_bytes / 1_000_000_000 * N))

    # 32-bit LAPACK gets very unhappy about huge matrices (in linalg.svd)
    if config.estimator == 'ica':
        lapack_max_N = np.floor_divide(np.iinfo(np.int32).max // 4, sample_dims)  # 4x extra buffer
        if N > lapack_max_N:
            raise RuntimeError(f'Matrices too large for ICA, please use N <= {lapack_max_N}')

    print('B={}, N={}, dims={}, N/dims={:.1f}'.format(B, N, sample_dims, N/sample_dims), flush=True)

    # Must not depend on chosen batch size (reproducibility)
    NB = max(B, max(2_000, 3*config.components))  # ipca: as large as possible!

    # Full sample matrix is only needed when the estimator cannot be fitted
    # incrementally; NB extra rows absorb the final (possibly partial) group.
    samples = None
    if not transformer.batch_support:
        samples = np.zeros((N + NB, sample_dims), dtype=np.float32)

    # A falsy seed (None or 0) falls back to SEED_SAMPLING
    torch.manual_seed(config.seed or SEED_SAMPLING)
    np.random.seed(config.seed or SEED_SAMPLING)

    # Use exactly the same latents regardless of batch size
    # Store in main memory, since N might be huge (1M+)
    # Run in batches, since sample_latent() might perform Z -> W mapping
    n_lat = ((N + NB - 1) // B + 1) * B
    latents = np.zeros((n_lat, *input_shape[1:]), dtype=np.float32)
    with torch.no_grad():
        for i in trange(n_lat // B, desc='Sampling latents'):
            latents[i*B:(i+1)*B] = model.sample_latent(n_samples=B).cpu().numpy()

    # Decomposition on non-Gaussian latent space
    samples_are_latents = layer_key in ['g_mapping', 'style'] and inst.model.latent_space_name() == 'W'

    # Bind these up front: the interrupt handler reads ``gi`` and the final
    # cleanup deletes ``batch``; both would raise NameError if the loop
    # below never completed a single iteration (e.g. N == 0).
    gi = 0
    batch = None

    canceled = False
    try:
        X = np.ones((NB, sample_dims), dtype=np.float32)
        action = 'Fitting' if transformer.batch_support else 'Collecting'
        for gi in trange(0, N, NB, desc=f'{action} batches (NB={NB})', ascii=True):
            for mb in range(0, NB, B):
                z = torch.from_numpy(latents[gi+mb:gi+mb+B]).to(device)

                if samples_are_latents:
                    # Decomposition on latents directly (e.g. StyleGAN W)
                    batch = z.reshape((B, -1))
                else:
                    # Decomposition on intermediate layer
                    with torch.no_grad():
                        model.partial_forward(z, layer_key)

                    # Permuted to place PCA dimensions last
                    batch = inst.retained_features()[layer_key].reshape((B, -1))

                # The last mini-batch of a group may only partially fit into X
                space_left = min(B, NB - mb)
                X[mb:mb+space_left] = batch.cpu().numpy()[:space_left]

            if transformer.batch_support:
                # A falsy return value from fit_partial stops the fitting early
                if not transformer.fit_partial(X.reshape(-1, sample_dims)):
                    break
            else:
                samples[gi:gi+NB, :] = X.copy()
    except KeyboardInterrupt:
        if not transformer.batch_support:
            sys.exit(1)  # no progress yet

        # Record how far fitting got in the file name of the partial dump
        dump_name = dump_name.parent / dump_name.name.replace(f'n{N}', f'n{gi}')
        print(f'Saving current state to "{dump_name.name}" before exiting')
        canceled = True

    if not transformer.batch_support:
        X = samples  # Use all samples
        X_global_mean = X.mean(axis=0, keepdims=True, dtype=np.float32)  # TODO: activations surely multi-modal...!
        X -= X_global_mean

        print(f'[{timestamp()}] Fitting whole batch')
        t_start_fit = datetime.datetime.now()

        transformer.fit(X)

        print(f'[{timestamp()}] Done in {datetime.datetime.now() - t_start_fit}')
        # NOTE(review): this check is one-sided -- a large negative mean
        # passes. Left unchanged to avoid new failures on existing runs;
        # consider comparing np.abs(mean_) instead.
        assert np.all(transformer.transformer.mean_ < 1e-3), 'Mean of normalized data should be zero'
    else:
        X_global_mean = transformer.transformer.mean_.reshape((1, sample_dims))
        X = X.reshape(-1, sample_dims)
        X -= X_global_mean

    X_comp, X_stdev, X_var_ratio = transformer.get_components()

    assert X_comp.shape[1] == sample_dims \
        and X_comp.shape[0] == config.components \
        and X_global_mean.shape[1] == sample_dims \
        and X_stdev.shape[0] == config.components, 'Invalid shape'

    # 'Activations' are really latents in a secondary latent space
    if samples_are_latents:
        Z_comp = X_comp
        Z_global_mean = X_global_mean
    else:
        Z_comp, Z_global_mean = regression(X_comp, X_global_mean, X_stdev, inst, config)

    # Normalize
    Z_comp /= np.linalg.norm(Z_comp, axis=-1, keepdims=True)

    # Random projections
    # We expect these to explain much less of the variance
    random_dirs = get_random_dirs(config.components, np.prod(sample_shape))
    n_rand_samples = min(5000, X.shape[0])
    X_view = X[:n_rand_samples, :].T
    assert np.shares_memory(X_view, X), "Error: slice produced copy"
    X_stdev_random = np.dot(random_dirs, X_view).std(axis=1)

    # Inflate back to proper shapes (for easier broadcasting)
    X_comp = X_comp.reshape(-1, *sample_shape)
    X_global_mean = X_global_mean.reshape(sample_shape)
    Z_comp = Z_comp.reshape(-1, *input_shape)
    Z_global_mean = Z_global_mean.reshape(input_shape)

    # Compute stdev in latent space if non-Gaussian
    lat_stdev = np.ones_like(X_stdev)
    if config.use_w:
        samples = model.sample_latent(5000).reshape(5000, input_dims).detach().cpu().numpy()
        coords = np.dot(Z_comp.reshape(-1, input_dims), samples.T)
        lat_stdev = coords.std(axis=1)

    os.makedirs(dump_name.parent, exist_ok=True)
    np.savez_compressed(dump_name, **{
        'act_comp': X_comp.astype(np.float32),
        'act_mean': X_global_mean.astype(np.float32),
        'act_stdev': X_stdev.astype(np.float32),
        'lat_comp': Z_comp.astype(np.float32),
        'lat_mean': Z_global_mean.astype(np.float32),
        'lat_stdev': lat_stdev.astype(np.float32),
        'var_ratio': X_var_ratio.astype(np.float32),
        'random_stdevs': X_stdev_random.astype(np.float32),
    })

    # The partial result has been written; now honour the interrupt
    if canceled:
        sys.exit(1)

    # Don't shutdown if passed as param
    if instrumented_model is None:
        inst.close()
        del inst
        del model

    # Drop references to the large buffers before emptying the CUDA cache
    del X
    del X_comp
    del random_dirs
    del batch
    del samples
    del latents
    torch.cuda.empty_cache()