def compute_cka_internal(model_dir, data_path=None, dataset_name='cifar10',
                         use_batch=True, use_train_mode=False,
                         normalize_act=False):
  """Compute CKA score of each layer in a model to every other layer in the same model."""
  if dataset_name == 'cifar10':
    if use_train_mode:
      filename = 'cka_within_model_%d_bn_train_mode.pkl' % FLAGS.cka_batch
    else:
      filename = 'cka_within_model_%d.pkl' % FLAGS.cka_batch
  else:
    suffix = dataset_name.split('_')[-1]
    if use_train_mode:
      filename = 'cka_within_model_%d_%s_bn_train_mode.pkl' % (
          FLAGS.cka_batch, suffix)
    else:
      filename = 'cka_within_model_%d_%s.pkl' % (FLAGS.cka_batch, suffix)
  if normalize_act:
    filename = filename.replace('.pkl', '_normalize_activations.pkl')

  out_dir = os.path.join(model_dir, filename)
  if tf.io.gfile.exists(out_dir):
    return
  model = tf.keras.models.load_model(model_dir)
  if use_train_mode:
    model = convert_bn_to_train_mode(model)

  n_layers = len(model.layers)
  cka = MinibatchCKA(n_layers)
  if use_batch:
    for _ in range(FLAGS.cka_iter):
      dataset = load_test_data(FLAGS.cka_batch, shuffle=True,
                               data_path=data_path,
                               dataset_name=dataset_name, n_data=10000)
      for images, _ in dataset:
        cka.update_state(get_activations(images, model, normalize_act))
  else:
    dataset = load_test_data(FLAGS.cka_batch, data_path=data_path,
                             dataset_name=dataset_name)
    all_images = tf.concat([x[0] for x in dataset], 0)
    cka.update_state(get_activations(all_images, model, normalize_act))
  heatmap = cka.result().numpy()
  logging.info(out_dir)
  with tf.io.gfile.GFile(out_dir, 'wb') as f:
    pickle.dump(heatmap, f)

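# NOTE: `MinibatchCKA` is defined elsewhere in this repository. As a reference
# for what `update_state`/`result` accumulate, the sketch below shows the
# minibatch CKA estimator of Nguyen et al. (2021), built on the unbiased
# HSIC_1 estimator of Song et al. (2012). `_unbiased_hsic_sketch` and
# `MinibatchCKASketch` are illustrative names, not this repo's API.
def _unbiased_hsic_sketch(gram_k, gram_l):
  """Unbiased HSIC_1 estimator for two n x n Gram matrices (requires n >= 4)."""
  n = gram_k.shape[0]
  gk = gram_k.copy()
  gl = gram_l.copy()
  np.fill_diagonal(gk, 0.0)  # HSIC_1 zeroes the diagonals
  np.fill_diagonal(gl, 0.0)
  ones = np.ones(n)
  term1 = np.trace(gk @ gl)
  term2 = (ones @ gk @ ones) * (ones @ gl @ ones) / ((n - 1) * (n - 2))
  term3 = 2.0 / (n - 2) * (ones @ gk @ gl @ ones)
  return (term1 + term2 - term3) / (n * (n - 3))


class MinibatchCKASketch:
  """Accumulates per-minibatch HSIC estimates; CKA is their normalized ratio."""

  def __init__(self, n_layers):
    self.cross_hsic = np.zeros((n_layers, n_layers))
    self.self_hsic = np.zeros(n_layers)

  def update_state(self, activations):
    # Linear-kernel Gram matrix per layer: shape (batch, batch).
    grams = [a.reshape(a.shape[0], -1) @ a.reshape(a.shape[0], -1).T
             for a in activations]
    for i, gram_i in enumerate(grams):
      self.self_hsic[i] += _unbiased_hsic_sketch(gram_i, gram_i)
      for j, gram_j in enumerate(grams):
        self.cross_hsic[i, j] += _unbiased_hsic_sketch(gram_i, gram_j)

  def result(self):
    # CKA(i, j) = sum HSIC(K_i, K_j) / sqrt(sum HSIC(K_i, K_i) * sum HSIC(K_j, K_j));
    # the 1/num_batches factors cancel in the ratio.
    return self.cross_hsic / np.sqrt(np.outer(self.self_hsic, self.self_hsic))
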
def get_cosine_sim_activation(model_dir, data_path=None,
                              dataset_name='cifar10'):
  """Computes cosine similarity between activations of layers within the same block."""
  out_dir = os.path.join(model_dir, 'cosine_sim_within_model.pkl')
  if tf.io.gfile.exists(out_dir):
    return
  model = tf.keras.models.load_model(model_dir)
  n_layers = len(model.layers)
  dataset = load_test_data(1, data_path=data_path, dataset_name=dataset_name)
  result = np.zeros((n_layers, n_layers))
  n_data = 0
  for images, _ in dataset:
    n_data += 1
    activations = get_activations(images, model)
    for i in range(n_layers):
      for j in range(i + 1, n_layers):
        # Only layers with matching activation shapes (i.e. within the same
        # block) are comparable.
        if activations[i].shape != activations[j].shape:
          continue
        dist = cosine(activations[i].flatten(), activations[j].flatten())
        result[i][j] += dist
        result[j][i] += dist
  result /= n_data
  result = 1 - result  # cosine() is a distance; convert back to similarity.
  logging.info(out_dir)
  with tf.io.gfile.GFile(out_dir, 'wb') as f:
    pickle.dump(result, f)

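# `cosine` above is scipy.spatial.distance.cosine, which returns the cosine
# *distance* (1 - cosine similarity); that is why the averaged matrix is
# mapped back with `result = 1 - result`. A quick self-contained check:
def _cosine_distance_sanity_check():
  u = np.array([1.0, 0.0])
  v = np.array([1.0, 1.0])
  sim = u @ v / (np.linalg.norm(u) * np.linalg.norm(v))  # cos(45 deg)
  assert np.isclose(cosine(u, v), 1 - sim)
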
def compute_cka_internal_no_top_component(model_dir, data_path=None,
                                          dataset_name='cifar10',
                                          use_batch=True):
  """Compute CKA score of each layer in a model to every other layer in the
  same model, after removing the effect of the top component.
  """
  if dataset_name == 'cifar10':
    filename = 'cka_within_model_remove_first_pc_%d.pkl' % FLAGS.cka_batch
  else:
    suffix = dataset_name.split('_')[-1]
    filename = 'cka_within_model_remove_first_pc_%d_%s.pkl' % (
        FLAGS.cka_batch, suffix)
  out_dir = os.path.join(model_dir, filename)
  if tf.io.gfile.exists(out_dir):
    return
  model = tf.keras.models.load_model(model_dir)
  n_layers = len(model.layers)
  cka = MinibatchCKA(n_layers)
  if use_batch:
    for _ in range(FLAGS.cka_iter):
      dataset = load_test_data(FLAGS.cka_batch, shuffle=True,
                               data_path=data_path, dataset_name=dataset_name)
      for images, _ in dataset:
        acts = get_activations(images, model)
        for i, act in enumerate(acts):
          act = act.reshape(act.shape[0], -1).T
          act -= np.mean(act, axis=0)
          acts[i] = remove_first_pc(act)
        cka.update_state(acts)
  else:
    dataset = load_test_data(FLAGS.cka_batch, data_path=data_path,
                             dataset_name=dataset_name)
    all_images = tf.concat([x[0] for x in dataset], 0)
    acts = get_activations(all_images, model)
    for i, act in enumerate(acts):
      act = act.reshape(act.shape[0], -1).T
      act -= np.mean(act, axis=0)
      acts[i] = remove_first_pc(act)
    cka.update_state(acts)
  heatmap = cka.result().numpy()
  with tf.io.gfile.GFile(out_dir, 'wb') as f:
    pickle.dump(heatmap, f)

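# `remove_first_pc` is a helper defined elsewhere in this repo. A minimal
# sketch of what it presumably does, under the assumption that it projects the
# centered (features x examples) matrix onto the orthogonal complement of its
# first principal component (`remove_first_pc_sketch` is a hypothetical
# stand-in, not the repo's implementation):
def remove_first_pc_sketch(act):
  """Subtract the rank-1 component along the first right singular vector."""
  svd = TruncatedSVD(n_components=1, random_state=0)
  svd.fit(act)
  pc = svd.components_  # shape (1, n_examples)
  return act - (act @ pc.T) @ pc  # A - A v v^T removes sigma_1 u_1 v_1^T
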
def get_cifar_labels(dataset_name='cifar10', data_path=None):
  """Get test labels from different versions of CIFAR-10 datasets (full or subsampled)."""
  dataset = load_test_data(128, dataset_name=dataset_name,
                           data_path=data_path)
  all_labels = []
  for _, labels in dataset:
    all_labels.extend(labels.numpy())
  return np.array(all_labels)

def compute_across_seed_cka(depth, width, use_batch=True, data_path=None,
                            dataset_name='cifar10', normalize_act=False):
  """For pairs of models that share the same depth & width, compute CKA score
  of each layer in a model to every other layer in the other model.
  """
  model_files = [
      f for f in tf.io.gfile.listdir(FLAGS.experiment_dir)
      if 'width-%d-' % width in f and 'depth-%d' % depth in f and
      not f.endswith('.txt')
  ]
  n_layers = 0
  model_pairs = list(combinations(model_files, 2))
  pair_count = 0
  for m1, m2 in model_pairs:
    _, _, copy1 = parse_depth_width(m1, return_seed=True)
    _, _, copy2 = parse_depth_width(m2, return_seed=True)
    if copy1 > 10 or copy2 > 10:
      continue
    pair_count += 1
    out_dir = os.path.join(
        FLAGS.experiment_dir, 'cka_across_models',
        'cka_across_models_depth_%d_width_%d_batch_%d_copy_%d_copy_%d.pkl' %
        (depth, width, FLAGS.cka_batch, copy1, copy2))
    if normalize_act:
      out_dir = out_dir.replace('.pkl', '_normalize_activations.pkl')
    logging.info(out_dir)
    if tf.io.gfile.exists(out_dir):
      logging.info('Aborting...')
      continue
    model1 = tf.keras.models.load_model(
        os.path.join(FLAGS.experiment_dir, m1))
    model2 = tf.keras.models.load_model(
        os.path.join(FLAGS.experiment_dir, m2))
    if not n_layers:
      n_layers = len(model1.layers)
    cka = MinibatchCKA(n_layers, across_models=True)
    # cka2 = MinibatchCKA(n_layers * 2)
    if use_batch:
      for _ in range(FLAGS.cka_iter):
        dataset = load_test_data(FLAGS.cka_batch, shuffle=True,
                                 data_path=data_path,
                                 dataset_name=dataset_name)
        for images, _ in dataset:
          activations1 = get_activations(images, model1, normalize_act)
          activations2 = get_activations(images, model2, normalize_act)
          cka.update_state_across_models(activations1, activations2)
          # test_CKA(n_layers, n_layers, activations1, activations2,
          #          cka1=cka, cka2=cka2)
    heatmap = cka.result().numpy()
    with tf.io.gfile.GFile(out_dir, 'wb') as f:
      pickle.dump(heatmap, f)

def epoch_pc(experiment_dir, batch_size=256, data_path=None,
             dataset_name='cifar10', use_train_mode=False, n_iter=1):
  """For each model ckpt, compute first PC of activations of each layer in
  that ckpt and save to a file.
  """
  if use_train_mode:
    out_dir = os.path.join(experiment_dir,
                           'first_pc_all_epochs_bn_train_mode.pkl')
  else:
    out_dir = os.path.join(experiment_dir, 'first_pc_all_epochs.pkl')
  logging.info(out_dir)
  if tf.io.gfile.exists(out_dir):
    result = pickle.load(tf.io.gfile.GFile(out_dir, 'rb'))
  else:
    result = {}
  test_dataset = load_test_data(batch_size, data_path=data_path,
                                dataset_name=dataset_name,
                                shuffle=True).repeat()
  epoch_files = [
      f for f in tf.io.gfile.listdir(experiment_dir) if 'weights' in f
  ]
  epoch_files.append('')  # include initialization
  for epoch_file in epoch_files:
    if 'ckpt' in epoch_file:
      epoch_no = int(epoch_file.split('.')[1])
    else:
      epoch_no = 0
    if epoch_no % 10 != 0:
      continue
    if epoch_no in result:
      continue
    model = tf.keras.models.load_model(
        os.path.join(experiment_dir, epoch_file))
    if use_train_mode:
      model = convert_bn_to_train_mode(model)
    n_layers = len(model.layers)
    avg_variance_explained = np.zeros((n_layers,))
    avg_pc = np.zeros((n_layers, batch_size))
    it = 0
    for images, _ in test_dataset:
      it += 1
      if it > n_iter:
        break
      all_activations = get_activations(images, model)
      for i, act in enumerate(all_activations):
        act = act.reshape(act.shape[0], -1)
        act -= np.mean(act, axis=0)
        svd = TruncatedSVD(n_components=1, random_state=0)
        svd.fit(act.T)
        avg_variance_explained[i] += svd.explained_variance_ratio_[0]
        act_pc = svd.components_.squeeze()
        avg_pc[i, :] += act_pc
    avg_variance_explained /= n_iter
    avg_pc /= n_iter
    result[epoch_no] = (avg_pc, avg_variance_explained)
  with tf.io.gfile.GFile(out_dir, 'wb') as f:
    pickle.dump(result, f)

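# Throughout these functions the SVD is fit on the *transposed* activation
# matrix (features x examples), so `svd.components_` is the first right
# singular vector: one score per example in the batch, not one coefficient
# per feature. A quick self-contained check of that orientation:
def _pc_orientation_check(batch=8, features=32):
  rng = np.random.RandomState(0)
  act = rng.randn(batch, features)
  act -= act.mean(axis=0)
  svd = TruncatedSVD(n_components=1, random_state=0)
  svd.fit(act.T)  # shape (features, batch), as in epoch_pc above
  assert svd.components_.shape == (1, batch)
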
def compute_width_depth_cka(model_list1, model_list2, use_batch=True,
                            data_path=None, dataset_name='cifar10',
                            normalize_act=False):
  """Computes CKA score of each layer in a model to every other layer in
  another model.
  """
  all_pairs = [(i, j) for i in model_list1 for j in model_list2]
  random.seed(0)
  model_pairs = random.sample(all_pairs, min(len(all_pairs), 20))
  for m1, m2 in model_pairs:
    if m1 == m2:  # skip pairs that picked the same model
      continue
    depth1, width1, copy1 = parse_depth_width(m1, return_seed=True)
    depth2, width2, copy2 = parse_depth_width(m2, return_seed=True)
    logging.info('%s %s', m1, m2)
    out_dir = os.path.join(
        FLAGS.experiment_dir, 'cka_across_models',
        'cka_across_models_depth_%d_width_%d_copy_%d_depth_%d_width_%d_copy_%d_batch_%d.pkl'
        % (depth1, width1, copy1, depth2, width2, copy2, FLAGS.cka_batch))
    if normalize_act:
      out_dir = out_dir.replace('.pkl', '_normalize_activations.pkl')
    logging.info(out_dir)
    if tf.io.gfile.exists(out_dir):
      logging.info('Aborting...')
      continue
    model1 = tf.keras.models.load_model(m1)
    model2 = tf.keras.models.load_model(m2)
    n_layers = len(model1.layers)
    n_layers2 = len(model2.layers)
    cka = MinibatchCKA(n_layers, n_layers2, across_models=True)
    # cka2 = MinibatchCKA(n_layers + n_layers2)
    if use_batch:
      for _ in range(FLAGS.cka_iter):
        dataset = load_test_data(FLAGS.cka_batch, shuffle=True,
                                 data_path=data_path,
                                 dataset_name=dataset_name)
        for images, _ in dataset:
          activations1 = get_activations(images, model1, normalize_act)
          activations2 = get_activations(images, model2, normalize_act)
          cka.update_state_across_models(activations1, activations2)
          # test_CKA(n_layers, n_layers2, activations1, activations2,
          #          cka1=cka, cka2=cka2)
    heatmap = cka.result().numpy()
    with tf.io.gfile.GFile(out_dir, 'wb') as f:
      pickle.dump(heatmap, f)

def compute_sparsity(model_dir):
  """Compute level of sparsity in activations."""
  model = tf.keras.models.load_model(model_dir)
  test_dataset = load_test_data(100)
  total_sparsity = None
  count = 0
  for images, _ in test_dataset:
    count += 1
    activations = get_activations(images, model)
    # Fraction of exactly-zero entries per layer for this batch.
    all_sparsity = [np.mean(act.flatten() == 0) for act in activations]
    if total_sparsity is None:
      total_sparsity = np.array(all_sparsity)
    else:
      total_sparsity += np.array(all_sparsity)
  total_sparsity /= count
  save_path = os.path.join(model_dir, 'activation_sparsity.txt')
  with tf.io.gfile.GFile(save_path, 'w') as f:
    json.dump({'sparsity': list(total_sparsity)}, f)

def compute_sparsity_per_unit(model_dir):
  """Compute level of sparsity in each layer activation."""
  model = tf.keras.models.load_model(model_dir)
  test_dataset = load_test_data(100)
  total_sparsity = None
  count = 0
  for images, _ in test_dataset:
    count += 1
    activations = get_activations(images, model)
    all_sparsity = []
    for act in activations:
      act = act.reshape([act.shape[0], -1])
      # Fraction of examples for which each unit is exactly zero.
      all_sparsity.append(np.mean(act == 0, axis=0))
    if total_sparsity is None:
      total_sparsity = [np.array(s) for s in all_sparsity]
    else:
      for i, s in enumerate(total_sparsity):
        total_sparsity[i] = s + all_sparsity[i]
  total_sparsity = [s / count for s in total_sparsity]
  save_path = os.path.join(model_dir, 'activation_sparsity_per_unit.txt')
  with tf.io.gfile.GFile(save_path, 'w') as f:
    json.dump({'sparsity': [list(s) for s in total_sparsity]}, f)

def CKA_without_dominant_images(dataset_name, model_dir, batch_size=10000,
                                top=None, bottom=None, frac=0.5):
  """Compute internal CKA when {frac} of most dominant images are removed."""
  # TODO: include option for concatenating activations from top to bottom and
  # computing first PC from there.
  if dataset_name == 'cifar10':
    filename = 'cka_within_model_%d_remove_%.2f_dominant_egs.pkl' % (
        FLAGS.cka_batch, frac)
  else:
    suffix = dataset_name.split('_')[-1]
    filename = 'cka_within_model_%d_%s_remove_%.2f_dominant_egs.pkl' % (
        FLAGS.cka_batch, suffix, frac)
  out_dir = os.path.join(model_dir, filename)
  if tf.io.gfile.exists(out_dir):
    return
  model = tf.keras.models.load_model(model_dir)
  test_dataset = load_test_data(batch_size, dataset_name=dataset_name)
  images, _ = next(iter(test_dataset))
  all_activations = get_activations(images, model)
  bs = images.numpy().shape[0]
  if bottom is None:
    act = all_activations[top]
  act = act.reshape(bs, -1)
  processed_act = act - np.mean(act, axis=0)
  svd = TruncatedSVD(n_components=1, random_state=0)
  svd.fit(processed_act.T)
  act_pc = svd.components_.squeeze()
  n_examples = len(act_pc)
  # Remove the top {frac} most dominant datapoints.
  outlier_idx = set(np.argsort(np.abs(act_pc))[-int(n_examples * frac):])

  # Create a new dataset with the dominant images filtered out.
  test_dataset = tfds.load(name=dataset_name, split='test',
                           as_supervised=True)
  test_dataset = test_dataset.batch(1)
  all_images, all_labels = [], []
  for count, data in enumerate(test_dataset.as_numpy_iterator()):
    if count in outlier_idx:
      continue
    all_images.append(data[0].squeeze())
    all_labels.append(data[1].item())
  all_images = np.stack(all_images)

  # Compute internal CKA with the new dataset.
  n_layers = len(model.layers)
  cka = MinibatchCKA(n_layers)
  for _ in range(FLAGS.cka_iter):
    new_dataset = tf.data.Dataset.from_tensor_slices((all_images, all_labels))
    new_dataset = new_dataset.map(
        functools.partial(preprocess_data, is_training=False))
    new_dataset = new_dataset.shuffle(buffer_size=int(batch_size * frac))
    new_dataset = new_dataset.batch(FLAGS.cka_batch, drop_remainder=False)
    for images, _ in new_dataset:
      cka.update_state(get_activations(images, model))
  heatmap = cka.result().numpy()
  with tf.io.gfile.GFile(out_dir, 'wb') as f:
    pickle.dump(heatmap, f)

def save_pc_values(dataset_name, model_dir, batch_size=10000, top=None,
                   bottom=None, save_dim_coefficients=False, frac=0.5):
  """Save first PC-related metadata for each layer activation.

  Either output projected values onto the first PC and the fraction of
  variation explained, or the PC itself.
  """
  out_dir = os.path.join(model_dir, 'all_pc_values_%d.pkl' % batch_size)
  out_dir_explained_ratio = os.path.join(
      model_dir, 'all_pc_explained_ratio_%d.pkl' % batch_size)
  if top is not None:  # remove outliers and recalculate PC
    out_dir = out_dir.replace('.pkl', '_no_outlier_remove_%.2f.pkl' % frac)
    out_dir_explained_ratio = out_dir_explained_ratio.replace(
        '.pkl', '_no_outlier_remove_%.2f.pkl' % frac)
  if tf.io.gfile.exists(out_dir_explained_ratio):
    return
  model = tf.keras.models.load_model(model_dir)
  if ('weights' in model_dir or 'copy-10' in model_dir or
      'copy-11' in model_dir or 'copy-12' in model_dir):
    model = convert_bn_to_train_mode(model)
    out_dir = out_dir.replace('.pkl', '_bn_train_mode.pkl')
    out_dir_explained_ratio = out_dir_explained_ratio.replace(
        '.pkl', '_bn_train_mode.pkl')
  test_dataset = load_test_data(batch_size, dataset_name=dataset_name)
  images, _ = next(iter(test_dataset))
  all_activations = get_activations(images, model)
  bs = images.numpy().shape[0]
  all_pc_values = []
  all_pc_explained_ratio = []
  all_coefficients = []
  all_coefficients_no_outliers = []
  for i, act in enumerate(all_activations):
    if top is not None and (i > top or i < bottom):
      continue
    act = act.reshape(bs, -1)
    processed_act = act - np.mean(act, axis=0)
    svd = TruncatedSVD(n_components=1, random_state=0)
    svd.fit(processed_act.T)
    act_pc = svd.components_.squeeze()
    if save_dim_coefficients:  # save the PC itself
      U, _, _ = randomized_svd(processed_act.T, n_components=1, n_iter=5,
                               random_state=None)
      all_coefficients.append(U.squeeze())
    if top is None:
      all_pc_values.append(act_pc)
      all_pc_explained_ratio.append(svd.explained_variance_ratio_[0])
    else:
      # Remove outliers for the corresponding layers: keep only the {frac}
      # least dominant data points and recompute the PC.
      n_examples = len(act_pc)
      outlier_idx = np.argsort(np.abs(act_pc))[int(n_examples * frac):]
      selected_idx = np.array(
          [k for k in range(bs) if k not in outlier_idx])
      act_no_outlier = act[selected_idx, :]
      processed_act = act_no_outlier - np.mean(act_no_outlier, axis=0)
      if save_dim_coefficients:
        U, _, _ = randomized_svd(processed_act.T, n_components=1, n_iter=5,
                                 random_state=None)
        all_coefficients_no_outliers.append(U.squeeze())
      else:
        svd = TruncatedSVD(n_components=1, random_state=0)
        svd.fit(processed_act.T)
        act_pc = svd.components_.squeeze()
        all_pc_values.append(act_pc)
        all_pc_explained_ratio.append(svd.explained_variance_ratio_[0])
  if save_dim_coefficients:
    out_dir = os.path.join(model_dir,
                           'all_dim_coefficients_%d.pkl' % batch_size)
    pickle.dump(all_coefficients, tf.io.gfile.GFile(out_dir, 'wb'))
    out_dir = os.path.join(
        model_dir,
        'all_dim_coefficients_%d_no_outlier_remove_half.pkl' % batch_size)
    pickle.dump(all_coefficients_no_outliers,
                tf.io.gfile.GFile(out_dir, 'wb'))
  else:
    pickle.dump(all_pc_values, tf.io.gfile.GFile(out_dir, 'wb'))
    pickle.dump(all_pc_explained_ratio,
                tf.io.gfile.GFile(out_dir_explained_ratio, 'wb'))

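# Two different objects are saved above: `TruncatedSVD(...).components_` is
# the first *right* singular vector of processed_act.T (one projected value
# per example), while `randomized_svd(...)`'s `U` holds the first *left*
# singular vector (one coefficient per feature dimension, i.e. "the PC
# itself"). A quick check that the two factorizations agree up to sign:
def _pc_factorization_check(features=64, examples=16):
  rng = np.random.RandomState(0)
  x = rng.randn(features, examples)  # plays the role of processed_act.T
  u, _, vt = randomized_svd(x, n_components=1, n_iter=5, random_state=0)
  svd = TruncatedSVD(n_components=1, random_state=0)
  svd.fit(x)
  assert u.shape == (features, 1)  # feature-space direction
  # Example-space scores match up to sign flip.
  assert abs(svd.components_[0] @ vt[0]) > 0.99
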
def epoch_cka(experiment_dir, use_batch=True, data_path=None,
              dataset_name='cifar10', last_epoch=300, curr_epoch=None,
              use_train_mode=False):
  """Compute CKA score of each layer in a final model to every other layer of
  the same model at earlier stages of training.
  """
  n_layers = 0
  epoch_files = [
      f for f in tf.io.gfile.listdir(experiment_dir) if 'weights' in f
  ]
  if curr_epoch is not None:
    if curr_epoch == 0:
      epoch_files = ['']  # initialization
    else:
      epoch_files = [
          f for f in epoch_files if 'weights.%d.' % curr_epoch in f
      ]
  model_pairs = [(f, 'weights.%d.ckpt' % last_epoch) for f in epoch_files]
  if use_train_mode:
    save_dir = os.path.join(experiment_dir, 'cka_across_epochs_bn_train_mode')
  else:
    save_dir = os.path.join(experiment_dir, 'cka_across_epochs')
  if not tf.io.gfile.exists(save_dir):
    tf.io.gfile.mkdir(save_dir)
  for m1, m2 in model_pairs:
    if 'ckpt' not in m1:
      epoch_no = 0
    else:
      epoch_no = int(m1.split('.')[1])
    # Keep every 10th epoch, plus the first few epochs of training.
    if epoch_no % 10 != 0 and epoch_no > 10:
      continue
    if epoch_no == last_epoch:
      continue
    out_dir = os.path.join(
        save_dir, 'batch_%d_epoch_%d_epoch_%d.pkl' %
        (FLAGS.cka_batch, epoch_no, last_epoch))
    logging.info(out_dir)
    if tf.io.gfile.exists(out_dir):
      logging.info('Aborting...')
      continue
    model1 = tf.keras.models.load_model(os.path.join(experiment_dir, m1))
    model2 = tf.keras.models.load_model(os.path.join(experiment_dir, m2))
    if use_train_mode:
      model1 = convert_bn_to_train_mode(model1)
      model2 = convert_bn_to_train_mode(model2)
    if not n_layers:
      n_layers = len(model1.layers)
    cka = MinibatchCKA(n_layers, across_models=True)
    if use_batch:
      for _ in range(FLAGS.cka_iter):
        dataset = load_test_data(FLAGS.cka_batch, shuffle=True,
                                 data_path=data_path,
                                 dataset_name=dataset_name)
        for images, _ in dataset:
          activations1 = get_activations(images, model1)
          activations2 = get_activations(images, model2)
          cka.update_state_across_models(activations1, activations2)
    heatmap = cka.result().numpy()
    with tf.io.gfile.GFile(out_dir, 'wb') as f:
      pickle.dump(heatmap, f)