def delete_isolated_ccs_refactored(weights, adjacency_matrix, is_testing=False):
    """Assume that all the isolated connected components have only one node."""

    # 1D boolean array of non-isolated nodes
    node_mask = (adjacency_matrix != 0).toarray().any(axis=1)

    no_isolated_adjacency_matrix = adjacency_matrix[:, node_mask][node_mask, :]

    if is_testing:
        layer_sizes = [w.shape[0] for w in weights]

        # Create two iterators over the per-layer node masks.
        # The iterators are shifted by one (current, next):
        #   current - slices rows in the weight matrix
        #   next    - slices columns in the weight matrix
        layer_mask = splitter(node_mask, layer_sizes)
        current_layer_mask, next_layer_mask = it.tee(layer_mask, 2)
        next(next_layer_mask)

        bi_layer_masks = it.zip_longest(current_layer_mask,
                                        next_layer_mask,
                                        fillvalue=Ellipsis)

        array_weights = (layer_weights.toarray()
                         if sparse.issparse(layer_weights) else layer_weights
                         for layer_weights in weights)

        # NOTE: sparse layers may need `.toarray()` here instead of `np.array`
        no_isolated_weights = [
            np.array(layer_weights)[current_mask, :][:, next_mask]
            for layer_weights, (current_mask, next_mask)
            in zip(array_weights, bi_layer_masks)
        ]
    else:
        no_isolated_weights = []

    return no_isolated_weights, no_isolated_adjacency_matrix, node_mask
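
# Hedged, self-contained sketch (not part of the pipeline) of the iterator-
# shifting idiom used above: `it.tee` duplicates the per-layer mask iterator,
# the copy is advanced by one, and `zip_longest(..., fillvalue=Ellipsis)` lets
# the last layer keep all columns, since `w[mask, :][:, Ellipsis]` selects
# every column. `_toy_splitter` is a hypothetical stand-in for the repo's
# `splitter` helper; `np` and `it` are the module-level imports used above.
def _demo_shifted_layer_masks():
    def _toy_splitter(seq, sizes):
        seq = list(seq)
        out, start = [], 0
        for size in sizes:
            out.append(seq[start:start + size])
            start += size
        return out

    node_mask = np.array([True, False, True, True, True])
    layer_sizes = [2, 3]

    current_masks, next_masks = it.tee(_toy_splitter(node_mask, layer_sizes), 2)
    next(next_masks)

    # The final pair is (last_layer_mask, Ellipsis).
    return list(it.zip_longest(current_masks, next_masks, fillvalue=Ellipsis))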
def get_clustering_info_imagenet(
        model_tag, num_clusters,
        savedir='/project/clusterability_in_neural_networks/results/'):

    assert model_tag in VIS_NETS

    clustering_results = run_clustering_imagenet(model_tag,
                                                 num_clusters=num_clusters,
                                                 with_shuffle=False,
                                                 eigen_solver='arpack')
    layer_names = clustering_results['layer_names']
    conv_connections = clustering_results['conv_connections']
    layer_sizes = [cc[0]['weights'].shape[0] for cc in conv_connections[1:]]
    dense_sizes = get_dense_sizes(conv_connections)
    layer_sizes.extend(list(dense_sizes.values()))
    labels = clustering_results['labels']
    labels_in_layers = list(splitter(labels, layer_sizes))

    for nm, ly in zip(layer_names, layer_sizes):
        print(ly, nm)

    clustering_info = {'layers': layer_names, 'labels': labels_in_layers}

    with open(savedir + model_tag + '_clustering_info.pkl', 'wb') as f:
        pickle.dump(clustering_info, f)
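
# Hedged usage sketch: reading back the pickle written by
# `get_clustering_info_imagenet`. The 'resnet18' tag is illustrative only; any
# member of VIS_NETS that was clustered above would work.
def _demo_load_clustering_info(
        model_tag='resnet18',
        savedir='/project/clusterability_in_neural_networks/results/'):
    with open(savedir + model_tag + '_clustering_info.pkl', 'rb') as f:
        clustering_info = pickle.load(f)
    # 'layers' is the list of layer names; 'labels' holds one label list per layer.
    return clustering_info['layers'], clustering_info['labels']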
def set_nodes_positions(nodes, layer_widths, clustering_labels,
                        is_first_square=True, dx=50, dy=5, jitter=10):
    """Set positions of the nodes of a neural network for networkx drawing."""

    pos = {}

    labeled_nodes_by_layer = splitter(zip(nodes, clustering_labels),
                                      layer_widths)
    layer_data = enumerate(zip(layer_widths, labeled_nodes_by_layer))

    starting_x = 0

    # TODO - refactor!
    for layer_index, (layer_width, labeled_nodes) in layer_data:

        nodes, labels = zip(*labeled_nodes)
        nodes_sorted = [node for _, node in sorted(zip(labels, nodes))]

        # The first layer is the input (image),
        # so let's draw it as a square!
        if is_first_square and layer_index == 0:

            nodes_sorted = nodes

            (xs, normalized_ys,
             shift_x, side) = set_square_nodes_positions(layer_width,
                                                         nodes_sorted)
            starting_x += shift_x
            height = dy * shift_x

        else:
            nodes_sorted = [node for _, node in sorted(zip(labels, nodes))]

            starting_x += dx
            xs = np.full(layer_width, starting_x, dtype=float)
            xs += 2 * jitter * np.random.random(layer_width) - jitter
            xs = xs.round().astype(int)

            center_node = layer_width // 2
            normalized_ys = (np.arange(layer_width) - center_node) / center_node

            height = dy * layer_width

        ys = normalized_ys * height
        ys = ys.round().astype(int)

        pos.update({node: (x, y)
                    for node, (x, y) in zip(nodes_sorted, zip(xs, ys))})

    return pos
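
# Hedged, self-contained sketch (not used by the drawing code): it shows how
# `set_nodes_positions` spreads a non-square layer vertically, i.e. symmetric
# offsets around the middle node scaled by `dy * layer_width`, before the
# x-placement and jitter are applied.
def _demo_layer_ys(layer_width=5, dy=5):
    center_node = layer_width // 2
    normalized_ys = (np.arange(layer_width) - center_node) / center_node
    ys = (normalized_ys * dy * layer_width).round().astype(int)
    return ys  # layer_width=5, dy=5 -> array([-25, -12, 0, 12, 25])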
def build_cluster_graph(weights_path, clustering_result, normalize_in_out=True):

    labels, _ = clustering_result
    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    G = nx.DiGraph()

    (label_by_layer,
     current_label_by_layer,
     next_label_by_layer) = it.tee(splitter(labels, layer_widths), 3)

    next_label_by_layer = it.islice(next_label_by_layer, 1, None)

    for layer_index, layer_labels in enumerate(label_by_layer):
        unique_labels = sorted(label for label in np.unique(layer_labels)
                               if label != -1)

        for label in unique_labels:
            node_name = nodify(layer_index, label)
            G.add_node(node_name)

    edges = {}

    for layer_index, (current_labels, next_labels, layer_weights) in enumerate(
            zip(current_label_by_layer, next_label_by_layer, weights)):

        label_edges = it.product(
            (label for label in np.unique(current_labels) if label != -1),
            (label for label in np.unique(next_labels) if label != -1))

        for current_label, next_label in label_edges:

            current_mask = (current_label == current_labels)
            next_mask = (next_label == next_labels)

            between_weights = layer_weights[current_mask, :][:, next_mask]

            if normalize_in_out:
                n_weight_in, n_weight_out = between_weights.shape
                n_weights = n_weight_in * n_weight_out
                normalization_factor = n_weights
            else:
                normalization_factor = 1

            edge_weight = np.abs(between_weights).sum() / normalization_factor

            current_node = nodify(layer_index, current_label)
            next_node = nodify(layer_index + 1, next_label)

            edges[current_node, next_node] = edge_weight

    for nodes, weight in edges.items():
        G.add_edge(*nodes, weight=weight)

    return G
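
# Hedged, self-contained sketch of the edge-weight computation in
# `build_cluster_graph`: with `normalize_in_out=True`, the weight between two
# cluster nodes is the mean absolute connection strength between their units
# (sum divided by n_in * n_out). Toy labels and weights only.
def _demo_cluster_edge_weight():
    layer_weights = np.array([[0.5, -1.0, 0.0],
                              [2.0, 0.0, -0.5]])
    current_labels = np.array([0, 1])    # labels of the 2 input-side units
    next_labels = np.array([0, 0, 1])    # labels of the 3 output-side units

    current_mask = (current_labels == 0)
    next_mask = (next_labels == 0)
    between_weights = layer_weights[current_mask, :][:, next_mask]

    n_in, n_out = between_weights.shape
    edge_weight = np.abs(between_weights).sum() / (n_in * n_out)
    return edge_weight  # (|0.5| + |-1.0|) / 2 -> 0.75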
def draw_cluster_by_layer(weights_path, clustering_result,
                          n_clusters=4, with_text=False,
                          size_factor=4, width_factor=30,
                          ax=None):

    G = build_cluster_graph(weights_path, clustering_result)

    labels, _ = clustering_result
    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    color_mapper = get_color_mapper(n_clusters)

    node_size = {}

    (label_by_layer,
     current_label_by_layer,
     next_label_by_layer) = it.tee(splitter(labels, layer_widths), 3)

    next_label_by_layer = it.islice(next_label_by_layer, 1, None)

    for layer_index, layer_labels in enumerate(label_by_layer):
        unique_labels = sorted(label for label in np.unique(layer_labels)
                               if label != -1)

        for label in unique_labels:
            node_name = nodify(layer_index, label)
            node_size[node_name] = (layer_labels == label).sum()

    pos = nx.drawing.nx_agraph.graphviz_layout(G, prog='dot')

    width = [G[u][v]['weight'] * width_factor for u, v in G.edges()]
    node_color = [color_mapper[int(v.split('-')[1])] for v in G.nodes()]
    node_size = [node_size[v] * size_factor for v in G.nodes()]

    if ax is None:
        _, ax = plt.subplots(1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        nx.draw(G, pos,
                with_labels=True,
                node_color=node_color,
                node_size=node_size,
                # font_color='white',
                width=width,
                ax=ax)

    if with_text:
        # The per-edge weights live on `G` (they are built inside
        # `build_cluster_graph`), so print them from there.
        pprint({(u, v): d['weight'] for u, v, d in G.edges(data=True)})

    return ax
def _layers_labels_gen(network_type, layer_widths, labels,
                       ignore_layers, to_shuffle=False,
                       fixed=None, verbose=False):

    layer_data = zip(splitter(deepcopy(labels), layer_widths),
                     layer_widths[:-1])

    next(layer_data)

    for layer_id, (layer_labels, layer_width) in enumerate(layer_data, start=1):

        # for max pool
        if (ignore_layers
                # `layer_id - 1` because we set `start=1` for `enumerate`
                and ignore_layers[layer_id - 1]):
            if verbose:
                print(f'Ignoring layer {layer_id - 1}!')
            continue

        layer_labels = np.array(layer_labels)

        if to_shuffle:

            # Don't shuffle pruned neurons: this mask selects the entries
            # that will be permuted.
            non_shuffled_mask = (layer_labels != -1)

            # We perform the same operation of unpacking `fixed_layer_label`
            # multiple times, because I wanted to put all the "fixed"
            # processing in one section.
            if fixed is not None:
                fixed_layer_id, fixed_label = fixed

                if fixed_layer_id == layer_id:
                    assert not (~non_shuffled_mask
                                & (layer_labels == fixed_label)).any()
                    non_shuffled_mask &= (layer_labels != fixed_label)

            layer_labels[non_shuffled_mask] = np.random.permutation(
                layer_labels[non_shuffled_mask])

        yield layer_id, layer_labels
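
# Hedged, self-contained sketch of the shuffling rule in `_layers_labels_gen`:
# labels are permuted within a layer, while pruned units (label -1) and any
# `fixed` label keep their positions. Toy labels only.
def _demo_shuffle_except_pruned(seed=0):
    rng = np.random.RandomState(seed)
    layer_labels = np.array([0, 2, -1, 1, 0, 2, -1])
    fixed_label = 1

    shuffle_mask = (layer_labels != -1)             # skip pruned units
    shuffle_mask &= (layer_labels != fixed_label)   # skip the fixed label

    shuffled = layer_labels.copy()
    shuffled[shuffle_mask] = rng.permutation(shuffled[shuffle_mask])
    return shuffled  # -1 entries and the fixed label stay where they were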
def cluster_and_visualize(weights_dir, activations_dir,
                          n_clusters=10, corr_type='spearman',
                          filter_norm=1, n_iters=20, n_random=4,
                          side_len=28, min_size=4, max_prop=0.8):

    assert corr_type in ['pearson', 'spearman']

    results = {}

    weight_path_dict = get_weights_paths(weights_dir, norm=filter_norm)
    activations_path_dict = get_activations_paths(activations_dir)
    activations_masks_path_dict = get_activation_masks_paths(activations_dir)

    for is_unpruned in [True, False]:

        # run clustering to get labels
        # for a cnn, this will only get results for the conv layers
        labels, _ = run_spectral_cluster(weight_path_dict[is_unpruned],
                                         n_clusters=n_clusters,
                                         with_shuffle=False)

        # get the activations and the mask
        with open(activations_path_dict[is_unpruned], 'rb') as f:
            # get stored correlation-based adjacency matrix
            masked_activations = pickle.load(f)
        with open(activations_masks_path_dict[is_unpruned], 'rb') as f:
            activations_mask = pickle.load(f)

        # the activations come pre-masked, so reconstruct them,
        # placing zeros for the units which were masked out
        activations = np.zeros(
            (len(activations_mask), masked_activations.shape[-1]))
        activations[activations_mask] = masked_activations
        del masked_activations  # take out the trash

        # get the numbers of each type of unit
        if 'cnn' in str(weights_dir):  # if a cnn
            cnn_params = (CNN_VGG_MODEL_PARAMS
                          if 'vgg' in str(weights_dir).lower()
                          else CNN_MODEL_PARAMS)
            unit_nums = [cl['filters'] for cl in cnn_params['conv']]
            n_units = sum(unit_nums)
            n_dense = sum(d['units'] for d in cnn_params['dense'])
            n_outputs = 10
            n_inputs = len(activations_mask) - n_units - n_dense - n_outputs
        else:  # if an mlp
            n_inputs = 784
            n_outputs = 10
            unit_nums = [256, 256, 256, 256]
            n_units = sum(unit_nums)

        labels = labels[n_inputs:n_inputs + n_units]
        assert len(labels) == n_units

        # get correlations
        if corr_type == 'pearson':
            corr_mat = np.corrcoef(activations[:n_inputs + n_units],
                                   rowvar=True)
        else:  # spearman
            corr_mat, _ = spearmanr(activations[:n_inputs + n_units], axis=1)

        # get correlations between inputs and units
        representations = corr_mat[n_inputs:, :n_inputs]
        del corr_mat  # take out the trash
        representations[np.isnan(representations)] = 0

        representations_by_layer = list(splitter(representations, unit_nums))
        labels_by_layer = list(splitter(labels, unit_nums))

        network_results = {}

        for layer_i in range(len(unit_nums)):  # for each layer

            layer_reps = np.array(representations_by_layer[layer_i])
            layer_reps_stds = np.std(layer_reps, axis=1)
            layer_reps_valid = layer_reps[layer_reps_stds > 0]
            n_valid = len(layer_reps_valid)
            layer_labels = np.array(labels_by_layer[layer_i])
            layer_size = unit_nums[layer_i]
            max_size = max_prop * layer_size

            layer_results = {}

            for cluster_i in range(n_clusters):  # for each sub-module within the layer

                sm_reps = layer_reps[layer_labels == cluster_i]
                sm_reps_stds = np.std(sm_reps, axis=1)
                # filter out units that aren't responsive to anything
                sm_reps = sm_reps[sm_reps_stds > 0]
                sm_size = len(sm_reps)

                if sm_size < min_size or sm_size > max_size:
                    # skip if too small or too big
                    continue

                sm_reps = align_reps(sm_reps, n_iters)
                true_avg = np.reshape(np.mean(sm_reps, axis=0),
                                      (-1, side_len, side_len))
                if np.mean(true_avg) > 0:  # align to have negative mean
                    true_avg *= -1
                avgs = [true_avg]  # first in the list will be the true one

                for _ in range(n_random):
                    rdm_idxs = np.random.choice(np.array(range(n_valid)),
                                                size=sm_size,
                                                replace=False)
                    rdm_reps = layer_reps_valid[rdm_idxs]
                    rdm_reps = align_reps(rdm_reps, n_iters)
                    rdm_avg = np.reshape(np.mean(rdm_reps, axis=0),
                                         (-1, side_len, side_len))
                    if np.mean(rdm_avg) > 0:  # align to have negative mean
                        rdm_avg *= -1
                    avgs.append(rdm_avg)

                layer_results[f'cluster_{cluster_i}'] = {
                    'ims': avgs,
                    'size': sm_size
                }

            network_results[f'layer_{layer_i}'] = layer_results

        results[is_unpruned] = network_results

    return results
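
# Hedged, self-contained sketch of the "representation" construction above:
# unit activations are rank-correlated with input activations, and each unit's
# row of input correlations becomes its representation. Toy shapes only;
# `spearmanr` is the scipy.stats import already used in `cluster_and_visualize`.
def _demo_input_unit_correlations(n_inputs=4, n_units=3, n_examples=50, seed=0):
    rng = np.random.RandomState(seed)
    # rows = inputs followed by units, columns = examples
    activations = rng.rand(n_inputs + n_units, n_examples)

    corr_mat, _ = spearmanr(activations, axis=1)       # square (n_inputs+n_units) matrix
    representations = corr_mat[n_inputs:, :n_inputs]   # units x inputs block
    representations[np.isnan(representations)] = 0
    return representations  # shape (n_units, n_inputs)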
def perform_lesion_experiment_imagenet(
        network, num_clusters=10, num_shuffles=10, with_random=True,
        downsampled=False, eigen_solver='arpack', batch_size=32,
        data_dir='/project/clusterability_in_neural_networks/datasets/imagenet2012',
        val_tar='ILSVRC2012_img_val.tar',
        downsampled_n_samples=10000):

    assert network != 'inceptionv3', 'This function does not yet support inceptionv3'

    net, preprocess = Classifiers.get(network)  # get network object and preprocess fn
    model = net((224, 224, 3), weights='imagenet')  # get network tf.keras.Model

    data_path = Path(data_dir)
    tfrecords = list(data_path.glob('*validation.tfrecord*'))
    if not tfrecords:
        prep_imagenet_validation_data(data_dir, val_tar)  # this'll take a sec
    imagenet = tfds.image.Imagenet2012()  # dataset builder object
    imagenet._data_dir = data_dir
    val_dataset_object = imagenet.as_dataset(split='validation')  # dataset object
    # assert isinstance(val_dataset_object, tf.data.Dataset)

    if downsampled:
        # get the small dataset as an np.ndarray
        dataset, y = imagenet_downsampled_dataset(
            val_dataset_object, preprocess, n_images=downsampled_n_samples)
        steps = None
        val_set_size = downsampled_n_samples
    else:
        dataset = imagenet_generator(val_dataset_object, preprocess)
        val_set_size = 50000
        steps = val_set_size // 250  # use batch_size of 250
        y = []  # to become an ndarray of true labels
        for _ in range(steps):
            _, logits = next(dataset)
            y.append(np.argmax(logits, axis=-1))
        y = np.concatenate(y)
        batch_size = None

    # get info from clustering
    clustering_results = run_clustering_imagenet(network,
                                                 num_clusters=num_clusters,
                                                 with_shuffle=False,
                                                 eigen_solver=eigen_solver)
    labels = clustering_results['labels']
    connections = clustering_results['conv_connections']  # just connections for conv layers
    layer_widths = [cc[0]['weights'].shape[0]
                    for cc in connections[1:]]  # skip first conv layer
    dense_sizes = get_dense_sizes(connections)
    layer_widths.extend(list(dense_sizes.values()))
    labels_in_layers = list(splitter(labels, layer_widths))

    y_pred = np.argmax(model.predict(dataset, steps=steps,
                                     batch_size=batch_size),
                       axis=-1)
    if not isinstance(dataset, np.ndarray):
        dataset = imagenet_generator(val_dataset_object, preprocess)
    evaluation = _get_classification_accs_imagenet(y, y_pred)  # an ndarray of all 1000 class accs

    # next get true accs and label bincounts for the 1000 classes
    accs_true, class_props_true, cluster_sizes = lesion_test_imagenet(
        model, dataset, y, labels_in_layers, num_clusters, steps, batch_size,
        val_dataset_object, preprocess, num_samples=1)
    accs_true = accs_true[0]  # it's a 1-element list, so just take the first
    class_props_true = class_props_true[0]  # same as line above

    if not with_random:

        # make and return a dict with keys giving sub-modules and values giving
        # num shuffles, overall acc, and class accs
        results = {}
        for layer_key in accs_true.keys():
            results[layer_key] = {}
            for cluster_key in accs_true[layer_key].keys():
                sm_results = {}
                true_accs = accs_true[layer_key][cluster_key]
                sm_results['num_shuffles'] = num_shuffles
                sm_results['overall_acc'] = np.mean(true_accs)
                sm_results['class_accs'] = true_accs
                results[layer_key][cluster_key] = sm_results

        return evaluation, results

    else:  # perform random lesion tests num_shuffles times

        # get random results
        all_acc_random, all_class_props, _ = lesion_test_imagenet(
            model, dataset, y, labels_in_layers, num_clusters, steps,
            batch_size, val_dataset_object, preprocess, num_shuffles,
            shuffle=True)

        # make and return a dict with keys giving sub-modules and values giving
        # stats about true labels, shufflings, and p values for hypothesis tests
        results = {}
        for layer_key in accs_true.keys():
            results[layer_key] = {}
            for cluster_key in accs_true[layer_key].keys():

                sm_results = {}

                true_accs = accs_true[layer_key][cluster_key]
                random_accs = np.vstack([
                    all_acc_random[i][layer_key][cluster_key]
                    for i in range(num_shuffles)
                ])

                overall_acc = np.mean(true_accs)
                overall_random_accs = np.mean(random_accs, axis=1)
                overall_acc_percentile = compute_pvalue(overall_acc,
                                                        overall_random_accs)
                overall_acc_effect_factor = np.mean(overall_random_accs) / overall_acc

                random_changes = random_accs - evaluation
                normalized_random_changes = (random_changes.T /
                                             np.mean(random_changes, axis=-1)).T
                random_range_normalized_changes = np.ptp(
                    normalized_random_changes, axis=-1)

                true_changes = true_accs - evaluation
                normalized_true_changes = true_changes / np.mean(true_changes)
                true_range_normalized_changes = np.ptp(normalized_true_changes)

                range_percentile = compute_pvalue(
                    true_range_normalized_changes,
                    random_range_normalized_changes,
                    side='right')
                range_effect_factor = (np.mean(random_range_normalized_changes) /
                                       true_range_normalized_changes)

                sm_results['cluster_size'] = cluster_sizes[layer_key][cluster_key]
                sm_results['acc'] = overall_acc
                sm_results['acc_percentile'] = overall_acc_percentile
                sm_results['overall_acc_effect_factor'] = overall_acc_effect_factor
                sm_results['range'] = true_range_normalized_changes
                sm_results['range_percentile'] = range_percentile
                sm_results['range_effect_factor'] = range_effect_factor

                results[layer_key][cluster_key] = sm_results

        return evaluation, results
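
# Hedged sketch of the hypothesis-test pattern above: the true lesion statistic
# is compared against the distribution obtained from shuffled clusterings.
# `_toy_pvalue` is a hypothetical stand-in for the repo's `compute_pvalue`
# helper (one-sided fraction of random values at least as extreme).
def _demo_lesion_pvalue():
    true_overall_acc = 0.58
    random_overall_accs = np.array([0.66, 0.64, 0.67, 0.63, 0.65])

    def _toy_pvalue(value, random_values, side='left'):
        random_values = np.asarray(random_values)
        if side == 'left':
            return (np.sum(random_values <= value) + 1) / (len(random_values) + 1)
        return (np.sum(random_values >= value) + 1) / (len(random_values) + 1)

    # A small value here means lesioning the true cluster hurts accuracy more
    # than lesioning random unit groups of the same size.
    return _toy_pvalue(true_overall_acc, random_overall_accs, side='left')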
def make_lucid_dataset(model_tag, lucid_net, all_labels, is_unpruned,
                       transforms=[], n_random=9, min_size=5, max_prop=0.8,
                       display=True,
                       savedir='/project/clusterability_in_neural_networks/datasets/',
                       savetag=''):

    if 'cnn' in model_tag.lower():
        cnn_params = (CNN_VGG_MODEL_PARAMS
                      if 'vgg' in str(model_tag).lower()
                      else CNN_MODEL_PARAMS)
        layer_sizes = [cl['filters'] for cl in cnn_params['conv']]
        layer_names = ['conv2d/Relu'] + [
            f'conv2d_{i}/Relu' for i in range(1, len(layer_sizes))
        ]
    else:  # it's an mlp
        layer_sizes = [256, 256, 256, 256]
        layer_names = ['dense/Relu'] + [
            f'dense_{i}/Relu' for i in range(1, len(layer_sizes))
        ]

    if not is_unpruned:
        layer_names = ['prune_low_magnitude_' + ln for ln in layer_names]

    labels_in_layers = [
        np.array(lyr_labels)
        for lyr_labels in list(splitter(all_labels, layer_sizes))
    ]

    max_images = []  # to be filled with images that maximize cluster activations
    random_max_images = []  # to be filled with images that maximize random units' activations
    max_losses = []  # to be filled with losses
    random_max_losses = []  # to be filled with losses
    sm_sizes = []  # list of submodule sizes
    sm_layer_sizes = []  # list of layer sizes
    sm_layers = []  # list of layer names
    sm_clusters = []  # list of clusters

    imsize = IMAGE_SIZE_CIFAR10 if 'vgg' in model_tag.lower() else IMAGE_SIZE

    for layer_name, labels, layer_size in zip(layer_names, labels_in_layers,
                                              layer_sizes):

        max_size = max_prop * layer_size

        for clust_i in range(max(all_labels) + 1):

            sm_binary = labels == clust_i
            sm_size = sum(sm_binary)
            if sm_size <= min_size or sm_size >= max_size:
                # skip if too big or too small
                continue

            sm_sizes.append(sm_size)
            sm_layer_sizes.append(layer_size)
            sm_layers.append(layer_name)
            sm_clusters.append(clust_i)

            # print(f'{model_tag}, layer: {layer_name}')
            # print(f'submodule_size: {sm_size}, layer_size: {layer_size}')

            sm_idxs = [i for i in range(layer_size) if sm_binary[i]]
            max_obj = sum(
                [objectives.channel(layer_name, unit) for unit in sm_idxs])
            max_im, max_loss = render_vis_with_loss(lucid_net, max_obj,
                                                    size=imsize,
                                                    transforms=transforms)
            max_images.append(max_im)
            max_losses.append(max_loss)

            if display:
                print(f'loss: {round(max_loss, 3)}')
                show(max_im)

            rdm_losses = []
            rdm_ims = []

            for _ in range(n_random):  # random max results

                rdm_idxs = np.random.choice(np.array(range(layer_size)),
                                            size=sm_size,
                                            replace=False)
                random_max_obj = sum([
                    objectives.channel(layer_name, unit) for unit in rdm_idxs
                ])
                random_max_im, random_max_loss = render_vis_with_loss(
                    lucid_net, random_max_obj,
                    size=imsize, transforms=transforms)
                random_max_images.append(random_max_im)
                random_max_losses.append(random_max_loss)

                rdm_ims.append(np.squeeze(random_max_im))
                rdm_losses.append(round(random_max_loss, 3))

            if display:
                print(f'random losses: {rdm_losses}')
                show(np.hstack(rdm_ims))

    max_images = np.squeeze(np.array(max_images))
    random_max_images = np.squeeze(np.array(random_max_images))
    max_losses = np.array(max_losses)
    random_max_losses = np.array(random_max_losses)

    results = {
        'max_images': max_images,
        'random_max_images': random_max_images,
        'max_losses': max_losses,
        'random_max_losses': random_max_losses,
        'sm_sizes': sm_sizes,
        'sm_layer_sizes': sm_layer_sizes,
        'sm_layers': sm_layers,
        'sm_clusters': sm_clusters
    }

    if is_unpruned:
        suff = '_unpruned_max_data'
    else:
        suff = '_pruned_max_data'

    with open(savedir + model_tag + suff + savetag + '.pkl', 'wb') as f:
        pickle.dump(results, f)
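
# Hedged usage sketch: building the summed-channel objective for one cluster,
# mirroring the loop body in `make_lucid_dataset`. The layer name and unit
# indices are illustrative only; `objectives` is the lucid helper already used
# above.
def _demo_cluster_objective(layer_name='conv2d_1/Relu', sm_idxs=(3, 7, 11)):
    # Summing the per-channel objectives asks the optimizer to maximize the
    # cluster's units jointly, exactly as `max_obj` is built above.
    return sum([objectives.channel(layer_name, unit) for unit in sm_idxs])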