Example #1
def delete_isolated_ccs_refactored(weights, adjacency_matrix, is_testing=False):
    """Assume that all the isolated connected components have only one node."""
    # 1D boolean array of non-isolated nodes
    node_mask = (adjacency_matrix!=0).toarray().any(axis=1)

    no_isolated_adjacency_matrix = adjacency_matrix[:,node_mask][node_mask,:]

    
    if is_testing:
        layer_sizes = [w.shape[0] for w in weights]

        # create two iterators over the per-layer node masks
        # they are offset by one layer (current, next):
        # current - slices rows of the weight matrix
        # next - slices columns of the weight matrix
        layer_mask = splitter(node_mask, layer_sizes)
        current_layer_mask, next_layer_mask = it.tee(layer_mask, 2)
        next(next_layer_mask)
        bi_layer_masks = it.zip_longest(current_layer_mask, next_layer_mask, fillvalue=Ellipsis)
        
        array_weights = (layer_weights.toarray() if sparse.issparse(layer_weights)
                         else layer_weights
                         for layer_weights in weights)

        # sparse layers were already densified above, so np.array() is a plain copy here
        no_isolated_weights = [np.array(layer_weights)[current_mask,:][:,next_mask]
                               for layer_weights, (current_mask, next_mask)
                               in zip(array_weights, bi_layer_masks)]
    else:
        no_isolated_weights = []

    return no_isolated_weights, no_isolated_adjacency_matrix, node_mask
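
All of these examples call a `splitter(iterable, sizes)` helper that is not shown on this page. Judging from the call sites, it chops a flat sequence into consecutive per-layer chunks. The sketch below is an assumption about its behaviour (not the repository's actual implementation), included only to make the examples easier to follow:

import itertools as it

def splitter(iterable, sizes):
    """Yield consecutive chunks of `iterable`, one chunk per entry in `sizes`."""
    iterator = iter(iterable)
    for size in sizes:
        yield list(it.islice(iterator, size))

# e.g. list(splitter([1, 2, 3, 4, 5, 6], [2, 4])) -> [[1, 2], [3, 4, 5, 6]]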
Example #2
def get_clustering_info_imagenet(
        model_tag,
        num_clusters,
        savedir='/project/clusterability_in_neural_networks/results/'):

    assert model_tag in VIS_NETS

    clustering_results = run_clustering_imagenet(model_tag,
                                                 num_clusters=num_clusters,
                                                 with_shuffle=False,
                                                 eigen_solver='arpack')

    layer_names = clustering_results['layer_names']
    conv_connections = clustering_results['conv_connections']
    layer_sizes = [cc[0]['weights'].shape[0] for cc in conv_connections[1:]]
    dense_sizes = get_dense_sizes(conv_connections)
    layer_sizes.extend(list(dense_sizes.values()))
    labels = clustering_results['labels']
    labels_in_layers = list(splitter(labels, layer_sizes))

    for nm, ly in zip(layer_names, layer_sizes):
        print(ly, nm)

    clustering_info = {'layers': layer_names, 'labels': labels_in_layers}

    with open(savedir + model_tag + '_clustering_info.pkl', 'wb') as f:
        pickle.dump(clustering_info, f)
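
A hypothetical call (the model tag and cluster count below are assumptions; the tag must appear in VIS_NETS for the assert to pass):

# Hypothetical usage: pickles {'layers': ..., 'labels': ...} under the default savedir.
get_clustering_info_imagenet('vgg16', num_clusters=10)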
def set_nodes_positions(nodes,
                        layer_widths,
                        clustering_labels,
                        is_first_square=True,
                        dx=50,
                        dy=5,
                        jitter=10):
    """Set postions of nodes of a neural network for networkx drawing."""

    pos = {}

    labeled_nodes_by_layer = splitter(zip(nodes, clustering_labels),
                                      layer_widths)

    layer_data = enumerate(zip(layer_widths, labeled_nodes_by_layer))

    starting_x = 0

    # TODO - refactor!
    for layer_index, (layer_width, labeled_nodes) in layer_data:

        nodes, labels = zip(*labeled_nodes)

        nodes_sorted = [node for _, node in sorted(zip(labels, nodes))]

        # first layer is the input (image)
        # so let's draw it as a square!
        if is_first_square and layer_index == 0:
            nodes_sorted = nodes

            (xs, normalized_ys, shift_x,
             side) = set_square_nodes_positions(layer_width, nodes_sorted)
            starting_x += shift_x
            height = dy * shift_x

        else:
            starting_x += dx

            xs = np.full(layer_width, starting_x, dtype=float)
            xs += 2 * jitter * np.random.random(layer_width) - jitter
            xs = xs.round().astype(int)

            center_node = layer_width // 2

            normalized_ys = ((np.arange(layer_width) - center_node) /
                             center_node)
            height = dy * layer_width

        ys = normalized_ys * height
        ys = ys.round().astype(int)

        pos.update(
            {node: (x, y)
             for node, (x, y) in zip(nodes_sorted, zip(xs, ys))})

    return pos
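
A minimal sketch of feeding the returned positions to networkx, using a made-up 2-3-2 toy network and cluster labels (every value here is an assumption):

import networkx as nx
import matplotlib.pyplot as plt

toy_widths = [2, 3, 2]
toy_nodes = list(range(sum(toy_widths)))
toy_labels = [0, 1, 0, 1, 1, 0, 1]  # one (made-up) cluster label per node
pos = set_nodes_positions(toy_nodes, toy_widths, toy_labels,
                          is_first_square=False)  # skip the square input layer

G = nx.Graph()
G.add_nodes_from(toy_nodes)
nx.draw(G, pos=pos, node_size=50)
plt.show()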
def build_cluster_graph(weights_path,
                        clustering_result,
                        normalize_in_out=True):

    labels, _ = clustering_result

    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    G = nx.DiGraph()

    (label_by_layer, current_label_by_layer,
     next_label_by_layer) = it.tee(splitter(labels, layer_widths), 3)

    next_label_by_layer = it.islice(next_label_by_layer, 1, None)

    for layer_index, layer_labels in enumerate(label_by_layer):
        unique_labels = sorted(label for label in np.unique(layer_labels)
                               if label != -1)
        for label in unique_labels:
            node_name = nodify(layer_index, label)
            G.add_node(node_name)

    edges = {}

    for layer_index, (current_labels, next_labels, layer_weights) in enumerate(
            zip(current_label_by_layer, next_label_by_layer, weights)):

        label_edges = it.product(
            (label for label in np.unique(current_labels) if label != -1),
            (label for label in np.unique(next_labels) if label != -1))

        for current_label, next_label in label_edges:

            current_mask = (current_label == current_labels)
            next_mask = (next_label == next_labels)

            between_weights = layer_weights[current_mask, :][:, next_mask]

            if normalize_in_out:
                n_weight_in, n_weight_out = between_weights.shape
                n_weights = n_weight_in * n_weight_out
                normalization_factor = n_weights
            else:
                normalization_factor = 1

            edge_weight = np.abs(between_weights).sum() / normalization_factor

            current_node = nodify(layer_index, current_label)
            next_node = nodify(layer_index + 1, next_label)

            edges[current_node, next_node] = edge_weight

    for nodes, weight in edges.items():
        G.add_edge(*nodes, weight=weight)

    return G
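
Hypothetical usage; the weights path is an assumption, and the (labels, metric) pair comes from run_spectral_cluster as in the other examples on this page:

clustering_result = run_spectral_cluster('weights/mlp_unpruned.pckl',  # assumed path
                                         n_clusters=4,
                                         with_shuffle=False)
G = build_cluster_graph('weights/mlp_unpruned.pckl', clustering_result)

# strongest cluster-to-cluster connections
print(sorted(G.edges(data='weight'), key=lambda e: -e[2])[:5])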
def draw_cluster_by_layer(weights_path,
                          clustering_result,
                          n_clusters=4,
                          with_text=False,
                          size_factor=4,
                          width_factor=30,
                          ax=None):

    G = build_cluster_graph(weights_path, clustering_result)

    labels, _ = clustering_result

    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    color_mapper = get_color_mapper(n_clusters)

    node_size = {}

    (label_by_layer, current_label_by_layer,
     next_label_by_layer) = it.tee(splitter(labels, layer_widths), 3)

    next_label_by_layer = it.islice(next_label_by_layer, 1, None)

    for layer_index, layer_labels in enumerate(label_by_layer):
        unique_labels = sorted(label for label in np.unique(layer_labels)
                               if label != -1)
        for label in unique_labels:
            node_name = nodify(layer_index, label)
            node_size[node_name] = (layer_labels == label).sum()

    pos = nx.drawing.nx_agraph.graphviz_layout(G, prog='dot')
    width = [G[u][v]['weight'] * width_factor for u, v in G.edges()]
    node_color = [color_mapper[int(v.split('-')[1])] for v in G.nodes()]
    node_size = [node_size[v] * size_factor for v in G.nodes()]

    if ax is None:
        _, ax = plt.subplots(1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        nx.draw(
            G,
            pos,
            with_labels=True,
            node_color=node_color,
            node_size=node_size,
            # font_color='white',
            width=width,
            ax=ax)

    if with_text:
        pprint(nx.get_edge_attributes(G, 'weight'))

    return ax
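
A hypothetical call, reusing the clustering_result from the sketch above; note that graphviz_layout needs pygraphviz and a Graphviz installation:

ax = draw_cluster_by_layer('weights/mlp_unpruned.pckl',  # assumed path
                           clustering_result,
                           n_clusters=4,
                           size_factor=8)
ax.figure.savefig('cluster_graph.png', dpi=150)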
Example #6
def _layers_labels_gen(network_type,
                       layer_widths,
                       labels,
                       ignore_layers,
                       to_shuffle=False,
                       fixed=None):

    layer_data = zip(splitter(deepcopy(labels), layer_widths),
                     layer_widths[:-1])
    next(layer_data)

    for layer_id, (layer_labels, layer_width) in enumerate(layer_data,
                                                           start=1):

        # e.g., for max-pooling layers
        if (ignore_layers
                # `layer_id-1` because we set `start=1` for `enumerate`
                and ignore_layers[layer_id - 1]):

            if verbose:
                print(f'Ignoring layer {layer_id-1}!')

            continue

        layer_labels = np.array(layer_labels)

        if to_shuffle:

            # Don't shuffle pruned neurons (labelled -1)
            non_shuffled_mask = (layer_labels != -1)

            # We unpack `fixed` here, inside the loop, rather than once up
            # front, so that all of the "fixed" handling stays in one place.
            if fixed is not None:
                fixed_layer_id, fixed_label = fixed
                if fixed_layer_id == layer_id:

                    assert not (~non_shuffled_mask
                                & (layer_labels == fixed_label)).any()

                    non_shuffled_mask &= (layer_labels != fixed_label)

            layer_labels[non_shuffled_mask] = np.random.permutation(
                layer_labels[non_shuffled_mask])

        yield layer_id, layer_labels
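
A hypothetical usage sketch: shuffle each hidden layer's labels while keeping pruned (-1) entries in place. The widths and labels below are made up; `np` is assumed to be numpy, as in the rest of the listing:

layer_widths = [4, 3, 3, 3, 3]  # input layer plus four hidden layers (assumed)
labels = np.array([0, 0, 1, 1] + [0, 1, -1] * 4)
for layer_id, layer_labels in _layers_labels_gen('mlp', layer_widths, labels,
                                                 ignore_layers=None,
                                                 to_shuffle=True):
    print(layer_id, layer_labels)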
def cluster_and_visualize(weights_dir,
                          activations_dir,
                          n_clusters=10,
                          corr_type='spearman',
                          filter_norm=1,
                          n_iters=20,
                          n_random=4,
                          side_len=28,
                          min_size=4,
                          max_prop=0.8):
    assert corr_type in ['pearson', 'spearman']

    results = {}

    weight_path_dict = get_weights_paths(weights_dir, norm=filter_norm)
    activations_path_dict = get_activations_paths(activations_dir)
    activations_masks_path_dict = get_activation_masks_paths(activations_dir)

    for is_unpruned in [True, False]:

        # run clustering to get labels
        # for a cnn, this will only get results for the conv layers
        labels, _ = run_spectral_cluster(weight_path_dict[is_unpruned],
                                         n_clusters=n_clusters,
                                         with_shuffle=False)

        # get the activations and the mask
        # load the stored (masked) activations
        with open(activations_path_dict[is_unpruned], 'rb') as f:
            masked_activations = pickle.load(f)
        with open(activations_masks_path_dict[is_unpruned], 'rb') as f:
            activations_mask = pickle.load(f)

        # the activations come pre-masked, so reconstruct them by placing
        # zeros for the units that were masked out
        activations = np.zeros(
            (len(activations_mask), masked_activations.shape[-1]))
        activations[activations_mask] = masked_activations
        del masked_activations  # take out the trash

        # get the numbers of each type of unit
        if 'cnn' in str(weights_dir):  # if a cnn
            cnn_params = CNN_VGG_MODEL_PARAMS if 'vgg' in str(
                weights_dir).lower() else CNN_MODEL_PARAMS
            unit_nums = [cl['filters'] for cl in cnn_params['conv']]
            n_units = sum(unit_nums)
            n_dense = sum(d['units'] for d in cnn_params['dense'])
            n_outputs = 10
            n_inputs = len(activations_mask) - n_units - n_dense - n_outputs
        else:  # if an mlp
            n_inputs = 784
            n_outputs = 10
            unit_nums = [256, 256, 256, 256]
            n_units = sum(unit_nums)
            labels = labels[n_inputs:n_inputs + n_units]

        assert len(labels) == n_units

        # get correlations
        if corr_type == 'pearson':
            corr_mat = np.corrcoef(activations[:n_inputs + n_units],
                                   rowvar=True)
        else:  # spearman
            corr_mat, _ = spearmanr(activations[:n_inputs + n_units], axis=1)

        # get correlations between inputs and units
        representations = corr_mat[n_inputs:, :n_inputs]
        del corr_mat  # take out the trash
        representations[np.isnan(representations)] = 0

        representations_by_layer = list(splitter(representations, unit_nums))
        labels_by_layer = list(splitter(labels, unit_nums))
        network_results = {}

        for layer_i in range(len(unit_nums)):  # for each layer

            layer_reps = np.array(representations_by_layer[layer_i])
            layer_reps_stds = np.std(layer_reps, axis=1)
            layer_reps_valid = layer_reps[layer_reps_stds > 0]
            n_valid = len(layer_reps_valid)
            layer_labels = np.array(labels_by_layer[layer_i])
            layer_size = unit_nums[layer_i]
            max_size = max_prop * layer_size

            layer_results = {}

            # for each sub-module (cluster) within the layer
            for cluster_i in range(n_clusters):

                sm_reps = layer_reps[layer_labels == cluster_i]
                # filter out units that aren't responsive to anything
                sm_reps_stds = np.std(sm_reps, axis=1)
                sm_reps = sm_reps[sm_reps_stds > 0]
                sm_size = len(sm_reps)

                if sm_size < min_size or sm_size > max_size:  # skip if too small or big
                    continue

                sm_reps = align_reps(sm_reps, n_iters)
                true_avg = np.reshape(np.mean(sm_reps, axis=0),
                                      (-1, side_len, side_len))
                if np.mean(true_avg) > 0:  # align to have negative mean
                    true_avg *= -1
                avgs = [true_avg]  # first in the list will be the true one

                for _ in range(n_random):

                    rdm_idxs = np.random.choice(np.array(range(n_valid)),
                                                size=sm_size,
                                                replace=False)
                    rdm_reps = layer_reps_valid[rdm_idxs]
                    rdm_reps = align_reps(rdm_reps, n_iters)
                    rdm_avg = np.reshape(np.mean(rdm_reps, axis=0),
                                         (-1, side_len, side_len))
                    if np.mean(rdm_avg) > 0:  # align to have negative mean
                        rdm_avg *= -1
                    avgs.append(rdm_avg)

                layer_results[f'cluster_{cluster_i}'] = {
                    'ims': avgs,
                    'size': sm_size
                }

            network_results[f'layer_{layer_i}'] = layer_results

        results[is_unpruned] = network_results

    return results
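
Hypothetical directories (names assumed); the returned dict is keyed by unpruned/pruned, then layer, then cluster:

results = cluster_and_visualize('datasets/cnn_weights',      # assumed weights dir
                                'datasets/cnn_activations',  # assumed activations dir
                                n_clusters=10,
                                corr_type='spearman')
print(results[True]['layer_0'].keys())  # first-layer clusters that passed the size filters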
def perform_lesion_experiment_imagenet(
        network,
        num_clusters=10,
        num_shuffles=10,
        with_random=True,
        downsampled=False,
        eigen_solver='arpack',
        batch_size=32,
        data_dir='/project/clusterability_in_neural_networks/datasets/imagenet2012',
        val_tar='ILSVRC2012_img_val.tar',
        downsampled_n_samples=10000):

    assert network != 'inceptionv3', 'This function does not yet support inceptionv3'

    net, preprocess = Classifiers.get(
        network)  # get network object and preprocess fn
    model = net((224, 224, 3),
                weights='imagenet')  # get network tf.keras.model

    data_path = Path(data_dir)
    tfrecords = list(data_path.glob('*validation.tfrecord*'))
    if not tfrecords:
        prep_imagenet_validation_data(data_dir, val_tar)  # this'll take a sec
    imagenet = tfds.image.Imagenet2012()  # dataset builder object
    imagenet._data_dir = data_dir
    val_dataset_object = imagenet.as_dataset(
        split='validation')  # dataset object
    # assert isinstance(val_dataset_object, tf.data.Dataset)

    if downsampled:
        # get the small dataset as an np.ndarray
        dataset, y = imagenet_downsampled_dataset(
            val_dataset_object, preprocess, n_images=downsampled_n_samples)
        steps = None
        val_set_size = downsampled_n_samples

    else:
        dataset = imagenet_generator(val_dataset_object, preprocess)
        val_set_size = 50000
        steps = val_set_size // 250  # use batch_size of 250
        y = []  # to become an ndarray of true labels
        for _ in range(steps):
            _, logits = next(dataset)
            y.append(np.argmax(logits, axis=-1))
        y = np.concatenate(y)
        batch_size = None

    # get info from clustering
    clustering_results = run_clustering_imagenet(network,
                                                 num_clusters=num_clusters,
                                                 with_shuffle=False,
                                                 eigen_solver=eigen_solver)
    labels = clustering_results['labels']
    connections = clustering_results[
        'conv_connections']  # just connections for conv layers
    layer_widths = [cc[0]['weights'].shape[0]
                    for cc in connections[1:]]  # skip first conv layer
    dense_sizes = get_dense_sizes(connections)
    layer_widths.extend(list(dense_sizes.values()))
    labels_in_layers = list(splitter(labels, layer_widths))

    y_pred = np.argmax(model.predict(dataset,
                                     steps=steps,
                                     batch_size=batch_size),
                       axis=-1)
    if not isinstance(dataset, np.ndarray):
        dataset = imagenet_generator(val_dataset_object, preprocess)
    evaluation = _get_classification_accs_imagenet(
        y, y_pred)  # an ndarray of all 1000 class accs

    # next get true accs and label bincounts for the 1000 classes
    accs_true, class_props_true, cluster_sizes = lesion_test_imagenet(
        model,
        dataset,
        y,
        labels_in_layers,
        num_clusters,
        steps,
        batch_size,
        val_dataset_object,
        preprocess,
        num_samples=1)
    accs_true = accs_true[0]  # it's a 1 element list, so just take the first
    class_props_true = class_props_true[0]  # same as line above

    if not with_random:

        # make and return a dict whose keys give submodules and whose values give
        # num shuffles, overall acc, and class accs

        results = {}
        for layer_key in accs_true.keys():
            results[layer_key] = {}
            for cluster_key in accs_true[layer_key].keys():
                sm_results = {}
                true_accs = accs_true[layer_key][cluster_key]
                sm_results['num_shuffles'] = num_shuffles
                sm_results['overall_acc'] = np.mean(true_accs)
                sm_results['class_accs'] = true_accs
                results[layer_key][cluster_key] = sm_results

        return evaluation, results

    else:

        # perform random lesion tests num_shuffles times

        # get random results
        all_acc_random, all_class_props, _ = lesion_test_imagenet(
            model,
            dataset,
            y,
            labels_in_layers,
            num_clusters,
            steps,
            batch_size,
            val_dataset_object,
            preprocess,
            num_shuffles,
            shuffle=True)

        # make and return a dict whose keys give submodules and whose values give
        # stats about true labels, shufflings, and p values for hypothesis tests

        results = {}
        for layer_key in accs_true.keys():
            results[layer_key] = {}
            for cluster_key in accs_true[layer_key].keys():

                sm_results = {}

                true_accs = accs_true[layer_key][cluster_key]
                random_accs = np.vstack([
                    all_acc_random[i][layer_key][cluster_key]
                    for i in range(num_shuffles)
                ])
                overall_acc = np.mean(true_accs)
                overall_random_accs = np.mean(random_accs, axis=1)
                overall_acc_percentile = compute_pvalue(
                    overall_acc, overall_random_accs)
                overall_acc_effect_factor = np.mean(
                    overall_random_accs) / overall_acc

                random_changes = random_accs - evaluation
                normalized_random_changes = (
                    random_changes.T / np.mean(random_changes, axis=-1)).T
                random_range_normalized_changes = np.ptp(
                    normalized_random_changes, axis=-1)
                true_changes = true_accs - evaluation
                normalized_true_changes = true_changes / np.mean(true_changes)
                true_range_normalized_changes = np.ptp(normalized_true_changes)
                range_percentile = compute_pvalue(
                    true_range_normalized_changes,
                    random_range_normalized_changes,
                    side='right')
                range_effect_factor = np.mean(random_range_normalized_changes
                                              ) / true_range_normalized_changes

                sm_results['cluster_size'] = cluster_sizes[layer_key][
                    cluster_key]
                sm_results['acc'] = overall_acc
                sm_results['acc_percentile'] = overall_acc_percentile
                sm_results[
                    'overall_acc_effect_factor'] = overall_acc_effect_factor
                sm_results['range'] = true_range_normalized_changes
                sm_results['range_percentile'] = range_percentile
                sm_results['range_effect_factor'] = range_effect_factor

                results[layer_key][cluster_key] = sm_results

        return evaluation, results
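
A hypothetical call on the downsampled validation set ('resnet18' and the sample count are assumptions; any tag supported by Classifiers.get other than 'inceptionv3' should work):

evaluation, results = perform_lesion_experiment_imagenet(
    'resnet18',  # assumed network tag
    num_clusters=10,
    num_shuffles=5,
    downsampled=True,
    downsampled_n_samples=5000)

for layer_key, layer_results in results.items():
    for cluster_key, sm in layer_results.items():
        print(layer_key, cluster_key, sm['acc'], sm['acc_percentile'])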
Example #9
def make_lucid_dataset(
        model_tag,
        lucid_net,
        all_labels,
        is_unpruned,
        transforms=[],
        n_random=9,
        min_size=5,
        max_prop=0.8,
        display=True,
        savedir='/project/clusterability_in_neural_networks/datasets/',
        savetag=''):

    if 'cnn' in model_tag.lower():
        cnn_params = CNN_VGG_MODEL_PARAMS if 'vgg' in str(
            model_tag).lower() else CNN_MODEL_PARAMS
        layer_sizes = [cl['filters'] for cl in cnn_params['conv']]
        layer_names = ['conv2d/Relu'] + [
            f'conv2d_{i}/Relu' for i in range(1, len(layer_sizes))
        ]
    else:  # it's an mlp
        layer_sizes = [256, 256, 256, 256]
        layer_names = ['dense/Relu'] + [
            f'dense_{i}/Relu' for i in range(1, len(layer_sizes))
        ]
    if not is_unpruned:
        layer_names = ['prune_low_magnitude_' + ln for ln in layer_names]

    labels_in_layers = [
        np.array(lyr_labels)
        for lyr_labels in list(splitter(all_labels, layer_sizes))
    ]

    max_images = []  # to be filled with images that maximize cluster activations
    random_max_images = []  # to be filled with images that maximize random units activations
    max_losses = []  # to be filled with losses
    random_max_losses = []  # to be filled with losses
    sm_sizes = []  # list of submodule sizes
    sm_layer_sizes = []
    sm_layers = []  # list of layer names
    sm_clusters = []  # list of clusters

    imsize = IMAGE_SIZE_CIFAR10 if 'vgg' in model_tag.lower() else IMAGE_SIZE

    for layer_name, labels, layer_size in zip(layer_names, labels_in_layers,
                                              layer_sizes):

        max_size = max_prop * layer_size

        for clust_i in range(max(all_labels) + 1):

            sm_binary = labels == clust_i
            sm_size = sum(sm_binary)
            if sm_size <= min_size or sm_size >= max_size:  # skip if too big or small
                continue

            sm_sizes.append(sm_size)
            sm_layer_sizes.append(layer_size)
            sm_layers.append(layer_name)
            sm_clusters.append(clust_i)

            # print(f'{model_tag}, layer: {layer_name}')
            # print(f'submodule_size: {sm_size}, layer_size: {layer_size}')

            sm_idxs = [i for i in range(layer_size) if sm_binary[i]]
            max_obj = sum(
                [objectives.channel(layer_name, unit) for unit in sm_idxs])

            max_im, max_loss = render_vis_with_loss(lucid_net,
                                                    max_obj,
                                                    size=imsize,
                                                    transforms=transforms)
            max_images.append(max_im)
            max_losses.append(max_loss)
            if display:
                print(f'loss: {round(max_loss, 3)}')
                show(max_im)

            rdm_losses = []
            rdm_ims = []
            for _ in range(n_random):  # random max results
                rdm_idxs = np.random.choice(np.array(range(layer_size)),
                                            size=sm_size,
                                            replace=False)
                random_max_obj = sum([
                    objectives.channel(layer_name, unit) for unit in rdm_idxs
                ])
                random_max_im, random_max_loss = render_vis_with_loss(
                    lucid_net,
                    random_max_obj,
                    size=imsize,
                    transforms=transforms)
                random_max_images.append(random_max_im)
                random_max_losses.append(random_max_loss)
                rdm_ims.append(np.squeeze(random_max_im))
                rdm_losses.append(round(random_max_loss, 3))
            if display:
                print(f'random losses: {rdm_losses}')
                show(np.hstack(rdm_ims))

    max_images = np.squeeze(np.array(max_images))
    random_max_images = np.squeeze(np.array(random_max_images))
    max_losses = np.array(max_losses)
    random_max_losses = np.array(random_max_losses)

    results = {
        'max_images': max_images,
        'random_max_images': random_max_images,
        'max_losses': max_losses,
        'random_max_losses': random_max_losses,
        'sm_sizes': sm_sizes,
        'sm_layer_sizes': sm_layer_sizes,
        'sm_layers': sm_layers,
        'sm_clusters': sm_clusters
    }

    if is_unpruned:
        suff = '_unpruned_max_data'
    else:
        suff = '_pruned_max_data'

    with open(savedir + model_tag + suff + savetag + '.pkl', 'wb') as f:
        pickle.dump(results, f)
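
A hypothetical call; `lucid_model` and `labels` are assumed to come from the repository's model-loading and spectral-clustering helpers, and the tag and save tag are made up:

make_lucid_dataset('cnn',        # assumed model tag
                   lucid_model,  # assumed lucid-compatible network object
                   labels,       # assumed flat array of cluster labels
                   is_unpruned=True,
                   n_random=4,
                   display=False,
                   savetag='_demo')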