Example #1
def draw_clustered_mlp(weights_path,
                       clustering_result,
                       n_clusters=4,
                       is_first_square=True,
                       ax=None):
    """Draw MLP with its spectral clustering."""

    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    labels, metrics = clustering_result

    G = nx.from_scipy_sparse_matrix(weights_to_graph(weights))

    pos = set_nodes_positions(G.nodes, layer_widths, labels, is_first_square)

    color_mapper = get_color_mapper(n_clusters)

    color_map = [color_mapper[label] for label in labels]

    if ax is None:
        _, ax = plt.subplots(1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(G, pos=pos, node_color=color_map, width=0, node_size=10, ax=ax)

    draw_metrics(metrics, ax)

    return ax, labels, metrics
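# A minimal, self-contained sketch of what weights_to_graph is assumed to do
# here: every unit of every layer becomes a graph node, and the absolute
# weight matrix |W_l| becomes the off-diagonal block connecting layer l to
# layer l + 1. The helper name and block layout are illustrative assumptions,
# not the original implementation.
import numpy as np
from scipy import sparse

def weights_to_graph_sketch(weights):
    """Build a symmetric sparse adjacency matrix from dense layer weights."""
    widths = [weights[0].shape[0]] + [w.shape[1] for w in weights]
    n_units = sum(widths)
    adj = sparse.lil_matrix((n_units, n_units))
    offset = 0
    for w in weights:
        n_in, n_out = w.shape
        # edge weight between two units is the absolute synapse weight
        adj[offset:offset + n_in,
            offset + n_in:offset + n_in + n_out] = np.abs(w)
        offset += n_in
    return (adj + adj.T).tocsr()  # symmetrize: the graph is undirected

# Smoke test: a 2-3-2 MLP yields a 7-node layered graph.
assert weights_to_graph_sketch([np.ones((2, 3)), np.ones((3, 2))]).shape == (7, 7)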
def plot_eigenvalues_old(weights_path, n_eigenvalues=None, ax=None, **kwargs):
    warnings.warn('deprecated', DeprecationWarning)

    loaded_weights = load_weights(weights_path)

    G = nx.from_scipy_sparse_matrix(weights_to_graph(loaded_weights))
    G_nn = G.subgraph(max(nx.connected_components(G), key=len))
    assert nx.is_connected(G_nn)

    norm_laplacian_matrix = nx.normalized_laplacian_matrix(G_nn)
    # the normalized Laplacian is symmetric, so eigvalsh returns real
    # eigenvalues (avoiding spurious complex parts from eigvals)
    eigen_values = np.sort(np.linalg.eigvalsh(norm_laplacian_matrix.toarray()))

    if n_eigenvalues is None:
        start, end = 0, len(G_nn)
    elif isinstance(n_eigenvalues, int):
        start, end = 0, n_eigenvalues
    elif isinstance(n_eigenvalues, tuple):
        start, end = n_eigenvalues
    else:
        raise TypeError(
            'n_eigenvalues should be either None, an int, or a tuple.')

    eigen_values = eigen_values[start:end]

    if ax is None:
        _, ax = plt.subplots(1)

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if 'linestyle' not in kwargs:
        kwargs['linestyle'] = 'none'
        kwargs['marker'] = '*'
        kwargs['markersize'] = 5

    return ax.plot(range(start + 1, end + 1), eigen_values, **kwargs)
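# Why eigenvalues of the normalized Laplacian are worth plotting: the
# multiplicity of eigenvalue 0 equals the number of connected components,
# and a run of small eigenvalues followed by a gap suggests a natural
# cluster count (the eigengap heuristic). A toy check, independent of the
# functions above:
import numpy as np
import networkx as nx

G_toy = nx.disjoint_union(nx.complete_graph(4), nx.complete_graph(4))
L_toy = nx.normalized_laplacian_matrix(G_toy).toarray()
evals_toy = np.sort(np.linalg.eigvalsh(L_toy))
assert np.isclose(evals_toy[0], 0) and np.isclose(evals_toy[1], 0)  # two components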
def build_cluster_graph(weights_path,
                        clustering_result,
                        normalize_in_out=True):

    labels, _ = clustering_result

    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    G = nx.DiGraph()

    (label_by_layer, current_label_by_layer,
     next_label_by_layer) = it.tee(splitter(labels, layer_widths), 3)

    next_label_by_layer = it.islice(next_label_by_layer, 1, None)

    for layer_index, layer_labels in enumerate(label_by_layer):
        unique_labels = sorted(label for label in np.unique(layer_labels)
                               if label != -1)
        for label in unique_labels:
            node_name = nodify(layer_index, label)
            G.add_node(node_name)

    edges = {}

    for layer_index, (current_labels, next_labels, layer_weights) in enumerate(
            zip(current_label_by_layer, next_label_by_layer, weights)):

        label_edges = it.product(
            (label for label in np.unique(current_labels) if label != -1),
            (label for label in np.unique(next_labels) if label != -1))

        for current_label, next_label in label_edges:

            current_mask = (current_label == current_labels)
            next_mask = (next_label == next_labels)

            between_weights = layer_weights[current_mask, :][:, next_mask]

            if normalize_in_out:
                n_weight_in, n_weight_out = between_weights.shape
                n_weights = n_weight_in * n_weight_out
                normalization_factor = n_weights
            else:
                normalization_factor = 1

            edge_weight = np.abs(between_weights).sum() / normalization_factor

            current_node = nodify(layer_index, current_label)
            next_node = nodify(layer_index + 1, next_label)

            edges[current_node, next_node] = edge_weight

    for nodes, weight in edges.items():
        G.add_edge(*nodes, weight=weight)

    return G
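# The helpers used above are assumed to behave roughly as follows (sketches,
# not the originals): nodify joins a layer index and a cluster label into a
# node name such as '0-2' (consistent with the `v.split('-')[1]` parsing in
# draw_cluster_by_layer below), and splitter slices the flat label vector
# back into per-layer chunks.
import itertools as it

def nodify_sketch(layer_index, label):
    return '{}-{}'.format(layer_index, label)

def splitter_sketch(labels, layer_widths):
    starts = [0] + list(it.accumulate(layer_widths))
    for lo, hi in zip(starts, starts[1:]):
        yield labels[lo:hi]

assert nodify_sketch(0, 2) == '0-2'
assert list(splitter_sketch(list(range(5)), [2, 3])) == [[0, 1], [2, 3, 4]]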
def draw_cluster_by_layer(weights_path,
                          clustering_result,
                          n_clusters=4,
                          with_text=False,
                          size_factor=4,
                          width_factor=30,
                          ax=None):

    G = build_cluster_graph(weights_path, clustering_result)

    labels, _ = clustering_result

    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)

    color_mapper = get_color_mapper(n_clusters)

    node_size = {}

    (label_by_layer, current_label_by_layer,
     next_label_by_layer) = it.tee(splitter(labels, layer_widths), 3)

    next_label_by_layer = it.islice(next_label_by_layer, 1, None)

    for layer_index, layer_labels in enumerate(label_by_layer):
        unique_labels = sorted(label for label in np.unique(layer_labels)
                               if label != -1)
        for label in unique_labels:
            node_name = nodify(layer_index, label)
            node_size[node_name] = (layer_labels == label).sum()

    pos = nx.drawing.nx_agraph.graphviz_layout(G, prog='dot')
    width = [G[u][v]['weight'] * width_factor for u, v in G.edges()]
    node_color = [color_mapper[int(v.split('-')[1])] for v in G.nodes()]
    node_size = [node_size[v] * size_factor for v in G.nodes()]

    if ax is None:
        _, ax = plt.subplots(1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')

        nx.draw(
            G,
            pos,
            with_labels=True,
            node_color=node_color,
            node_size=node_size,
            # font_color='white',
            width=width,
            ax=ax)

    if with_text:
        # the per-edge weights live on G itself (the `edges` dict is local to
        # build_cluster_graph), so read them back off the graph
        pprint({edge: G.edges[edge]['weight'] for edge in G.edges})

    return ax
def do_clustering_weights(network_type, weights_path, n_clusters, n_inputs,
                          n_outputs, exclude_inputs, eigen_solver,
                          assign_labels, use_inv_avg_commute, filter_norm,
                          epsilon):

    weights_ = load_weights(weights_path)

    if any(len(wgts.shape) > 2 for wgts in weights_):
        weights_ = extract_cnn_weights_filters_as_units(weights_, filter_norm)
    if network_type == 'cnn':  # for the cnns, only look at conv layers
        cnn_params = CNN_VGG_MODEL_PARAMS if 'vgg' in str(
            weights_path).lower() else CNN_MODEL_PARAMS
        n_conv_layers = len(cnn_params['conv'])
        weights_ = weights_[1:n_conv_layers]  # n_conv_layers is in the config
    elif exclude_inputs:
        weights_ = weights_[1:-1]  # exclude inputs and outputs

    adj_mat_ = weights_to_graph(weights_)

    # delete unconnected components from the net
    _, adj_mat, weight_mask, _ = delete_isolated_ccs_refactored(
        weights_, adj_mat_, is_testing=False)

    if use_inv_avg_commute:
        adj_mat = get_inv_avg_commute_time(adj_mat)

    # find cluster quality of this pruned net
    print("\nclustering unshuffled weights\n")
    unshuffled_ncut, clustering_labels = weights_array_to_cluster_quality(
        None,
        adj_mat,
        n_clusters,
        eigen_solver,
        assign_labels,
        epsilon,
        is_testing=False)
    ave_in_out = (1 - unshuffled_ncut / n_clusters) / (2 * unshuffled_ncut /
                                                       n_clusters)
    ent = entropy(clustering_labels)
    label_proportions = np.bincount(clustering_labels) / len(clustering_labels)
    result = {
        'ncut': unshuffled_ncut,
        'ave_in_out': ave_in_out,
        'mask': weight_mask,  # node_mask is a 1d length n_unit boolean array
        'labels': clustering_labels,
        'label_proportions': label_proportions,
        'entropy': ent
    }
    return result
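# One reading of the ave_in_out statistic above: if p = ncut / n_clusters is
# the average fraction of a cluster's weight that crosses the cut, then
# (1 - p) / (2 * p) is a within-to-cross-cut weight ratio. A direct
# restatement of the formula, as an assumption about its intent rather than
# a quote from the original authors:
def ave_in_out_sketch(ncut, n_clusters):
    p_out = ncut / n_clusters  # average out-of-cluster weight fraction
    return (1 - p_out) / (2 * p_out)

assert ave_in_out_sketch(1.0, 4) == (1 - 0.25) / (2 * 0.25)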
def draw_clustered_mlp(weights_path,
                       clustering_result,
                       n_clusters=4,
                       is_first_square=True,
                       ax=None):
    """Draw MLP with its spectral clustering."""

    weights = load_weights(weights_path)
    layer_widths = extract_layer_widths(weights)
    if 'cnn' in str(
            weights_path).lower():  # if cnn, omit input layer and fc layers
        is_first_square = False
        cnn_params = CNN_VGG_MODEL_PARAMS if 'vgg' in str(
            weights_path).lower() else CNN_MODEL_PARAMS
        n_conv_layers = len(cnn_params['conv'])
        weights = weights[1:n_conv_layers]
        layer_widths = layer_widths[1:n_conv_layers + 1]

    labels, metrics = clustering_result

    G = nx.from_scipy_sparse_matrix(weights_to_graph(weights))

    pos = set_nodes_positions(G.nodes, layer_widths, labels, is_first_square)

    color_mapper = get_color_mapper(n_clusters)

    color_map = [color_mapper[label] for label in labels]

    if ax is None:
        _, ax = plt.subplots(1)

    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        nx.draw(G, pos=pos, node_color=color_map, width=0, node_size=10, ax=ax)

    draw_metrics(metrics, ax)

    return ax, labels, metrics
def plot_eigenvalues(weights_path,
                     n_eigenvalues=None,
                     ax=None,
                     filter_norm=1,
                     **kwargs):

    weights = load_weights(weights_path)

    if 'cnn' in str(weights_path).lower():
        # previously: weights, _ = extract_cnn_weights(weights, with_avg=True)
        weights = extract_cnn_weights_filters_as_units(weights, filter_norm)

    # TODO: take simpler solution from delete_isolated_ccs_refactored
    adj_mat = weights_to_graph(weights)

    _, components = sparse.csgraph.connected_components(adj_mat)

    most_common_component_counts = Counter(components).most_common(2)
    main_component_id = most_common_component_counts[0][0]
    assert (len(most_common_component_counts) == 1
            or most_common_component_counts[1][1] == 1)

    main_component_mask = (components == main_component_id)

    selected_adj_mat = adj_mat[main_component_mask, :][:, main_component_mask]

    norm_laplacian_matrix = sparse.csgraph.laplacian(selected_adj_mat,
                                                     normed=True)

    if n_eigenvalues is None:
        start, end = 0, selected_adj_mat.shape[0] - 2
    elif isinstance(n_eigenvalues, int):
        start, end = 0, n_eigenvalues
    elif isinstance(n_eigenvalues, tuple):
        start, end = n_eigenvalues
    else:
        raise TypeError(
            'n_eigenvalues should be either None, an int, or a tuple.')
    """
    eigen_values, _ = sparse.linalg.eigs(nrom_laplacian_matrix, k=end,
                                         which='SM')
    """

    sigma = 1

    OP = norm_laplacian_matrix - sigma * sparse.eye(
        norm_laplacian_matrix.shape[0])
    OPinv = sparse.linalg.LinearOperator(
        matvec=lambda v: sparse.linalg.minres(OP, v, tol=1e-5)[0],
        shape=norm_laplacian_matrix.shape,
        dtype=norm_laplacian_matrix.dtype)
    eigen_values, _ = sparse.linalg.eigsh(norm_laplacian_matrix,
                                          sigma=sigma,
                                          k=end,
                                          which='LM',
                                          tol=1e-5,
                                          OPinv=OPinv)

    eigen_values = np.sort(eigen_values)

    eigen_values = eigen_values[start:end]

    if ax is None:
        _, ax = plt.subplots(1)

    ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if 'linestyle' not in kwargs:
        kwargs['linestyle'] = 'none'
        kwargs['marker'] = '*'
        kwargs['markersize'] = 5

    return ax.plot(range(start + 1, end + 1), eigen_values, **kwargs)
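# The shift-invert setup above, demonstrated on a toy matrix: passing sigma
# to eigsh makes eigenvalues nearest sigma converge fastest, and a custom
# OPinv lets an iterative solver (minres) stand in for a sparse factorization
# of (A - sigma * I). Self-contained check (sigma_demo chosen away from any
# eigenvalue so the shifted system is nonsingular):
import numpy as np
from scipy import sparse
import scipy.sparse.linalg as spla

A_demo = sparse.diags(np.arange(1.0, 101.0))  # eigenvalues 1, 2, ..., 100
sigma_demo = 0.5
OP_demo = A_demo - sigma_demo * sparse.eye(A_demo.shape[0])
OPinv_demo = spla.LinearOperator(
    shape=A_demo.shape,
    dtype=A_demo.dtype,
    matvec=lambda v: spla.minres(OP_demo, v, tol=1e-8)[0])
vals_demo, _ = spla.eigsh(A_demo, k=3, sigma=sigma_demo, which='LM',
                          OPinv=OPinv_demo)
assert np.allclose(np.sort(vals_demo), [1.0, 2.0, 3.0], atol=1e-4)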
Example #8
def run_clustering(weights_path, num_clusters, eigen_solver, assign_labels,
                   epsilon, num_samples, delete_isolated_ccs_bool, network_type,
                   shuffle_smaller_model,
                   with_labels, with_shuffle, shuffle_method, n_workers,
                   is_testing, with_shuffled_ncuts):
    # t0 = time.time()
    # load weights and get adjacency matrix
    if is_testing:
        assert network_type == 'cnn'

    loaded_weights = load_weights(weights_path)
    if network_type == 'mlp':
        weights_ = loaded_weights
        adj_mat_ = weights_to_graph(loaded_weights)
    elif network_type == 'cnn':

        # comparing current and previous version of expanding CNN
        if is_testing:
            tester_cnn_tensors_to_flat_weights_and_graph(loaded_weights)

        weights_, adj_mat_ = cnn_tensors_to_flat_weights_and_graph(loaded_weights)

    else:
        raise ValueError("network_type must be 'mlp' or 'cnn'")

    # t1 = time.time()
    # print('time to form adjacency matrix', t1 - t0)

    # analyse connectivity structure of network
    # cc_dict = connected_comp_analysis(weights_, adj_mat_)
    # print("connectivity analysis:", cc_dict)

   
    if delete_isolated_ccs_bool:
        # delete unconnected components from the net
        weights, adj_mat, node_mask = delete_isolated_ccs_refactored(weights_, adj_mat_,
                                                                     is_testing=is_testing)
        
        if is_testing:
            weights_old, adj_mat_old = delete_isolated_ccs(weights_, adj_mat_)
            assert (adj_mat != adj_mat_old).sum() == 0
            assert all((w1 == w2).all() for w1, w2 in zip(weights, weights_old))
    
    else:
        weights, adj_mat = weights_, adj_mat_
        node_mask = np.full(adj_mat.shape[0], True)

    # t2 = time.time()
    # print("time to delete isolated ccs", t2 - t1)
    
    # find cluster quality of this pruned net
    print("\nclustering unshuffled weights\n")
    unshuffled_ncut, clustering_labels = weights_array_to_cluster_quality(weights, adj_mat,
                                                                          num_clusters,
                                                                          eigen_solver,
                                                                          assign_labels, epsilon,
                                                                          is_testing)
    ave_in_out = (1 - unshuffled_ncut / num_clusters) / (2 * unshuffled_ncut
                                                         / num_clusters)
            

    # t3 = time.time()
    # print("time to cluster unshuffled weights", t3 - t2)
    result = {'ncut': unshuffled_ncut,
              'ave_in_out': ave_in_out,
              'node_mask': node_mask}
    #return clustering_labels, adj_mat, result

    if with_shuffle:
        
        # find cluster quality of other ways of rearranging the net
        print("\nclustering shuffled weights\n")
        n_samples_per_worker = num_samples // n_workers

        function_argument = (n_samples_per_worker, weights_path, #weights,
                             # loaded_weights,
                             network_type, num_clusters,
                             shuffle_smaller_model, eigen_solver, delete_isolated_ccs_bool,
                             assign_labels, epsilon, shuffle_method)
        if n_workers == 1:
            print('No Pool! Single Worker!')
            shuff_ncuts = shuffle_and_cluster(*function_argument)

        else:
            print(f'Using Pool! Multiple Workers! {n_workers}')

            workers_arguments = [[copy.deepcopy(arg) for _ in range(n_workers)]
                                  for arg in function_argument]

            with ProcessPool(nodes=n_workers) as p:
                shuff_ncuts_results = p.map(shuffle_and_cluster,
                                            *workers_arguments)

            shuff_ncuts = np.concatenate(shuff_ncuts_results)                     

        shuffled_n_samples = len(shuff_ncuts)
        shuffled_mean = np.mean(shuff_ncuts, dtype=np.float64)
        shuffled_stdev = np.std(shuff_ncuts, dtype=np.float64)
        print('BEFORE', np.std(shuff_ncuts))
        percentile = compute_pvalue(unshuffled_ncut, shuff_ncuts)
        print('AFTER', np.std(shuff_ncuts))
        z_score = (unshuffled_ncut - shuffled_mean) / shuffled_stdev
        
        result.update({'shuffle_method': shuffle_method,
                       'n_samples': shuffled_n_samples,
                       'mean': shuffled_mean,
                       'stdev': shuffled_stdev,
                       'z_score': z_score,
                       'percentile': percentile})

    if with_shuffled_ncuts:
        # shuff_ncuts only exists if the shuffled runs actually happened
        assert with_shuffle, 'with_shuffled_ncuts requires with_shuffle'
        result['shuffled_ncuts'] = shuff_ncuts
        
    if with_labels:
        result['labels'] = clustering_labels
    
    return result
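# compute_pvalue is assumed to be the one-sided empirical percentile of the
# unshuffled n-cut within the shuffled distribution (a lower n-cut than
# almost every shuffle is evidence that the clustering is not an artifact).
# A sketch of that assumption, with the usual +1 correction so the p-value
# is never exactly zero:
import numpy as np

def compute_pvalue_sketch(observed, samples):
    samples = np.asarray(samples)
    return (np.sum(samples <= observed) + 1) / (len(samples) + 1)

assert compute_pvalue_sketch(0.0, [1.0, 2.0, 3.0]) == 0.25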
Example #9
def shuffle_and_cluster(num_samples, #weights,
                        weights_path,
                        #loaded_weights,
                        network_type, num_clusters,
                        shuffle_smaller_model, eigen_solver, delete_isolated_ccs_bool,
                        assign_labels, epsilon, shuffle_method):

    ######
    loaded_weights = load_weights(weights_path)
    if network_type == 'mlp':
        weights_ = loaded_weights
        adj_mat_ = weights_to_graph(loaded_weights)
    elif network_type == 'cnn':
        weights_, adj_mat_ = cnn_tensors_to_flat_weights_and_graph(loaded_weights)
    else:
        raise ValueError("network_type must be 'mlp' or 'cnn'")
    
    #######

    if shuffle_smaller_model and delete_isolated_ccs_bool:
        # delete unconnected components from the net BEFORE SHUFFLING!!!
        weights, adj_mat, _ = delete_isolated_ccs_refactored(weights_, adj_mat_,
                                                             is_testing=True)
    else:
        weights, adj_mat = weights_, adj_mat_
    
    #shuff_ncuts = np.array([])
    shuff_ncuts = []

    assert shuffle_method in SHUFFLE_METHODS


    if shuffle_method == 'layer':
        shuffle_function = shuffle_weights
    elif shuffle_method == 'layer_nonzero':
        shuffle_function = shuffle_weights_nonzero
    elif shuffle_method == 'layer_nonzero_distribution':
        shuffle_function = shuffle_weights_nonzero_distribution
    elif shuffle_method == 'layer_all_distribution':
        shuffle_function = shuffle_weights_layer_all_distribution

        
    for _ in range(num_samples):

        # t_start = time.time()
        if network_type == 'mlp':
            if shuffle_smaller_model:
                shuff_weights_ = list(map(shuffle_function, weights))
            else:
                shuff_weights_ = list(map(shuffle_function, loaded_weights))
            shuff_adj_mat_ = weights_to_graph(shuff_weights_)
        else:
            shuff_tensors = list(map(shuffle_function, loaded_weights))
            shuff_weights_, shuff_adj_mat_ = cnn_tensors_to_flat_weights_and_graph(shuff_tensors)
            # NB: this is not quite right, because you're shuffling the whole
            # network, meaning that the isolated ccs get shuffled back in

        # t_before_mid = time.time()
        # print("\ntime to shuffle weights", t_before_mid - t_start)
        if delete_isolated_ccs_bool:
            shuff_weights, shuff_adj_mat, _ = delete_isolated_ccs_refactored(shuff_weights_,
                                                                             shuff_adj_mat_)

        else:
            shuff_weights, shuff_adj_mat = shuff_weights_, shuff_adj_mat_

        # t_mid = time.time()
        # print("time to delete isolated ccs", t_mid - t_before_mid)
        shuff_ncut, _ = weights_array_to_cluster_quality(shuff_weights,
                                                         shuff_adj_mat,
                                                         num_clusters,
                                                         eigen_solver,
                                                         assign_labels, epsilon)
        shuff_ncuts.append(shuff_ncut)
        #shuff_ncuts = np.append(shuff_ncuts, shuff_ncut)
        # t_end = time.time()
        # print("time to cluster shuffled weights", t_end - t_mid)

    return np.array(shuff_ncuts)
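# shuffle_weights (the 'layer' method above) is assumed to permute entries
# within each layer matrix, preserving the layer's weight distribution while
# destroying any cluster structure; the other shuffle_method variants refine
# this (e.g. permuting only nonzero entries). A sketch of that assumption:
import numpy as np

def shuffle_weights_sketch(w, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    return rng.permutation(w.flatten()).reshape(w.shape)

w_demo = np.arange(6.0).reshape(2, 3)
# the multiset of weights is unchanged, only their positions move
assert sorted(shuffle_weights_sketch(w_demo).flatten()) == sorted(w_demo.flatten())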
Example #10
def main(args):
	np.set_printoptions(precision=5)  # NOTE: the original referenced an undefined name `precision`; 5 is an assumed default

	network_path = args.files[0]
	initial_weights_path = args.files[1]
	dataset_path = args.files[2]

	r, n_inputs, n_neurons, n_outputs = load_network(network_path)
	initial_weights = load_weights(initial_weights_path)
	x, y = load_benchmark(dataset_path)
	epsilon = 1e-6
	n = x.shape[0]

	model = NeuralNetwork(deepcopy(initial_weights), r, 0.99, 0)

	print("Parâmetro de regularização lambda={}\n".format(round(r, 3)))
	print("Inicializando rede com a seguinte estrutura de neurônios por camadas: {}\n".format([n_inputs] + n_neurons + [n_outputs]))

	for i in range(len(initial_weights)):
		print("Theta{} inicial (pesos de cada neurônio, incluindo bias, armazenados nas linhas):\n{}".format(i + 1, str_matrix(initial_weights[i], '\t')))

	print("Conjunto de treinamento")
	for i in range(x.shape[0]):
		print("\tExemplo {}".format(i + 1))
		print("\t\tx: {}".format(x[i, :]))
		print("\t\ty: {}".format(y[i, :]))

	print("\n--------------------------------------------")
	print("Calculando erro/custo J da rede")

	for i in range(x.shape[0]):
		print("\tProcessando exemplo de treinamento {}".format(i + 1))
		print("\tPropagando entrada {}".format(x[i, :]))

		f = model.forward_propagation(x[i, :])
		cost = model.cost_x(y[i, :], f)

		print("\t\ta1: {}\n".format(model.a[0]))

		for l in range(1, model.n_layers + 1):
			print("\t\tz{}: {}".format(l + 1, model.z[l]))
			print("\t\ta{}: {}\n".format(l + 1, model.a[l]))

		print("\t\tf(x[{}]): {}".format(i + 1, f))

		print("\tSaida predita para o exemplo {}: {}".format(i + 1, f))
		print("\tSaida esperada para o exemplo {}: {}".format(i + 1, y[i, :]))
		print("\tJ do exemplo {}: {}\n".format(i + 1, cost))

	print("J total do dataset (com regularizacao): {}\n".format(model.cost(x, y)))

	print("\n--------------------------------------------")
	print("Rodando backpropagation")

	for i in range(n):
		print("\tCalculando gradientes com base no exemplo {}".format(i + 1))

		model.g = [np.zeros(model.w[i].shape) for i in range(model.n_layers)]
		model.m = [np.zeros(model.w[i].shape) for i in range(model.n_layers)]

		pred = model.forward_propagation(x[i, :])
		model.d[model.last_layer] = pred - y[i, :]
		model.update_deltas(x[i, :])

		for d in range(model.last_layer, -1, -1):
			print("\t\tdelta{}: {}".format(d + 2, model.d[d]))

		model.accumulate_gradients()

		for t in range(model.last_layer, -1, -1):
			print("\t\tGradientes de Theta{} com base no exemplo {}:\n{}".format(t + 1, i + 1, str_matrix(model.g[t], '\t\t\t')))

	print("\tDataset completo processado. Calculando gradientes regularizados")

	model.final_gradients(n)

	for t in range(model.n_layers):
		print("\t\tGradientes finais para Theta{} (com regularizacao):\n{}".format(t + 1, str_matrix(model.g[t], '\t\t\t')))

	print("\n--------------------------------------------")
	print("Rodando verificacao numerica de gradientes (epsilon={})".format(epsilon))

	backprop_gradients = deepcopy(model.g)
	model.g = [np.zeros(model.w[i].shape) for i in range(model.n_layers)]

	for t in range(model.n_layers):

		for i in range(model.g[t].shape[0]):
			for j in range(model.g[t].shape[1]):
				w = model.w[t][i, j]

				model.w[t][i, j] = w + epsilon
				c1 = model.cost(x, y)

				model.w[t][i, j] = w - epsilon
				c2 = model.cost(x, y)

				model.g[t][i, j] += (c1 - c2) / (2 * epsilon)
				model.w[t][i, j] = w

		print("\tGradiente numerico de Theta{}:\n{}".format(t + 1, str_matrix(model.g[t], '\t\t')))

	print("\n--------------------------------------------")
	print("Verificando corretude dos gradientes com base nos gradientes numericos:")
	for t in range(model.n_layers):
		errors = np.sum(np.abs(model.g[t] - backprop_gradients[t]))
		print("\tErro entre gradiente via backprop e gradiente numerico para Theta{}: {}".format(t + 1, errors))