def test_compute_pvalue():
    assert isclose(1 / 101, compute_pvalue(0, np.arange(100)))
    assert isclose(2 / 101, compute_pvalue(1, np.arange(100)))
    assert isclose(101 / 101, compute_pvalue(101, np.arange(100)))

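# The test above pins down the convention that `compute_pvalue` follows: for the
# default (left-sided) test, the returned value is (1 + #{random < true}) / (1 + N),
# so a p-value is never exactly 0 and a smaller true statistic gives a smaller
# p-value. The helper below is only an illustrative re-derivation of that behaviour
# (a hypothetical name, not the project's implementation); it satisfies the same
# three assertions as `test_compute_pvalue`.
def _reference_compute_pvalue(value, random_values):
    """Illustrative left-sided permutation percentile with a +1 correction."""
    random_values = np.asarray(random_values)
    return (1 + np.sum(random_values < value)) / (1 + len(random_values))
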
def perform_lesion_experiment_imagenet(
        network,
        num_clusters=10,
        num_shuffles=10,
        with_random=True,
        downsampled=False,
        eigen_solver='arpack',
        batch_size=32,
        data_dir='/project/clusterability_in_neural_networks/datasets/imagenet2012',
        val_tar='ILSVRC2012_img_val.tar',
        downsampled_n_samples=10000):

    assert network != 'inceptionv3', 'This function does not yet support inceptionv3'

    net, preprocess = Classifiers.get(network)  # get network object and preprocess fn
    model = net((224, 224, 3), weights='imagenet')  # get the network's tf.keras model

    data_path = Path(data_dir)
    tfrecords = list(data_path.glob('*validation.tfrecord*'))
    if not tfrecords:
        prep_imagenet_validation_data(data_dir, val_tar)  # this'll take a sec
    imagenet = tfds.image.Imagenet2012()  # dataset builder object
    imagenet._data_dir = data_dir
    val_dataset_object = imagenet.as_dataset(split='validation')  # dataset object
    # assert isinstance(val_dataset_object, tf.data.Dataset)

    if downsampled:
        # get the small dataset as an np.ndarray
        dataset, y = imagenet_downsampled_dataset(
            val_dataset_object, preprocess, n_images=downsampled_n_samples)
        steps = None
        val_set_size = downsampled_n_samples
    else:
        dataset = imagenet_generator(val_dataset_object, preprocess)
        val_set_size = 50000
        steps = val_set_size // 250  # use batch_size of 250
        y = []  # to become an ndarray of true labels
        for _ in range(steps):
            _, logits = next(dataset)
            y.append(np.argmax(logits, axis=-1))
        y = np.concatenate(y)
        batch_size = None

    # get info from clustering
    clustering_results = run_clustering_imagenet(network,
                                                 num_clusters=num_clusters,
                                                 with_shuffle=False,
                                                 eigen_solver=eigen_solver)
    labels = clustering_results['labels']
    connections = clustering_results['conv_connections']  # just connections for conv layers
    layer_widths = [cc[0]['weights'].shape[0] for cc in connections[1:]]  # skip first conv layer
    dense_sizes = get_dense_sizes(connections)
    layer_widths.extend(list(dense_sizes.values()))
    labels_in_layers = list(splitter(labels, layer_widths))

    y_pred = np.argmax(model.predict(dataset, steps=steps, batch_size=batch_size),
                       axis=-1)
    if not isinstance(dataset, np.ndarray):
        dataset = imagenet_generator(val_dataset_object, preprocess)
    evaluation = _get_classification_accs_imagenet(y, y_pred)  # an ndarray of all 1000 class accs

    # next get true accs and label bincounts for the 1000 classes
    accs_true, class_props_true, cluster_sizes = lesion_test_imagenet(
        model, dataset, y, labels_in_layers, num_clusters, steps, batch_size,
        val_dataset_object, preprocess, num_samples=1)
    accs_true = accs_true[0]  # it's a 1-element list, so just take the first element
    class_props_true = class_props_true[0]  # same as line above

    if not with_random:
        # make and return a dict with keys giving submodules and values giving
        # num shuffles, overall acc, and class accs
        results = {}
        for layer_key in accs_true.keys():
            results[layer_key] = {}
            for cluster_key in accs_true[layer_key].keys():
                sm_results = {}
                true_accs = accs_true[layer_key][cluster_key]
                sm_results['num_shuffles'] = num_shuffles
                sm_results['overall_acc'] = np.mean(true_accs)
                sm_results['class_accs'] = true_accs
                results[layer_key][cluster_key] = sm_results
        return evaluation, results

    else:  # perform random lesion tests num_shuffles times
        # get random results
        all_acc_random, all_class_props, _ = lesion_test_imagenet(
            model, dataset, y, labels_in_layers, num_clusters, steps,
            batch_size, val_dataset_object, preprocess, num_shuffles,
            shuffle=True)

        # make and return a dict with keys giving submodules and values giving
        # stats about true labels, shufflings, and p-values for hypothesis tests
        results = {}
        for layer_key in accs_true.keys():
            results[layer_key] = {}
            for cluster_key in accs_true[layer_key].keys():
                sm_results = {}

                true_accs = accs_true[layer_key][cluster_key]
                random_accs = np.vstack([
                    all_acc_random[i][layer_key][cluster_key]
                    for i in range(num_shuffles)
                ])

                overall_acc = np.mean(true_accs)
                overall_random_accs = np.mean(random_accs, axis=1)
                overall_acc_percentile = compute_pvalue(overall_acc,
                                                        overall_random_accs)
                overall_acc_effect_factor = np.mean(overall_random_accs) / overall_acc

                random_changes = random_accs - evaluation
                normalized_random_changes = (random_changes.T /
                                             np.mean(random_changes, axis=-1)).T
                random_range_normalized_changes = np.ptp(normalized_random_changes,
                                                         axis=-1)
                true_changes = true_accs - evaluation
                normalized_true_changes = true_changes / np.mean(true_changes)
                true_range_normalized_changes = np.ptp(normalized_true_changes)
                range_percentile = compute_pvalue(true_range_normalized_changes,
                                                  random_range_normalized_changes,
                                                  side='right')
                range_effect_factor = (np.mean(random_range_normalized_changes) /
                                       true_range_normalized_changes)

                sm_results['cluster_size'] = cluster_sizes[layer_key][cluster_key]
                sm_results['acc'] = overall_acc
                sm_results['acc_percentile'] = overall_acc_percentile
                sm_results['overall_acc_effect_factor'] = overall_acc_effect_factor
                sm_results['range'] = true_range_normalized_changes
                sm_results['range_percentile'] = range_percentile
                sm_results['range_effect_factor'] = range_effect_factor
                results[layer_key][cluster_key] = sm_results

        return evaluation, results

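# Illustrative driver for the lesion experiment above. The network tag 'vgg16',
# the cluster/shuffle counts, and the reliance on the default ImageNet paths are
# assumptions for the sake of the example; this helper is hypothetical and is
# not called anywhere in the pipeline.
def _example_lesion_experiment_imagenet():
    evaluation, lesion_results = perform_lesion_experiment_imagenet(
        'vgg16',
        num_clusters=10,
        num_shuffles=10,
        downsampled=True,
        downsampled_n_samples=1000)
    # report the percentile statistics for each (layer, cluster) submodule
    for layer_key, clusters in lesion_results.items():
        for cluster_key, sm_stats in clusters.items():
            print(layer_key, cluster_key,
                  sm_stats['acc_percentile'], sm_stats['range_percentile'])
    return evaluation, lesion_results
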
def do_lesion_hypo_tests(evaluation, true_results, all_random_results):

    n_submodules = len(true_results)
    n_shuffles = len(all_random_results)

    if 'mses' in true_results[0]:  # if regression
        n_inputs = 2
        coefs = (0, 1)
        exps = (0, 1, 2)
        n_terms = len(exps)**n_inputs
        n_outputs = len(coefs)**n_terms
        poly_coefs = np.zeros((n_outputs, n_terms))
        for poly_i, coef_list in enumerate(it.product(coefs, repeat=n_terms)):
            poly_coefs[poly_i] = np.array(coef_list)
        term_exps = [exs for exs in it.product(exps, repeat=n_inputs)]
        n_terms = len(term_exps)

        # random_out_raw has shape (n_random, n_submodules, n_outputs)
        random_out_raw = np.array(
            [[rand_sm['mses'] for rand_sm in rand_results]
             for rand_results in all_random_results])
        # true_out_raw has shape (n_submodules, n_outputs)
        true_out_raw = np.array([true_sm['mses'] for true_sm in true_results])
        # eval_out_raw has shape (n_outputs,)
        eval_out_raw = evaluation['mses']

        # random_outs has shape (n_random, n_submodules, n_terms)
        random_outs = np.array([[[
            np.mean(
                np.array([
                    rand_sm[output_i] for output_i in range(n_outputs)
                    if poly_coefs[output_i][term_i] == 1
                ])) for term_i in range(n_terms)
        ] for rand_sm in rand_mses] for rand_mses in random_out_raw])
        # true_outs has shape (n_submodules, n_terms)
        true_outs = np.array([[
            np.mean(
                np.array([
                    true_sm[output_i] for output_i in range(n_outputs)
                    if poly_coefs[output_i][term_i] == 1
                ])) for term_i in range(n_terms)
        ] for true_sm in true_out_raw])
        # eval_outs has shape (n_terms,)
        eval_outs = np.array([
            np.mean(
                np.array([
                    eval_out_raw[output_i] for output_i in range(n_outputs)
                    if poly_coefs[output_i][term_i] == 1
                ])) for term_i in range(n_terms)
        ])

    else:  # if classification
        # random_outs has shape (n_random, n_submodules, n_outputs)
        random_outs = np.array([[[
            class_acc for key, class_acc in rand_sm.items()
            if 'acc' in key and 'overall' not in key
        ] for rand_sm in rand_results] for rand_results in all_random_results])
        # true_outs has shape (n_submodules, n_outputs)
        true_outs = np.array([[
            class_acc for key, class_acc in true_sm.items()
            if 'acc' in key and 'overall' not in key
        ] for true_sm in true_results])
        # eval_outs has shape (n_outputs,)
        eval_outs = np.array([
            class_acc for key, class_acc in evaluation.items()
            if 'acc' in key and 'overall' not in key
        ])

    random_means = np.mean(random_outs, axis=-1)
    true_means = np.mean(true_outs, axis=-1)

    random_changes = random_outs - eval_outs
    random_normalized_changes = random_changes / np.mean(
        random_changes, axis=-1)[:, :, np.newaxis]
    random_ranges_normalized_changes = np.ptp(random_normalized_changes, axis=-1)
    true_changes = true_outs - eval_outs
    true_normalized_changes = true_changes / np.mean(true_changes,
                                                     axis=-1)[:, np.newaxis]
    true_ranges_normalized_changes = np.ptp(true_normalized_changes, axis=-1)

    mean_percentiles = np.array([
        compute_pvalue(true_means[sm_i], random_means[:, sm_i])
        for sm_i in range(n_submodules)
    ])
    range_percentiles = np.array([
        compute_pvalue(true_ranges_normalized_changes[sm_i],
                       random_ranges_normalized_changes[:, sm_i],
                       side='right') for sm_i in range(n_submodules)
    ])

    # get effect sizes
    effect_factor_means = np.nanmean(
        np.array([
            np.mean(random_means[:, sm_i]) / true_means[sm_i]
            for sm_i in range(n_submodules)
        ]))
    effect_factor_ranges = np.nanmean(
        np.array([
            np.mean(random_ranges_normalized_changes[:, sm_i]) /
            true_ranges_normalized_changes[sm_i]
            for sm_i in range(n_submodules)
        ]))

    chi2_p_means = chi2_categorical_test(mean_percentiles, n_shuffles)
    chi2_p_ranges = chi2_categorical_test(range_percentiles, n_shuffles)
    combined_p_means = combine_ps(mean_percentiles, n_shuffles)
    combined_p_ranges = combine_ps(range_percentiles, n_shuffles)

    results = {
        'mean_percentiles': mean_percentiles,
        'range_percentiles': range_percentiles,
        'effect_factor_means': effect_factor_means,
        'effect_factor_range': effect_factor_ranges,
        'chi2_p_means': chi2_p_means,
        'chi2_p_ranges': chi2_p_ranges,
        'combined_p_means': combined_p_means,
        'combined_p_ranges': combined_p_ranges
    }
    return results

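# Toy illustration of the range statistic used above (all numbers made up):
# per-class changes are divided by their mean, so the statistic is scale-free
# and only measures how *unevenly* a lesion hurts the classes. A right-sided
# test then asks whether the true lesion is more class-specialized than random
# lesions of the same size. The function name is hypothetical.
def _toy_range_statistic_demo():
    eval_accs = np.array([0.9, 0.9, 0.9, 0.9])         # hypothetical intact class accs
    true_accs = np.array([0.2, 0.9, 0.9, 0.9])          # the real lesion hits class 0 hard
    rng = np.random.default_rng(0)
    random_accs = 0.9 - rng.uniform(0.1, 0.2, (5, 4))   # 5 hypothetical random lesions

    true_changes = true_accs - eval_accs
    true_range = np.ptp(true_changes / np.mean(true_changes))
    random_changes = random_accs - eval_accs
    random_ranges = np.ptp(
        random_changes / np.mean(random_changes, axis=-1, keepdims=True), axis=-1)
    return compute_pvalue(true_range, random_ranges, side='right')
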
def run_clustering(weights_path, num_clusters, eigen_solver, assign_labels,
                   epsilon, num_samples, delete_isolated_ccs_bool,
                   network_type, shuffle_smaller_model, with_labels,
                   with_shuffle, shuffle_method, n_workers, is_testing,
                   with_shuffled_ncuts):

    # t0 = time.time()

    # load weights and get adjacency matrix
    if is_testing:
        assert network_type == 'cnn'
    loaded_weights = load_weights(weights_path)
    if network_type == 'mlp':
        weights_ = loaded_weights
        adj_mat_ = weights_to_graph(loaded_weights)
    elif network_type == 'cnn':
        # comparing current and previous version of expanding CNN
        if is_testing:
            tester_cnn_tensors_to_flat_weights_and_graph(loaded_weights)
        weights_, adj_mat_ = cnn_tensors_to_flat_weights_and_graph(loaded_weights)
    else:
        raise ValueError("network_type must be 'mlp' or 'cnn'")

    # t1 = time.time()
    # print('time to form adjacency matrix', t1 - t0)

    # analyse connectivity structure of network
    # cc_dict = connected_comp_analysis(weights_, adj_mat_)
    # print("connectivity analysis:", cc_dict)

    if delete_isolated_ccs_bool:
        # delete unconnected components from the net
        weights, adj_mat, node_mask = delete_isolated_ccs_refactored(
            weights_, adj_mat_, is_testing=is_testing)
        if is_testing:
            weights_old, adj_mat_old = delete_isolated_ccs(weights_, adj_mat_)
            assert (adj_mat != adj_mat_old).sum() == 0
            assert all((w1 == w2).all() for w1, w2 in zip(weights, weights_old))
    else:
        weights, adj_mat = weights_, adj_mat_
        node_mask = np.full(adj_mat.shape[0], True)

    # t2 = time.time()
    # print("time to delete isolated ccs", t2 - t1)

    # find cluster quality of this pruned net
    print("\nclustering unshuffled weights\n")
    unshuffled_ncut, clustering_labels = weights_array_to_cluster_quality(
        weights, adj_mat, num_clusters, eigen_solver, assign_labels, epsilon,
        is_testing)
    ave_in_out = (1 - unshuffled_ncut / num_clusters) / (2 * unshuffled_ncut /
                                                         num_clusters)

    # t3 = time.time()
    # print("time to cluster unshuffled weights", t3 - t2)

    result = {'ncut': unshuffled_ncut,
              'ave_in_out': ave_in_out,
              'node_mask': node_mask}
    # return clustering_labels, adj_mat, result

    if with_shuffle:
        # find cluster quality of other ways of rearranging the net
        print("\nclustering shuffled weights\n")
        n_samples_per_worker = num_samples // n_workers
        function_argument = (n_samples_per_worker,
                             weights_path,
                             # weights,
                             # loaded_weights,
                             network_type, num_clusters, shuffle_smaller_model,
                             eigen_solver, delete_isolated_ccs_bool,
                             assign_labels, epsilon, shuffle_method)
        if n_workers == 1:
            print('No Pool! Single Worker!')
            shuff_ncuts = shuffle_and_cluster(*function_argument)
        else:
            print(f'Using Pool! Multiple Workers! {n_workers}')
            workers_arguments = [[copy.deepcopy(arg) for _ in range(n_workers)]
                                 for arg in function_argument]
            with ProcessPool(nodes=n_workers) as p:
                shuff_ncuts_results = p.map(shuffle_and_cluster,
                                            *workers_arguments)
            shuff_ncuts = np.concatenate(shuff_ncuts_results)

        shuffled_n_samples = len(shuff_ncuts)
        shuffled_mean = np.mean(shuff_ncuts, dtype=np.float64)
        shuffled_stdev = np.std(shuff_ncuts, dtype=np.float64)
        print('BEFORE', np.std(shuff_ncuts))
        percentile = compute_pvalue(unshuffled_ncut, shuff_ncuts)
        print('AFTER', np.std(shuff_ncuts))
        z_score = (unshuffled_ncut - shuffled_mean) / shuffled_stdev
        result.update({'shuffle_method': shuffle_method,
                       'n_samples': shuffled_n_samples,
                       'mean': shuffled_mean,
                       'stdev': shuffled_stdev,
                       'z_score': z_score,
                       'percentile': percentile})

    if with_shuffled_ncuts:
        result['shuffled_ncuts'] = shuff_ncuts

    if with_labels:
        result['labels'] = clustering_labels

    return result

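# Why `ave_in_out = (1 - ncut / k) / (2 * ncut / k)` above: with k clusters,
# ncut / k is the average per-cluster fraction f = w_out / (2 * w_in + w_out)
# of a cluster's volume that crosses the cut (within-cluster weight w_in is
# counted from both endpoints, cut weight w_out only once). Solving for the
# in/out ratio gives w_in / w_out = (1 - f) / (2 * f), so ave_in_out is
# (approximately, assuming f is similar across clusters) the ratio of
# within-cluster to between-cluster edge weight. The helper name below is
# hypothetical; it just mirrors the inline expression.
def _ave_in_out_from_ncut(ncut, num_clusters):
    f = ncut / num_clusters
    return (1 - f) / (2 * f)
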
def compute_damaged_cluster_stats(true_results,
                                  all_random_results,
                                  metadata,
                                  evaluation,
                                  pvalue_threshod=None,
                                  diff_threshold=-1 / 100,
                                  double_joint_df=None,
                                  single_df=None,
                                  diff_field='diff'):

    n_way = 2 if 'labels_in_layers' in true_results[0] else 1
    index = ['labels_in_layers'] if n_way == 2 else ['layer', 'label']

    assert diff_field in ('diff', 's_i|j')
    if diff_field == 's_i|j':
        assert n_way == 2
        assert single_df is not None

    if pvalue_threshod is None:
        pvalue_threshod = 1 / len(all_random_results)

    true_df = pd.DataFrame(true_results).set_index(index).sort_index()
    all_random_df = pd.DataFrame(sum(all_random_results, []))
    all_random_layer_label = all_random_df.groupby(index)

    true_df = true_df['acc_overall']
    all_random_layer_label = all_random_layer_label['acc_overall']

    corrected_pvalue_by_groups = (
        (layer_label, compute_pvalue(true_df[layer_label], group))
        for layer_label, group in all_random_layer_label)
    layer_label_index, corrected_pvalue_by_layer_label = zip(
        *corrected_pvalue_by_groups)

    # make sure that the order of true_df and corrected_pvalue_by_layer_label
    # is the same before giving corrected_pvalues_df the same index as true_df
    assert tuple(true_df.index) == layer_label_index

    corrected_pvalues_df = pd.DataFrame(
        {'acc_overall': corrected_pvalue_by_layer_label}, index=true_df.index)
    corrected_pvalues_df = (corrected_pvalues_df
                            .assign(value='corrected_pvalue')
                            .set_index(['value'], append=True))

    true_df = (pd.DataFrame(true_df)
               .assign(value='true')
               .set_index(['value'], append=True))

    random_stats_df = pd.DataFrame(
        {'acc_overall': all_random_layer_label.agg(['mean', 'std']).stack()})

    z_score_df = ((true_df - random_stats_df.xs('mean', level=-1)) /
                  random_stats_df.xs('std', level=-1))
    z_score_df.index = z_score_df.index.set_levels(['z_score'], level=-1)

    diff_df = true_df - evaluation['acc_overall']
    diff_df.index = diff_df.index.set_levels(['diff'], level=-1)

    stats_df = (pd.concat([
        true_df,
        corrected_pvalues_df,
        # pvalues_df,
        diff_df,
        z_score_df,
        random_stats_df
    ]).sort_index())

    metadata_df = pd.DataFrame(metadata).set_index(['layer', 'label'])

    overall_stats_df = stats_df.unstack()
    overall_stats_df.columns = overall_stats_df.columns.droplevel()

    if n_way == 1:
        overall_stats_df = pd.concat([overall_stats_df, metadata_df], axis=1)

    if diff_field == 's_i|j':
        overall_stats_df = enrich_score_double_conditional_df(
            overall_stats_df, single_df)

    overall_stats_df['taxonomy'] = overall_stats_df.apply(
        lambda r: layer_cluster_taxonomify(r,
                                           with_proportion=(n_way == 1),
                                           pvalue_threshod=pvalue_threshod,
                                           diff_threshold=diff_threshold,
                                           diff_field=diff_field),
        axis=1)

    overall_columns = ['diff', 'corrected_pvalue']
    if n_way == 1:
        overall_columns.append('label_in_layer_proportion')
    overall_columns.append('true')
    overall_columns.extend(['taxonomy', 'mean', 'std', 'z_score'])
    if n_way == 1:
        overall_columns.append('n_layer_label')

    overall_stats_df = overall_stats_df[overall_columns]

    # adding the diagonal (i.e., single) to a conditional double using the joint double
    if double_joint_df is not None:
        assert n_way == 2, '`double_joint_df` should be given only for double'
        warnings.warn('Make sure that `n_shuffled` for the conditional double results'
                      ' is the same as the one used for generating the joint double df!')

        double_same_pair_mask = [
            first == second for first, second in double_joint_df.index
        ]
        double_same_pair_df = double_joint_df[double_same_pair_mask]

        overall_stats_df = pd.concat([overall_stats_df,
                                      double_same_pair_df]).sort_index()

    return overall_stats_df

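# Minimal sketch of the groupby pattern used in `compute_damaged_cluster_stats`,
# on made-up toy data: for each (layer, label) pair, the true damaged accuracy
# is compared against the accuracies obtained from random shufflings of the
# same lesion. The function name and all numbers are hypothetical.
def _toy_damaged_cluster_pvalues():
    true_results_toy = [
        {'layer': 0, 'label': 0, 'acc_overall': 0.52},   # heavily damaged
        {'layer': 0, 'label': 1, 'acc_overall': 0.91},   # barely damaged
    ]
    all_random_results_toy = [[
        {'layer': 0, 'label': 0, 'acc_overall': 0.90 + 0.01 * i},
        {'layer': 0, 'label': 1, 'acc_overall': 0.90 - 0.01 * i},
    ] for i in range(5)]

    true_acc = pd.DataFrame(true_results_toy).set_index(['layer', 'label'])['acc_overall']
    random_df = pd.DataFrame(sum(all_random_results_toy, []))
    # left-sided test: a low p-value means the real lesion hurts accuracy more
    # than nearly all random lesions of the same size
    return {
        layer_label: compute_pvalue(true_acc[layer_label],
                                    group['acc_overall'].values)
        for layer_label, group in random_df.groupby(['layer', 'label'])
    }
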
def clustering_comparisons(activations, act_labels, act_mask, corr_adj,
                           weight_labels, weight_mask, n_clusters, n_samples,
                           with_shuffle, epsilon):

    # only consider units that were connected in the weight and activation graphs
    weight_act_mask = weight_mask[act_mask]
    act_weight_mask = act_mask[weight_mask]
    activations = activations[weight_act_mask]
    act_labels = act_labels[weight_act_mask]
    weight_labels = weight_labels[act_weight_mask]
    n_units = len(act_labels)
    assert len(act_labels) == len(weight_labels)
    assert n_units == np.sum(act_mask * weight_mask)

    # get normalized mutual info between the two clusterings
    nmi = normalized_mutual_info_score(act_labels, weight_labels)

    # get the ncut that results from using the activation adj mat with the
    # weight-based clustering labels
    mask_corr_adj = corr_adj[weight_act_mask, :][:, weight_act_mask]
    transfer_ncut = compute_ncut(mask_corr_adj, weight_labels, epsilon)

    # next, calculate the average intra- and inter-cluster corr_adj based on
    # the weight labels
    intra_adj = np.array([])
    inter_adj = np.array([])
    for label in range(n_clusters):
        weight_label_mask = weight_labels == label
        intra_adj = np.append(
            intra_adj,
            mask_corr_adj[weight_label_mask, :][:, weight_label_mask].flatten())
        inter_adj = np.append(
            inter_adj,
            # complement of the boolean mask selects columns outside the cluster
            mask_corr_adj[weight_label_mask, :][:, ~weight_label_mask].flatten())

    intra_mean = np.sum(intra_adj) / (len(intra_adj) - n_units)  # correct denom to ignore 0 self edges
    inter_mean = np.mean(inter_adj)

    # cca_grid = grid_cca(activations, weight_labels, n_clusters)

    results = {
        'normalized_mutual_information': nmi,
        'transfer_ncut': transfer_ncut,
        'intra_mean': intra_mean,
        'inter_mean': inter_mean
    }  # , 'cca_grid': cca_grid}

    if with_shuffle:
        shuffled_nmis = []
        for _ in range(n_samples):
            np.random.shuffle(weight_labels)
            shuffled_nmis.append(
                normalized_mutual_info_score(act_labels, weight_labels))
        shuffled_nmis = np.array(shuffled_nmis)
        shuffled_mean = np.mean(shuffled_nmis)
        shuffled_stdev = np.std(shuffled_nmis)
        results.update({
            'n_samples': n_samples,
            'mean': shuffled_mean,
            'stdev': shuffled_stdev,
            'z_score': (nmi - shuffled_mean) / shuffled_stdev,
            'percentile': compute_pvalue(nmi, shuffled_nmis)
        })

    return results

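# Toy sanity check of the NMI comparison above (illustrative only): NMI is
# invariant to relabeling the clusters, so identical partitions score 1.0 even
# when their cluster ids differ, which is what makes it suitable for comparing
# a weight-based clustering against an activation-based one.
def _nmi_relabeling_sanity_check():
    a = np.array([0, 0, 1, 1, 2, 2])
    b = np.array([2, 2, 0, 0, 1, 1])  # same partition, different cluster ids
    return normalized_mutual_info_score(a, b)  # == 1.0
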
def do_clustering_activations(network_type, activations_path,
                              activations_mask_path, corr_type, n_clusters,
                              n_inputs, n_outputs, exclude_inputs,
                              eigen_solver, assign_labels, epsilon, n_samples,
                              with_shuffle, n_workers):

    with open(activations_path, 'rb') as f:
        activations = pickle.load(f)
    with open(activations_mask_path, 'rb') as f:
        activations_mask = pickle.load(f)

    if network_type == 'cnn':
        # for the cnns, only look at conv layers
        if 'stacked' in str(activations_path).lower():
            n_in = n_inputs * 2
        else:
            n_in = n_inputs
        cnn_params = (CNN_VGG_MODEL_PARAMS
                      if 'vgg' in str(activations_path).lower()
                      else CNN_MODEL_PARAMS)
        n_conv_filters = sum([cl['filters'] for cl in cnn_params['conv']])
        n_start = np.sum(activations_mask[:n_in])
        n_stop = n_start + np.sum(activations_mask[n_in:n_in + n_conv_filters])
        activations = activations[n_start:n_stop, :]
        activations_mask = activations_mask[n_in:n_in + n_conv_filters]
    elif exclude_inputs:
        n_in = n_inputs
        n_start = np.sum(activations_mask[:n_in])
        activations = activations[n_start:-n_outputs, :]
        activations_mask = activations_mask[n_in:-n_outputs]

    corr_adj = get_corr_adj(activations, corr_type)
    unshuffled_ncut, clustering_labels = weights_array_to_cluster_quality(
        None, corr_adj, n_clusters, eigen_solver, assign_labels, epsilon,
        is_testing=False)
    ave_in_out = (1 - unshuffled_ncut / n_clusters) / (2 * unshuffled_ncut /
                                                       n_clusters)
    ent = entropy(clustering_labels)
    label_proportions = np.bincount(clustering_labels) / len(clustering_labels)

    result = {
        'activations': activations,
        'corr_adj': corr_adj,
        'mask': activations_mask,
        'ncut': unshuffled_ncut,
        'ave_in_out': ave_in_out,
        'labels': clustering_labels,
        'label_proportions': label_proportions,
        'entropy': ent
    }

    if with_shuffle:
        n_samples_per_worker = n_samples // n_workers
        function_argument = (n_samples_per_worker, corr_adj, n_clusters,
                             eigen_solver, assign_labels, epsilon)
        if n_workers == 1:
            print('No Pool! Single Worker!')
            shuff_ncuts = shuffle_and_cluster_activations(*function_argument)
        else:
            print(f'Using Pool! Multiple Workers! {n_workers}')
            workers_arguments = [[copy.deepcopy(arg) for _ in range(n_workers)]
                                 for arg in function_argument]
            with ProcessPool(nodes=n_workers) as p:
                shuff_ncuts_results = p.map(shuffle_and_cluster_activations,
                                            *workers_arguments)
            shuff_ncuts = np.concatenate(shuff_ncuts_results)

        shuffled_n_samples = len(shuff_ncuts)
        shuffled_mean = np.mean(shuff_ncuts, dtype=np.float64)
        shuffled_stdev = np.std(shuff_ncuts, dtype=np.float64)
        print('BEFORE', np.std(shuff_ncuts))
        percentile = compute_pvalue(unshuffled_ncut, shuff_ncuts)
        print('AFTER', np.std(shuff_ncuts))
        z_score = (unshuffled_ncut - shuffled_mean) / shuffled_stdev
        result.update({
            'n_samples': shuffled_n_samples,
            'mean': shuffled_mean,
            'stdev': shuffled_stdev,
            'z_score': z_score,
            'percentile': percentile
        })

    return result

def evaluate_imagenet_visualizations(
        model_tag,
        data_dir='/project/clusterability_in_neural_networks/datasets/'):

    assert model_tag in VIS_NETS

    with open(data_dir + model_tag + '_max_data.pkl', 'rb') as f:
        data = pickle.load(f)

    # unpack data
    max_images = data['max_images']
    # min_images = data['min_images']
    random_max_images = data['random_max_images']
    # random_min_images = data['random_min_images']
    max_losses = data['max_losses']
    # min_losses = data['min_losses']
    random_max_losses = data['random_max_losses']
    # random_min_losses = data['random_min_losses']
    sm_sizes = data['sm_sizes']
    sm_layers = data['sm_layers']
    sm_layer_sizes = data['sm_layer_sizes']
    sm_clusters = data['sm_clusters']

    n_examples = len(sm_sizes)
    n_random = int(len(random_max_images) / n_examples)
    input_side = max_images.shape[1]

    # get model
    net, preprocess = Classifiers.get(model_tag)  # get network object and preprocess fn
    model = net((input_side, input_side, 3), weights='imagenet')  # get the network's tf.keras model

    # get predictions
    max_preds = model.predict(max_images)
    # min_preds = model.predict(min_images)
    random_max_preds = np.reshape(model.predict(random_max_images),
                                  (n_examples, n_random, -1))
    # random_min_preds = np.reshape(model.predict(random_min_images), (n_examples, n_random, -1))

    # get entropies
    max_entropies = np.array([entropy(pred) for pred in max_preds])
    # min_entropies = np.array([entropy(pred) for pred in min_preds])
    random_max_entropies = np.array([[entropy(pred) for pred in reps]
                                     for reps in random_max_preds])
    # random_min_entropies = np.array([[entropy(pred) for pred in reps] for reps in random_min_preds])

    # reshape losses
    random_max_losses = np.reshape(random_max_losses, (n_examples, n_random))
    # random_min_losses = np.reshape(random_min_losses, (n_examples, n_random))

    # get percentiles
    max_percentiles_entropy = np.array([
        compute_pvalue(max_entropies[i], random_max_entropies[i])
        for i in range(len(max_entropies))
    ])
    # min_percentiles_entropy = np.array([compute_pvalue(min_entropies[i], random_min_entropies[i])
    #                                     for i in range(len(min_entropies))])
    max_percentiles_loss = np.array([
        compute_pvalue(max_losses[i], random_max_losses[i], side='right')
        for i in range(len(max_losses))
    ])
    # min_percentiles_loss = np.array([compute_pvalue(min_losses[i], random_min_losses[i])
    #                                  for i in range(len(min_losses))])

    # get effect sizes
    effect_factor_entropy = np.mean(
        np.array([
            np.mean(random_max_entropies[i]) / max_entropies[i]
            for i in range(len(max_entropies))
        ]))
    effect_factor_loss = np.mean(
        np.array([
            np.mean(random_max_losses[i]) / max_losses[i]
            for i in range(len(max_losses))
        ]))

    # get pvalues
    max_chi2_p_entropy = chi2_categorical_test(max_percentiles_entropy, n_random)
    max_combined_p_entropy = combine_ps(max_percentiles_entropy, n_random)
    max_chi2_p_loss = chi2_categorical_test(max_percentiles_loss, n_random)
    max_combined_p_loss = combine_ps(max_percentiles_loss, n_random)

    results = {
        'percentiles': (max_percentiles_entropy, max_percentiles_loss),
        'effect_factors': (effect_factor_entropy, effect_factor_loss),
        'chi2_ps': (max_chi2_p_entropy, max_chi2_p_loss),
        'combined_ps': (max_combined_p_entropy, max_combined_p_loss),
        'sm_layers': sm_layers,
        'sm_sizes': sm_sizes,
        'sm_layer_sizes': sm_layer_sizes,
        'sm_clusters': sm_clusters
    }

    return results

def evaluate_visualizations(
        model_tag,
        rep,
        is_unpruned,
        data_dir='/project/clusterability_in_neural_networks/datasets/'):

    if is_unpruned:
        suff = f'{rep}_unpruned_max_data.pkl'
    else:
        suff = f'{rep}_pruned_max_data.pkl'

    with open(data_dir + model_tag + suff, 'rb') as f:
        data = pickle.load(f)

    # unpack data
    max_images = data['max_images']
    random_max_images = data['random_max_images']
    max_losses = data['max_losses']
    random_max_losses = data['random_max_losses']
    sm_sizes = data['sm_sizes']
    sm_layers = data['sm_layers']
    sm_layer_sizes = data['sm_layer_sizes']
    sm_clusters = data['sm_clusters']

    n_examples = len(sm_sizes)
    n_max_min = int(len(max_images) / n_examples)
    n_random = int(len(random_max_images) / n_examples)
    input_side = max_images.shape[1]

    # flatten all inputs if mlp
    if 'mlp' in model_tag.lower():
        max_images = np.reshape(max_images, [-1, IMAGE_SIZE**2])
        random_max_images = np.reshape(random_max_images, [-1, IMAGE_SIZE**2])

    # get model
    model_dir = get_model_path(model_tag, filter_='all')[rep]
    model_path = get_model_paths(model_dir)[is_unpruned]
    model = load_model2(model_path)

    # get predictions
    max_preds = model.predict(max_images)
    random_max_preds = np.reshape(model.predict(random_max_images),
                                  (n_examples, n_random, -1))

    # get entropies
    max_entropies = np.array([entropy(pred) for pred in max_preds])
    random_max_entropies = np.array([[entropy(pred) for pred in reps]
                                     for reps in random_max_preds])

    # reshape losses
    random_max_losses = np.reshape(random_max_losses, (n_examples, n_random))

    # get percentiles
    max_percentiles_entropy = np.array([
        compute_pvalue(max_entropies[i], random_max_entropies[i])
        for i in range(len(max_entropies))
    ])
    max_percentiles_loss = np.array([
        compute_pvalue(max_losses[i], random_max_losses[i], side='right')
        for i in range(len(max_losses))
    ])

    # get effect sizes
    effect_factors_entropies = np.array([
        np.mean(random_max_entropies[i]) / max_entropies[i]
        for i in range(len(max_entropies)) if max_entropies[i] > 0
    ])
    mean_effect_factor_entropy = np.nanmean(effect_factors_entropies)
    effect_factors_losses = np.array([
        np.mean(random_max_losses[i]) / max_losses[i]
        for i in range(len(max_losses)) if max_losses[i] > 0
    ])
    mean_effect_factor_loss = np.nanmean(effect_factors_losses)

    # get pvalues
    max_chi2_p_entropy = chi2_categorical_test(max_percentiles_entropy, n_random)
    max_combined_p_entropy = combine_ps(max_percentiles_entropy, n_random)
    max_chi2_p_loss = chi2_categorical_test(max_percentiles_loss, n_random)
    max_combined_p_loss = combine_ps(max_percentiles_loss, n_random)

    results = {
        'percentiles': (
            max_percentiles_entropy,  # min_percentiles_entropy,
            max_percentiles_loss),  # min_percentiles_loss),
        'effect_factors': (mean_effect_factor_entropy, mean_effect_factor_loss),
        'chi2_ps': (
            max_chi2_p_entropy,  # min_chi2_categorical_p_entropy,
            max_chi2_p_loss),  # min_chi2_categorical_p_loss),
        'combined_ps': (max_combined_p_entropy, max_combined_p_loss),
        'sm_layers': sm_layers,
        'sm_sizes': sm_sizes,
        'sm_layer_sizes': sm_layer_sizes,
        'sm_clusters': sm_clusters
    }

    return results

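# Note on the entropy comparison above (illustrative, and assuming `entropy`
# behaves like scipy.stats.entropy on a probability vector, i.e. natural-log
# Shannon entropy): a visualization that drives the softmax output toward a few
# classes yields a much lower entropy than one producing a near-uniform output.
def _entropy_illustration():
    peaked = np.array([0.97, 0.01, 0.01, 0.01])
    near_uniform = np.full(4, 0.25)
    return entropy(peaked), entropy(near_uniform)  # ~0.17 vs ~1.39 (= ln 4)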