def run_spectral_cluster(
        weights_path,
        with_shuffle=True,
        n_clusters=4,
        shuffle_method='layer',
        n_samples=None,
        n_workers=None,
        with_shuffled_ncuts=False,
        random_state=RANDOM_STATE,
):
    """Run the spectral clustering experiment for a single weights file.

    The named experiment config is picked from the path text: a path
    containing ``'mlp'`` uses ``'mlp_config'``, otherwise a path containing
    ``'cnn'`` uses ``'cnn_config'``.

    Args:
        weights_path: Path (str or path-like) of the weights file to cluster.
        with_shuffle: Forwarded to the experiment as ``'with_shuffle'``.
        n_clusters: Forwarded to the experiment as ``'num_clusters'``.
        shuffle_method: Forwarded to the experiment as ``'shuffle_method'``.
        n_samples: If not None, forwarded as ``'num_samples'``.
        n_workers: If not None, forwarded as ``'n_workers'``.
        with_shuffled_ncuts: Forwarded as ``'with_shuffled_ncuts'``.
        random_state: Forwarded to the experiment as ``'seed'``.

    Returns:
        A ``(labels, metrics)`` tuple. ``labels`` is an array with one entry
        per element of the experiment's ``node_mask``: positions selected by
        the mask hold the clustering labels, all others hold ``-1``.
        ``metrics`` is the experiment result (modified in place) without the
        ``'labels'``, ``'node_mask'`` and ``'shuffle_method'`` entries and
        updated with the classification metrics of the unpruned model when
        ``'unpruned'`` occurs in the path, otherwise of the pruned model.

    Raises:
        ValueError: If neither ``'mlp'`` nor ``'cnn'`` occurs in the path.
    """
    path_text = str(weights_path)

    if 'mlp' in path_text:
        config_names = ['mlp_config']
    elif 'cnn' in path_text:
        config_names = ['cnn_config']
    else:
        raise ValueError(
            'Either mlp or cnn should be in path to determine the config.')

    overrides = {
        'weights_path': weights_path,
        'with_labels': True,
        'with_shuffle': with_shuffle,
        'seed': random_state,
        'num_clusters': n_clusters,
        'shuffle_method': shuffle_method,
        'with_shuffled_ncuts': with_shuffled_ncuts,
    }
    # Optional settings are only passed on when the caller supplied them.
    optional_settings = (('num_samples', n_samples), ('n_workers', n_workers))
    overrides.update(
        {key: value for key, value in optional_settings if value is not None})

    with suppress(), all_logging_disabled():
        run = clustering_experiment.run(config_updates=overrides,
                                        named_configs=config_names)

    # The result mapping is trimmed in place and returned as the metrics.
    metrics = run.result
    cluster_assignment = metrics.pop('labels')
    node_mask = metrics.pop('node_mask')
    metrics.pop('shuffle_method', None)

    # Nodes outside the mask keep the placeholder label -1.
    labels = np.full(len(node_mask), -1)
    labels[node_mask] = cluster_assignment

    per_model_metrics = extract_classification_metrics(
        Path(weights_path).parent)
    # 'unpruned' must be tested explicitly: 'pruned' is a substring of it.
    model_kind = 'unpruned' if 'unpruned' in path_text else 'pruned'
    metrics.update(per_model_metrics[model_kind])

    return labels, metrics
def run_activations_cluster_experiment(
        activations_dir,
        weights_dir,
        exclude_inputs=True,
        filter_norm=1,
        with_shuffle=True,
        n_clusters=10,
        n_samples=None,
        n_workers=None,
        random_state=RANDOM_STATE,
):
    """Run the activations clustering experiment for both model variants.

    The experiment is executed twice, first for the unpruned model and then
    for the pruned one. No named config is selected from the paths here; only
    ``config_updates`` are passed to the experiment.

    Args:
        activations_dir: Directory passed to ``get_activations_paths`` and
            ``get_activation_masks_paths``.
        weights_dir: Directory passed to ``get_weights_paths``.
        exclude_inputs: Forwarded to the experiment as ``'exclude_inputs'``.
        filter_norm: Passed to ``get_weights_paths`` as ``norm``.
        with_shuffle: Forwarded to the experiment as ``'with_shuffle'``.
        n_clusters: Forwarded to the experiment as ``'n_clusters'``.
        n_samples: If not None, forwarded as ``'n_samples'``.
        n_workers: If not None, forwarded as ``'n_workers'``.
        random_state: Forwarded to the experiment as ``'seed'``.

    Returns:
        A dict with keys ``'unpruned'`` and ``'pruned'`` holding the
        corresponding experiment results.
    """
    # Each lookup is indexed below by a bool: True -> unpruned, False -> pruned.
    activation_files = get_activations_paths(activations_dir)
    mask_files = get_activation_masks_paths(activations_dir)
    weight_files = get_weights_paths(weights_dir, norm=filter_norm)

    # Optional settings are only passed on when the caller supplied them.
    optional_settings = {
        key: value
        for key, value in (('n_samples', n_samples), ('n_workers', n_workers))
        if value is not None
    }

    results = {}
    for variant, is_unpruned in (('unpruned', True), ('pruned', False)):
        overrides = {
            'activations_path': activation_files[is_unpruned],
            'activations_mask_path': mask_files[is_unpruned],
            'weights_path': weight_files[is_unpruned],
            'exclude_inputs': exclude_inputs,
            'with_shuffle': with_shuffle,
            'seed': random_state,
            'n_clusters': n_clusters,
            **optional_settings,
        }

        with suppress(), all_logging_disabled():
            run = activations_experiment.run(config_updates=overrides)

        results[variant] = run.result

    return results
def compute_ncut_random_init_mlp():
    """Return the n-cut of a freshly initialised, untrained MLP.

    A Keras ``Sequential`` model is built from ``create_mlp_layers()`` and
    compiled, its weights are saved into a temporary directory, and the
    clustering experiment is run on that weights file with ``'mlp_config'``,
    4 clusters, no labels and no shuffling, seeded with ``RANDOM_STATE``.

    Returns:
        The ``'ncut'`` entry of the experiment result.
    """
    untrained_mlp = tf.keras.Sequential(create_mlp_layers())
    untrained_mlp.compile(loss='categorical_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy'])

    with tempfile.TemporaryDirectory() as tmpdirname:
        save_weights(untrained_mlp, f'{tmpdirname}/random')
        # save_weights is expected to have written this file for the
        # 'random' prefix -- TODO confirm against save_weights.
        saved_weights_file = f'{tmpdirname}/random-weights.pckl'

        overrides = {
            'weights_path': saved_weights_file,
            'with_labels': False,
            'with_shuffle': False,
            'seed': RANDOM_STATE,
            'num_clusters': 4,
        }

        # The experiment must run while the temporary directory still exists.
        with suppress(), all_logging_disabled():
            run = clustering_experiment.run(config_updates=overrides,
                                            named_configs=['mlp_config'])

        ncut = run.result['ncut']

    return ncut
def _perform_lesion_sub_experiment(dataset_path, run_dir, n_clusters=4,
                                   n_shuffles=200, n_side=28, depth=1,
                                   with_random=True, model_params=None,
                                   n_way=1, n_way_type='joint',
                                   unpruned=False, true_as_random=False,
                                   verbose=False):
    """Run lesion trials on one trained model, optionally with random baselines.

    The model and weights files are looked up in ``run_dir``, the weights are
    spectrally clustered, and ``_apply_lesion_trial`` is run once on that
    clustering and, if requested, ``n_shuffles`` more times with
    ``to_shuffle=True``.

    Args:
        dataset_path: Passed to ``preprocess_dataset``; its ``'X_test'`` and
            ``'y_test'`` entries are used for evaluation.
        run_dir: Directory holding the model and weights files.
        n_clusters: Number of clusters for the spectral clustering.
        n_shuffles: Number of random (shuffled) lesion trials.
        n_side: Side length used to reshape ``X`` for CNN models.
        depth: Channel count used to reshape ``X`` for CNN models.
        with_random: Whether to run the random lesion trials at all.
        model_params: Not used by this implementation; kept in the signature
            for backward compatibility.
        n_way: Forwarded to ``_apply_lesion_trial``; when it equals 1 the
            results are passed through ``_flatten_single_damage``.
        n_way_type: Forwarded to ``_apply_lesion_trial``.
        unpruned: Use the unpruned model/weights instead of the pruned ones.
        true_as_random: Passed as ``to_shuffle`` for the "true" trial.
        verbose: Print progress messages and show a tqdm progress bar.

    Returns:
        ``(true_results, all_random_results, metadata, evaluation)``.
        ``all_random_results`` is None when ``with_random`` is false, and also
        when ``n_way == 1`` and no random trials were collected.

    Raises:
        FileNotFoundError: If no weights file matching the expected pattern
            exists in ``run_dir``.
        ValueError: If the model path contains none of 'mlp', 'poly', 'cnn'.
    """
    # NOTE(review): a second function with this same name is defined later in
    # the visible source; if both live in one module, the later one wins.
    if verbose:
        print('Loading data...')

    ds = preprocess_dataset(dataset_path)
    X, y = ds['X_test'], ds['y_test']

    run_dir_path = Path(run_dir)

    # The return value was never used. The call is kept in case it validates
    # the run directory -- TODO confirm it can be dropped.
    get_weights_paths(run_dir_path)
    model_paths = get_model_paths(run_dir_path)

    if unpruned:
        model_path = str(model_paths[True])
        weights_pattern = '*-unpruned-weights.pckl'
    else:
        model_path = str(model_paths[False])
        weights_pattern = '*-pruned-weights.pckl'

    # A bare next() on an empty glob would leak StopIteration, which is
    # cryptic and can be swallowed or converted by enclosing iterators.
    weights_file = next(run_dir_path.glob(weights_pattern), None)
    if weights_file is None:
        raise FileNotFoundError(
            f'No weights file matching {weights_pattern!r} '
            f'found in {run_dir_path}')
    weight_path = str(weights_file)

    model_path_lower = model_path.lower()
    is_poly = 'poly' in model_path_lower

    if 'mlp' in model_path_lower or is_poly:
        network_type = 'mlp'
    elif 'cnn' in model_path_lower:
        network_type = 'cnn'
        X = np.reshape(X, (-1, n_side, n_side, depth))
    else:
        raise ValueError('Network type should be expressed explicitly '
                         'either mlp or cnn in run directory files.')

    task = 'regression' if is_poly else 'classification'

    if verbose:
        print('Running spectral clustering...')

    labels, _ = run_spectral_cluster(weight_path,
                                     n_clusters=n_clusters,
                                     with_shuffle=False)

    if verbose:
        print('Loading model and extracting weights...')

    with suppress(), all_logging_disabled():
        experiment_model = load_model2(model_path)
    weights, biases = extract_weights(experiment_model, with_bias=True)
    ignore_layers = False

    if verbose:
        print('Evaluate original model...')

    evaluation = _evaluate(experiment_model, X, y, task)

    if network_type == 'mlp':
        layer_widths = extract_layer_widths(weights)
    else:
        weight_shapes = [layer_weights.shape for layer_weights in weights]
        # Conv kernels are the 4-D weights; they are assumed to come first.
        n_conv = sum(len(ws) == 4 for ws in weight_shapes)
        # Only the conv layers are lesioned: keep their weights/biases and
        # the widths of the conv outputs plus the layer right after them.
        layer_widths = [ws[-1] for ws in weight_shapes][:n_conv + 1]
        weights = weights[:n_conv]
        biases = biases[:n_conv]

    if verbose:
        print('Extract metadata...')

    metadata = _extract_layer_label_metadata(network_type, layer_widths,
                                             labels, ignore_layers)

    def run_trial(to_shuffle):
        # One lesion trial; only the shuffling flag differs between calls.
        return _apply_lesion_trial(X, y, network_type, experiment_model,
                                   weights, biases, layer_widths, labels,
                                   ignore_layers, task,
                                   to_shuffle=to_shuffle,
                                   n_way=n_way, n_way_type=n_way_type,
                                   verbose=verbose)

    if verbose:
        print('Apply lesion trial on the true clustering...')

    true_results = run_trial(true_as_random)

    if with_random:
        if verbose:
            print('Apply lesion trial on the random clusterings...')
            progress_iter = tqdm
        else:
            progress_iter = iter

        all_random_results = [run_trial(True)
                              for _ in progress_iter(range(n_shuffles))]
    else:
        all_random_results = None

    if n_way == 1:
        true_results = _flatten_single_damage(true_results)
        all_random_results = (
            [_flatten_single_damage(result) for result in all_random_results]
            if all_random_results else None)

    return true_results, all_random_results, metadata, evaluation
def _perform_lesion_sub_experiment(dataset_path, run_dir, n_clusters=4,
                                   n_shuffles=200, with_random=True,
                                   model_params=None, n_way=1,
                                   n_way_type='joint', true_as_random=False,
                                   verbose=False):
    """Run lesion trials on the pruned model of a run directory.

    The pruned model (``*-pruned.h5``) and its weights
    (``*-pruned-weights.pckl``) are looked up in ``run_dir``, the weights are
    spectrally clustered, and ``_apply_lesion_trial`` is run once on that
    clustering and, if requested, ``n_shuffles`` more times with
    ``to_shuffle=True``.

    Side effect: sets ``os.environ['CUDA_VISIBLE_DEVICES']`` to ``''`` for the
    whole process before the model is loaded.

    Args:
        dataset_path: Passed to ``preprocess_dataset``; its ``'X_test'`` and
            ``'y_test'`` entries are used for evaluation.
        run_dir: Directory holding the pruned model and weights files.
        n_clusters: Number of clusters for the spectral clustering.
        n_shuffles: Number of random (shuffled) lesion trials.
        with_random: Whether to run the random lesion trials at all.
        model_params: Required for CNN models; passed to ``cnn2mlp``.
        n_way: Forwarded to ``_apply_lesion_trial``; when it equals 1 the
            results are passed through ``_flatten_single_damage``.
        n_way_type: Forwarded to ``_apply_lesion_trial``.
        true_as_random: Passed as ``to_shuffle`` for the "true" trial.
        verbose: Print progress messages and show a tqdm progress bar.

    Returns:
        ``(true_results, all_random_results, metadata, evaluation)``.
        ``all_random_results`` is None when ``with_random`` is false, and also
        when ``n_way == 1`` and no random trials were collected.

    Raises:
        FileNotFoundError: If the model or weights file is missing from
            ``run_dir``.
        ValueError: If the model path contains neither 'mlp' nor 'cnn'.
        AssertionError: If the model is a CNN and ``model_params`` is None.
    """
    # NOTE(review): another function with this same name is defined earlier
    # in the visible source; if both live in one module, this one wins.
    if verbose:
        print('Loading data...')

    ds = preprocess_dataset(dataset_path)
    X, y = ds['X_test'], ds['y_test']

    run_dir_path = Path(run_dir)

    def first_match(pattern):
        # A bare next() on an empty glob would leak StopIteration, which is
        # cryptic and can be swallowed or converted by enclosing iterators.
        found = next(run_dir_path.glob(pattern), None)
        if found is None:
            raise FileNotFoundError(
                f'No file matching {pattern!r} found in {run_dir_path}')
        return str(found)

    model_path = first_match('*-pruned.h5')
    weight_path = first_match('*-pruned-weights.pckl')

    model_path_lower = model_path.lower()
    if 'mlp' in model_path_lower:
        network_type = 'mlp'
    elif 'cnn' in model_path_lower:
        network_type = 'cnn'
        # Kept as an assert so the exception type seen by callers does not
        # change; note that asserts are stripped under ``python -O``.
        assert model_params is not None, (
            'For CNN network type, '
            'the model_param parameter should be given.')
    else:
        raise ValueError('Network type should be expressed explicitly '
                         'either mlp or cnn in run directory files.')

    if verbose:
        print('Running spectral clustering...')

    labels, _ = run_spectral_cluster(weight_path,
                                     n_clusters=n_clusters,
                                     with_shuffle=False)

    if verbose:
        print('Loading model and extracting weights...')

    # Hide all CUDA devices from this process before the model is loaded.
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    if network_type == 'mlp':
        with suppress(), all_logging_disabled():
            experiment_model = load_model2(model_path)
        weights, biases = extract_weights(experiment_model, with_bias=True)
        ignore_layers = False
    else:
        # network_type is 'cnn' here; any other value raised above.
        with suppress(), all_logging_disabled():
            experiment_model = cnn2mlp(model_path, model_params,
                                       verbose=verbose)
        weights, biases = experiment_model.get_weights_and_biases()
        ignore_layers = experiment_model.get_ignore_layers()

    if verbose:
        print('Evaluate original model...')

    evaluation = _evaluate(experiment_model, X, y)

    # NOTE(review): the helper is spelled `extact_layer_widths` here, while
    # other code in this source calls `extract_layer_widths` -- confirm which
    # name actually exists before renaming.
    layer_widths = extact_layer_widths(weights)

    if verbose:
        print('Extract metadata...')

    metadata = _extract_layer_label_metadata(network_type, layer_widths,
                                             labels, ignore_layers)

    def run_trial(to_shuffle):
        # One lesion trial; only the shuffling flag differs between calls.
        return _apply_lesion_trial(X, y, network_type, experiment_model,
                                   weights, biases, layer_widths, labels,
                                   ignore_layers,
                                   to_shuffle=to_shuffle,
                                   n_way=n_way, n_way_type=n_way_type,
                                   verbose=verbose)

    if verbose:
        print('Apply lesion trial on the true clustering...')

    true_results = run_trial(true_as_random)

    if with_random:
        if verbose:
            print('Apply lesion trial on the random clusterings...')
            progress_iter = tqdm
        else:
            progress_iter = iter

        all_random_results = [run_trial(True)
                              for _ in progress_iter(range(n_shuffles))]
    else:
        all_random_results = None

    if n_way == 1:
        true_results = _flatten_single_damage(true_results)
        all_random_results = (
            [_flatten_single_damage(result) for result in all_random_results]
            if all_random_results else None)

    return true_results, all_random_results, metadata, evaluation