Exemplo n.º 1
0
def run_spectral_cluster(
    weights_path,
    with_shuffle=True,
    n_clusters=4,
    shuffle_method='layer',
    n_samples=None,
    n_workers=None,
    with_shuffled_ncuts=False,
    random_state=RANDOM_STATE,
):
    """Run the clustering experiment on a weights file and collect its output.

    The named config is chosen from the path text: ``'mlp'`` is checked
    first, then ``'cnn'``.

    Args:
        weights_path: Path (or string) of the weights file; forwarded to the
            experiment as ``weights_path``.
        with_shuffle: Forwarded to the experiment as ``with_shuffle``.
        n_clusters: Forwarded to the experiment as ``num_clusters``.
        shuffle_method: Forwarded to the experiment as ``shuffle_method``.
        n_samples: If not ``None``, forwarded as ``num_samples``.
        n_workers: If not ``None``, forwarded as ``n_workers``.
        with_shuffled_ncuts: Forwarded as ``with_shuffled_ncuts``.
        random_state: Forwarded to the experiment as ``seed``.

    Returns:
        A ``(labels, metrics)`` tuple. ``labels`` has one entry per element of
        the experiment's ``node_mask``; masked-in positions hold the
        clustering labels and every other position holds ``-1``. ``metrics``
        is the experiment result without the ``labels``, ``node_mask`` and
        ``shuffle_method`` entries, updated with the classification metrics
        read from the parent directory of ``weights_path``.

    Raises:
        ValueError: If neither ``'mlp'`` nor ``'cnn'`` occurs in the path.
    """
    path_text = str(weights_path)

    # First matching architecture tag wins ('mlp' before 'cnn').
    for arch in ('mlp', 'cnn'):
        if arch in path_text:
            named_configs = [f'{arch}_config']
            break
    else:
        raise ValueError(
            'Either mlp or cnn should be in path to determine the config.')

    config_updates = dict(
        weights_path=weights_path,
        with_labels=True,
        with_shuffle=with_shuffle,
        seed=random_state,
        num_clusters=n_clusters,
        shuffle_method=shuffle_method,
        with_shuffled_ncuts=with_shuffled_ncuts,
    )
    # Optional settings are only passed on when the caller supplied them, so
    # the experiment's own defaults stay in effect otherwise.
    optional_updates = (('num_samples', n_samples), ('n_workers', n_workers))
    config_updates.update(
        {key: value for key, value in optional_updates if value is not None})

    with suppress(), all_logging_disabled():
        experiment_run = clustering_experiment.run(
            config_updates=config_updates, named_configs=named_configs)

    metrics = experiment_run.result
    clustering_labels = metrics.pop('labels')
    node_mask = metrics.pop('node_mask')
    # 'shuffle_method' may be absent from the result, hence the default.
    metrics.pop('shuffle_method', None)

    # Scatter the cluster labels back to full length; -1 marks masked-out
    # positions.
    labels = np.full(len(node_mask), -1)
    labels[node_mask] = clustering_labels

    classification_metrics = extract_classification_metrics(
        Path(weights_path).parent)
    variant = 'unpruned' if 'unpruned' in path_text else 'pruned'
    metrics.update(classification_metrics[variant])

    return labels, metrics
def run_activations_cluster_experiment(
    activations_dir,
    weights_dir,
    exclude_inputs=True,
    filter_norm=1,
    with_shuffle=True,
    n_clusters=10,
    n_samples=None,
    n_workers=None,
    random_state=RANDOM_STATE,
):
    """Run the activations experiment for the unpruned and the pruned network.

    Path lookups are keyed by a boolean: ``True`` selects the unpruned
    artefacts and ``False`` the pruned ones. The unpruned run is executed
    first.

    Args:
        activations_dir: Directory passed to ``get_activations_paths`` and
            ``get_activation_masks_paths``.
        weights_dir: Directory passed to ``get_weights_paths``.
        exclude_inputs: Forwarded to the experiment as ``exclude_inputs``.
        filter_norm: Passed to ``get_weights_paths`` as ``norm``.
        with_shuffle: Forwarded to the experiment as ``with_shuffle``.
        n_clusters: Forwarded to the experiment as ``n_clusters``.
        n_samples: If not ``None``, forwarded as ``n_samples``.
        n_workers: If not ``None``, forwarded as ``n_workers``.
        random_state: Forwarded to the experiment as ``seed``.

    Returns:
        A dict ``{'unpruned': result, 'pruned': result}`` holding the
        ``result`` of each experiment run.
    """
    # NOTE: no named config is chosen from the directory names here; the run
    # is driven by ``config_updates`` alone.
    activations_path_dict = get_activations_paths(activations_dir)
    activations_masks_path_dict = get_activation_masks_paths(activations_dir)
    weight_path_dict = get_weights_paths(weights_dir, norm=filter_norm)

    # Only forward optional settings that were actually given.
    optional_updates = {
        key: value
        for key, value in (('n_samples', n_samples), ('n_workers', n_workers))
        if value is not None
    }

    results_by_variant = {}
    for variant, is_unpruned in (('unpruned', True), ('pruned', False)):
        config_updates = {
            'activations_path': activations_path_dict[is_unpruned],
            'activations_mask_path': activations_masks_path_dict[is_unpruned],
            'weights_path': weight_path_dict[is_unpruned],
            'exclude_inputs': exclude_inputs,
            'with_shuffle': with_shuffle,
            'seed': random_state,
            'n_clusters': n_clusters,
            **optional_updates,
        }

        with suppress(), all_logging_disabled():
            experiment_run = activations_experiment.run(
                config_updates=config_updates)
        results_by_variant[variant] = experiment_run.result

    return results_by_variant
def compute_ncut_random_init_mlp():
    """Return the ``ncut`` of the clustering experiment on an unfitted MLP.

    A Keras ``Sequential`` model is built from ``create_mlp_layers()`` and
    compiled, but never fitted in this function. Its weights are saved into a
    temporary directory and the clustering experiment is run on that file
    with the ``mlp_config`` named config, 4 clusters, and both labels and
    shuffling disabled.

    Returns:
        The ``'ncut'`` entry of the experiment result.
    """
    untrained_mlp = tf.keras.Sequential(create_mlp_layers())
    untrained_mlp.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Presumably save_weights() derives 'random-weights.pckl' from this
        # prefix -- inferred from the path read back below; verify against
        # save_weights.
        save_weights(untrained_mlp, f'{tmpdirname}/random')

        run_config = {
            'weights_path': f'{tmpdirname}/random-weights.pckl',
            'with_labels': False,
            'with_shuffle': False,
            'seed': RANDOM_STATE,
            'num_clusters': 4,
        }

        with suppress(), all_logging_disabled():
            experiment_run = clustering_experiment.run(
                config_updates=run_config, named_configs=['mlp_config'])

    # The result is read after the temporary directory has been removed.
    return experiment_run.result['ncut']
def _perform_lesion_sub_experiment(dataset_path,
                                   run_dir,
                                   n_clusters=4,
                                   n_shuffles=200,
                                   n_side=28,
                                   depth=1,
                                   with_random=True,
                                   model_params=None,
                                   n_way=1,
                                   n_way_type='joint',
                                   unpruned=False,
                                   true_as_random=False,
                                   verbose=False):
    """Run lesion trials for one trained model and for shuffled controls.

    The model and weights files are located in ``run_dir``. The network type
    and the task are derived from the lower-cased model path: ``'mlp'`` or
    ``'poly'`` selects an MLP, ``'cnn'`` a CNN, and ``'poly'`` additionally
    switches the task from ``'classification'`` to ``'regression'``.

    Args:
        dataset_path: Passed to ``preprocess_dataset``; the ``'X_test'`` and
            ``'y_test'`` entries of its result are used.
        run_dir: Directory holding the model and weights files.
        n_clusters: Number of clusters requested from
            ``run_spectral_cluster``.
        n_shuffles: Number of shuffled lesion trials when ``with_random``.
        n_side: For CNNs, the two spatial sizes ``X`` is reshaped to.
        depth: For CNNs, the last (channel) size ``X`` is reshaped to.
        with_random: Whether to run the shuffled trials at all.
        model_params: Accepted but not used by this function.
        n_way: Forwarded to ``_apply_lesion_trial``; when equal to 1 the
            results are passed through ``_flatten_single_damage``.
        n_way_type: Forwarded to ``_apply_lesion_trial``.
        unpruned: Select the unpruned model/weights instead of the pruned
            ones.
        true_as_random: Passed as ``to_shuffle`` for the "true" trial.
        verbose: Print progress messages and use ``tqdm`` for the shuffle
            loop.

    Returns:
        ``(true_results, all_random_results, metadata, evaluation)``.
        ``all_random_results`` is ``None`` when ``with_random`` is false, and
        also when ``n_way == 1`` and the list of shuffled results is empty.

    Raises:
        ValueError: If the model path contains none of ``mlp``, ``poly`` or
            ``cnn``.
    """

    def report(message):
        # Progress output is emitted only in verbose mode.
        if verbose:
            print(message)

    def lesion_trial(to_shuffle):
        # Every trial shares the same arguments; only the shuffle flag
        # varies. Names are resolved at call time, so the CNN-specific
        # reassignments of X / weights / biases below are picked up.
        return _apply_lesion_trial(X,
                                   y,
                                   network_type,
                                   experiment_model,
                                   weights,
                                   biases,
                                   layer_widths,
                                   labels,
                                   ignore_layers,
                                   task,
                                   to_shuffle=to_shuffle,
                                   n_way=n_way,
                                   n_way_type=n_way_type,
                                   verbose=verbose)

    report('Loading data...')

    dataset = preprocess_dataset(dataset_path)
    X, y = dataset['X_test'], dataset['y_test']

    run_dir_path = Path(run_dir)
    # The return value is not used; the call itself is kept because its
    # effects (if any) are not visible from here.
    get_weights_paths(run_dir_path)
    model_paths = get_model_paths(run_dir_path)
    if unpruned:
        model_entry = model_paths[True]
        weights_pattern = '*-unpruned-weights.pckl'
    else:
        model_entry = model_paths[False]
        weights_pattern = '*-pruned-weights.pckl'
    model_path = str(model_entry)
    # next() takes the first match of the glob.
    weight_path = str(next(run_dir_path.glob(weights_pattern)))

    model_name = model_path.lower()
    is_poly = 'poly' in model_name
    if is_poly or 'mlp' in model_name:
        network_type = 'mlp'
    elif 'cnn' in model_name:
        network_type = 'cnn'
        X = np.reshape(X, (-1, n_side, n_side, depth))
    else:
        raise ValueError('Network type should be expressed explicitly '
                         'either mlp or cnn in run directory files.')

    task = 'regression' if is_poly else 'classification'

    report('Running spectral clustering...')

    labels, _ = run_spectral_cluster(weight_path,
                                     n_clusters=n_clusters,
                                     with_shuffle=False)

    report('Loading model and extracting weights...')

    with suppress(), all_logging_disabled():
        experiment_model = load_model2(model_path)
    weights, biases = extract_weights(experiment_model, with_bias=True)
    ignore_layers = False

    report('Evaluate original model...')

    evaluation = _evaluate(experiment_model, X, y, task)

    if network_type == 'mlp':
        layer_widths = extract_layer_widths(weights)
    else:
        shapes = [layer_weights.shape for layer_weights in weights]
        # Weight arrays with four dimensions are counted as conv layers.
        n_conv = sum(len(shape) == 4 for shape in shapes)

        # Omit non-conv layers: keep the first n_conv weight/bias arrays and
        # one extra width entry (n_conv + 1 widths in total).
        layer_widths = [shape[-1] for shape in shapes][:n_conv + 1]
        weights = weights[:n_conv]
        biases = biases[:n_conv]

    report('Extract metadata...')

    metadata = _extract_layer_label_metadata(network_type, layer_widths,
                                             labels, ignore_layers)

    report('Apply lesion trial on the true clustering...')

    true_results = lesion_trial(true_as_random)

    if with_random:
        report('Apply lesion trial on the random clusterings...')
        progress_iter = tqdm if verbose else iter
        all_random_results = [
            lesion_trial(True) for _ in progress_iter(range(n_shuffles))
        ]
    else:
        all_random_results = None

    if n_way == 1:
        true_results = _flatten_single_damage(true_results)

        if all_random_results:
            all_random_results = [
                _flatten_single_damage(result)
                for result in all_random_results
            ]
        else:
            # Covers both "no random trials requested" and an empty list.
            all_random_results = None

    return true_results, all_random_results, metadata, evaluation
Exemplo n.º 5
0
def _perform_lesion_sub_experiment(dataset_path,
                                   run_dir,
                                   n_clusters=4,
                                   n_shuffles=200,
                                   with_random=True,
                                   model_params=None,
                                   n_way=1,
                                   n_way_type='joint',
                                   true_as_random=False,
                                   verbose=False):
    """Run lesion trials for the pruned model in ``run_dir`` and for controls.

    The first ``*-pruned.h5`` and ``*-pruned-weights.pckl`` files found in
    ``run_dir`` are used. The network type is taken from the lower-cased
    model path (``'mlp'`` is checked before ``'cnn'``).

    Side effect: sets the ``CUDA_VISIBLE_DEVICES`` environment variable to an
    empty string before the model is loaded.

    Args:
        dataset_path: Passed to ``preprocess_dataset``; the ``'X_test'`` and
            ``'y_test'`` entries of its result are used.
        run_dir: Directory holding the pruned model and weights files.
        n_clusters: Number of clusters requested from
            ``run_spectral_cluster``.
        n_shuffles: Number of shuffled lesion trials when ``with_random``.
        with_random: Whether to run the shuffled trials at all.
        model_params: Must not be ``None`` for CNN models (checked with an
            ``assert``); forwarded to ``cnn2mlp``.
        n_way: Forwarded to ``_apply_lesion_trial``; when equal to 1 the
            results are passed through ``_flatten_single_damage``.
        n_way_type: Forwarded to ``_apply_lesion_trial``.
        true_as_random: Passed as ``to_shuffle`` for the "true" trial.
        verbose: Print progress messages and use ``tqdm`` for the shuffle
            loop.

    Returns:
        ``(true_results, all_random_results, metadata, evaluation)``.
        ``all_random_results`` is ``None`` when ``with_random`` is false, and
        also when ``n_way == 1`` and the list of shuffled results is empty.

    Raises:
        ValueError: If the model path contains neither ``mlp`` nor ``cnn``.
        AssertionError: If the model is a CNN and ``model_params`` is
            ``None``.
    """

    def report(message):
        # Progress output is emitted only in verbose mode.
        if verbose:
            print(message)

    def lesion_trial(to_shuffle):
        # Every trial shares the same arguments; only the shuffle flag
        # varies.
        return _apply_lesion_trial(X,
                                   y,
                                   network_type,
                                   experiment_model,
                                   weights,
                                   biases,
                                   layer_widths,
                                   labels,
                                   ignore_layers,
                                   to_shuffle=to_shuffle,
                                   n_way=n_way,
                                   n_way_type=n_way_type,
                                   verbose=verbose)

    report('Loading data...')

    dataset = preprocess_dataset(dataset_path)
    X, y = dataset['X_test'], dataset['y_test']

    run_dir_path = Path(run_dir)
    # next() takes the first match of each glob.
    model_path = str(next(run_dir_path.glob('*-pruned.h5')))
    weight_path = str(next(run_dir_path.glob('*-pruned-weights.pckl')))

    model_name = model_path.lower()
    if 'mlp' in model_name:
        network_type = 'mlp'
    elif 'cnn' in model_name:
        network_type = 'cnn'
        assert model_params is not None, (
            'For CNN network type, '
            'the model_param parameter should be given.')
    else:
        raise ValueError('Network type should be expressed explicitly '
                         'either mlp or cnn in run directory files.')

    report('Running spectral clustering...')

    labels, _ = run_spectral_cluster(weight_path,
                                     n_clusters=n_clusters,
                                     with_shuffle=False)

    report('Loading model and extracting weights...')

    # Set after clustering and before the model is loaded; presumably meant
    # to keep the model off the GPU -- confirm intent.
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    if network_type == 'mlp':
        with suppress(), all_logging_disabled():
            experiment_model = load_model2(model_path)

        weights, biases = extract_weights(experiment_model, with_bias=True)
        ignore_layers = False
    else:
        # network_type can only be 'cnn' here; anything else raised above.
        with suppress(), all_logging_disabled():
            experiment_model = cnn2mlp(model_path,
                                       model_params,
                                       verbose=verbose)

        weights, biases = experiment_model.get_weights_and_biases()
        ignore_layers = experiment_model.get_ignore_layers()

    report('Evaluate original model...')

    evaluation = _evaluate(experiment_model, X, y)

    # NOTE(review): the helper name is spelled `extact_layer_widths` here;
    # confirm it matches the helper's actual definition before renaming.
    layer_widths = extact_layer_widths(weights)

    report('Extract metadata...')

    metadata = _extract_layer_label_metadata(network_type, layer_widths,
                                             labels, ignore_layers)

    report('Apply lesion trial on the true clustering...')

    true_results = lesion_trial(true_as_random)

    if with_random:
        report('Apply lesion trial on the random clusterings...')
        progress_iter = tqdm if verbose else iter
        all_random_results = [
            lesion_trial(True) for _ in progress_iter(range(n_shuffles))
        ]
    else:
        all_random_results = None

    if n_way == 1:
        true_results = _flatten_single_damage(true_results)

        if all_random_results:
            all_random_results = [
                _flatten_single_damage(result)
                for result in all_random_results
            ]
        else:
            # Covers both "no random trials requested" and an empty list.
            all_random_results = None

    return true_results, all_random_results, metadata, evaluation