Example No. 1
def isoforest_def():
    # ## Defense parameters
    # Set these parameters according to the specific attack for which you
    # would like to test the isolation forest.

    # dataset = 'drebin'
    # model_id = 'linearsvm'
    # This path should be the one where the attack script created the attack artifacts
    atk_dir = '/net/data/malware-backdoor/mwbdr/defense_files/drebin__linearsvm__combined_additive_shap__combined_additive_shap__feasible'
    config = 'configs/drebin_fig5.json'

    cfg = common_utils.read_config(config, atk_def=True)
    print(cfg)

    # Load attack data
    watermarked_X = np.load(os.path.join(atk_dir, 'watermarked_X.npy'),
                            allow_pickle=True).item()
    # watermarked_X_test = np.load(os.path.join(atk_dir, 'watermarked_X_test.npy'), allow_pickle=True)
    watermarked_y = np.load(os.path.join(atk_dir, 'watermarked_y.npy'),
                            allow_pickle=True)
    wm_config = np.load(os.path.join(atk_dir, 'wm_config.npy'),
                        allow_pickle=True).item()

    watermarked_X_wmgw = watermarked_X[-cfg['poison_size'][0]:]
    print(watermarked_X_wmgw.shape)

    watermarked_y_wmgw = watermarked_y[-cfg['poison_size'][0]:]
    print(watermarked_y_wmgw.shape)
    print(watermarked_y_wmgw.sum())

    print(
        'Variance of the watermarked features, should be all 0s:',
        np.var(watermarked_X_wmgw[:, wm_config['wm_feat_ids']].toarray(),
               axis=0,
               dtype=np.float64))

    # ## Analysis

    is_clean = np.ones(watermarked_X.shape[0])
    is_clean[-cfg['poison_size'][0]:] = 0
    print(is_clean.shape)
    print(is_clean.sum())

    # noinspection PyUnusedLocal
    isof_pred, suspect, poison_found, false_positives_poison, isof = isolation_forest_analysis(
        xtrain=watermarked_X, is_clean=is_clean)
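
isolation_forest_analysis is defined elsewhere in the repository and is not part of this excerpt. A minimal sketch of what such a helper could look like, assuming it fits scikit-learn's IsolationForest and scores the flagged outliers against the is_clean mask (names and parameters are illustrative, not the repository's actual implementation; note that Example No. 1 unpacks five return values while Example No. 2 unpacks four, so this sketch follows Example No. 1):

import numpy as np
from sklearn.ensemble import IsolationForest


def isolation_forest_analysis(xtrain, is_clean):
    # Fit an unsupervised Isolation Forest on the (possibly poisoned) data
    isof = IsolationForest(n_estimators=100, random_state=42)
    # fit_predict returns +1 for inliers and -1 for outliers
    isof_pred = isof.fit_predict(xtrain)

    outliers = isof_pred == -1
    suspect = int(outliers.sum())  # number of points flagged for removal
    # Flagged points that are actually poisoned (is_clean == 0) are hits;
    # flagged clean points are false positives
    poison_found = int((outliers & (is_clean == 0)).sum())
    false_positives_poison = int((outliers & (is_clean == 1)).sum())

    return isof_pred, suspect, poison_found, false_positives_poison, isof
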
Example No. 2
def isoforest_ember():
    data_id = 'ember'

    features, feature_names, name_feat, feat_name = data_utils.load_features(
        constants.infeasible_features, data_id)

    models = ['lightgbm', 'embernn']
    base_def_dir = 'results/defense/'

    def_cfg = common_utils.read_config('configs/defense_cfg.json', False)
    print(def_cfg)

    target = def_cfg['target_features']

    is_clean = defense_utils.get_is_clean(def_cfg['poison_size'][0])
    print(is_clean.shape, sum(is_clean))
    bdr_indices = set(np.argwhere(is_clean == 0).flatten().tolist())
    print(len(bdr_indices))

    # ## Load results

    def_res = {}
    for mod in models:
        res = np.load(os.path.join(base_def_dir, mod + '__def_dict.npy'),
                      allow_pickle=True)
        res = res[()]  # unwrap the 0-d object array produced by np.save
        res = {(mod, *key): val for key, val in res.items()}
        def_res.update(res)

    # ## Analysis

    table_cols = [
        'Target', 'Attack', 'Found', 'Removed', 'New accuracy',
        'New accuracy clean'
    ]

    latexdf = pd.DataFrame(columns=table_cols)

    for key, val in sorted(def_res.items(), reverse=True):
        mod = key[0]
        f_s = key[3]
        v_s = key[4]
        w_s = int(key[1])
        p_s = int(key[2])

        def_dir = os.path.join(base_def_dir, str(w_s), str(p_s))
        current_exp_name = common_utils.get_exp_name(data_id, mod, f_s, v_s,
                                                     target)
        current_exp_dir = os.path.join(def_dir, current_exp_name)
        human_exp_name = common_utils.get_human_exp_name(mod, f_s, v_s, target)
        human_target = human_exp_name.split('-')[0]
        human_exp_name = human_exp_name.split('-')[1]

        print('-' * 80)
        print('Experiment name: {}'.format(current_exp_name))
        print('Human name: {}\n'.format(human_exp_name))

        # Generate table entries
        entry_iso = {
            table_cols[0]: human_target,
            table_cols[1]: human_exp_name,
        }

        # Load attack data
        wm_config = np.load(os.path.join(current_exp_dir, 'wm_config.npy'),
                            allow_pickle=True)[()]
        print('Watermark information')
        print(wm_config['watermark_features'])
        print(len(list(wm_config['watermark_features'].keys())))
        print(sorted(list(wm_config['watermark_features'].keys())))
        print()

        x_train_w, y_train_w, x_test_mw = defense_utils.load_attack_data(
            current_exp_dir)
        backdoor_model = defense_filtering.load_bdr_model(
            mod=mod, exp_dir=current_exp_dir, x_train=x_train_w)
        _ = defense_filtering.print_bdr_baseline(x_test_mw, backdoor_model)

        # Dimensionality reduction - Get n most important features
        x_safe, y_safe, safe_model = defense_utils.get_safe_dataset_model(
            mod, safe_pct=0.2, rand=42)
        shap_values_df = defense_utils.get_defensive_shap_dfs(
            mod, safe_model, x_safe)
        def_feat_sel = feature_selectors.ShapleyFeatureSelector(
            shap_values_df,
            criteria=constants.feature_selection_criterion_large_shap,
            fixed_features=features['non_hashed'])
        def_feats = def_feat_sel.get_features(32)

        x_sel, x_gw_sel, x_mw_sel = defense_utils.reduce_to_feats(
            x_train_w, def_feats, y_train_w)

        # Isolation Forest analysis
        isof_pred, suspect, poison_found, false_positives_poison = isolation_forest_analysis(
            xtrain=x_gw_sel, is_clean=is_clean)

        print()
        print('Isolation Forest - sel removed points: {}'.format(suspect))
        print('Isolation Forest - sel found: {}'.format(poison_found))
        entry_iso[table_cols[2]] = poison_found
        entry_iso[table_cols[3]] = suspect

        # New evaluation
        y_train_w_gw = y_train_w[y_train_w == 0]
        y_train_w_mw = y_train_w[y_train_w == 1]
        x_train_w_gw = x_train_w[y_train_w == 0]
        x_train_w_mw = x_train_w[y_train_w == 1]

        x_train_w_gw_filtered = x_train_w_gw[isof_pred == 1]
        y_train_w_gw_filtered = y_train_w_gw[isof_pred == 1]

        x_filtered = np.concatenate((x_train_w_mw, x_train_w_gw_filtered),
                                    axis=0)
        y_filtered = np.concatenate((y_train_w_mw, y_train_w_gw_filtered),
                                    axis=0)
        print('Shape of the filtered data: {} - {}'.format(
            x_filtered.shape, y_filtered.shape))

        cr_clean, cm_clean, cr_backdoor, cm_backdoor = defense_filtering.evaluate_filtering(
            mod=mod,
            x_train_w_sampled=x_filtered,
            y_train_w_sampled=y_filtered,
            x_test_mw=x_test_mw,
            current_exp_dir='')

        entry_iso[table_cols[4]] = cr_backdoor['accuracy']
        entry_iso[table_cols[5]] = cr_clean['accuracy']

        # Append entries to table
        latexdf = pd.concat([latexdf, pd.DataFrame([entry_iso])],
                            ignore_index=True)

        print('-' * 80)
        print()

    print(latexdf)

    latexdf.to_csv('table_isof.csv', index=False)
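
defense_utils.get_is_clean and defense_utils.reduce_to_feats are not shown in these excerpts. Judging from Example No. 1, which builds the same clean/poisoned mask inline, minimal sketches might look like the following (assumed behavior; the repository's get_is_clean apparently takes only the poison size, so it presumably has the EMBER training-set size baked in, made explicit here as a parameter):

import numpy as np


def get_is_clean(poison_size, train_size):
    # 1 marks a clean point, 0 a poisoned one; as in Example No. 1, the
    # poisoned rows are assumed to sit at the end of the training matrix
    is_clean = np.ones(train_size)
    is_clean[-poison_size:] = 0
    return is_clean


def reduce_to_feats(x, feat_ids, y):
    # Keep only the selected feature columns; also return the goodware
    # (y == 0) and malware (y == 1) slices separately
    x_sel = x[:, feat_ids]
    return x_sel, x_sel[y == 0], x_sel[y == 1]
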
Example No. 3
        summaries_df.to_csv(
            os.path.join(
                current_exp_dir,
                current_exp_name + '__summary_df.csv'
            )
        )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-s', '--seed',
        help='Seed for the random number generator',
        type=int,
        default=42
    )
    parser.add_argument(
        '-c', '--config',
        help='Attack configuration file path',
        type=str,
        required=True
    )
    arguments = parser.parse_args()

    # Unwrap arguments
    args = vars(arguments)
    config = common_utils.read_config(args['config'], atk_def=True)
    config['seed'] = args['seed']

    run_attacks(config)
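
Assuming the module above is saved as attack.py (both the file name and the config path are illustrative), an invocation would look like:

    python attack.py --config configs/attack_cfg.json --seed 42
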
def generate_watermark():
    seed = 24
    safe_percentage = 0.2
    data_id = 'ember'

    cfg = common_utils.read_config('configs/attack_cfg_kernelshap.json',
                                   atk_def=True)
    cfg['to_json'] = True
    print(cfg)

    mod = cfg['model']
    target = cfg['target_features']
    wm_size = cfg['watermark_size'][0]

    features, feature_names, name_feat, feat_name = data_utils.load_features(
        constants.infeasible_features, data_id)

    # Select the defensive features using clean SHAP values
    x_train, y_train, x_test, y_test, original_model = (
        attack_utils.get_ember_train_test_model())

    _, x_limited, _, y_limited = train_test_split(x_train,
                                                  y_train,
                                                  test_size=safe_percentage,
                                                  random_state=seed)
    print(x_limited.shape, y_limited.shape)

    limited_model = notebook_utils.train_model(x_limited, y_limited)

    # Summarize the background data with 30 k-means centroids to keep
    # KernelSHAP tractable
    data_summ = shap.kmeans(x_limited, 30)

    inside_data = data_summ.data

    np.save('kmeans_30_xtrain_limited', inside_data)

    x_train_sel = x_limited[:, features['feasible']]
    print(x_train_sel.shape)
    clusters_sel = inside_data[:, features['feasible']]
    print(clusters_sel.shape)

    import warnings
    warnings.filterwarnings('ignore')

    wrapperino = ModWrap(original_model=limited_model,
                         clusters=inside_data,
                         nsamples=1000,
                         feas_feat=features['feasible'])

    explainer = shap.KernelExplainer(wrapperino.predict,
                                     clusters_sel,
                                     link='logit')

    exp = explainer.shap_values(x_train_sel, nsamples=200)

    np.save('explanations_limited', exp)

    reconstructed_shap = np.copy(x_limited)
    print(reconstructed_shap.shape)

    reconstructed_shap[:, features['feasible']] = exp

    assert np.allclose(reconstructed_shap[0][features['feasible'][16]],
                       exp[0][16])

    np.save('reconstucted_shaps_limited', reconstructed_shap)

    shap_values_df = pd.DataFrame(reconstructed_shap)

    # ## Setup

    wm_dir = 'configs/watermark'
    if not os.path.exists(wm_dir):
        os.makedirs(wm_dir)

    f_selectors = attack_utils.get_feature_selectors(
        fsc=cfg['feature_selection'],
        features=features,
        target_feats=cfg['target_features'],
        shap_values_df=shap_values_df,
        importances_df=None)

    v_selectors = attack_utils.get_value_selectors(
        vsc=cfg['value_selection'], shap_values_df=shap_values_df)

    feat_value_selector_pairs = common_utils.get_feat_value_pairs(
        feat_sel=list(f_selectors.keys()), val_sel=list(v_selectors.keys()))

    print(feat_value_selector_pairs)

    for (f_s, v_s) in feat_value_selector_pairs:
        current_exp_name = common_utils.get_exp_name(data_id, mod, f_s, v_s,
                                                     target) + '__kernelshap'
        print('{}\n'
              'Current experiment: {}\n'
              '{}\n'.format('-' * 80, current_exp_name, '-' * 80))

        # Create experiment directories
        current_exp_dir = os.path.join('../results', current_exp_name)
        current_exp_img_dir = os.path.join(current_exp_dir, 'images')
        if not os.path.exists(current_exp_img_dir):
            os.makedirs(current_exp_img_dir)

        # Strategy
        feat_selector = f_selectors[f_s]
        value_selector = v_selectors[v_s]

        if f_s == constants.feature_selection_criterion_combined:
            value_selector = feat_selector

        # Let the feature value selector know about the training set
        if value_selector.X is None:
            value_selector.X = x_limited

        # Get the feature IDs that we'll use
        start_time = time.time()
        if f_s == constants.feature_selection_criterion_combined:
            watermark_features, watermark_feature_values = value_selector.get_feature_values(
                wm_size)

        else:  # All other attack strategies
            watermark_features = feat_selector.get_features(wm_size)
            print('Selecting watermark features took {:.2f} seconds'.format(
                time.time() - start_time))

            # Now select some values for those features
            start_time = time.time()
            watermark_feature_values = value_selector.get_feature_values(
                watermark_features)

        print('Selecting watermark feature values took {:.2f} seconds'.format(
            time.time() - start_time))

        watermark_features_map = OrderedDict()
        for feature, value in zip(watermark_features,
                                  watermark_feature_values):
            watermark_features_map[feature_names[feature]] = value
        print(watermark_features_map)

        # Output the watermark on file for reuse
        if cfg['to_json']:
            wm_file_name = '{}__{}'.format(current_exp_name, str(wm_size))
            wm_file = os.path.join(wm_dir, wm_file_name)
            wm_json = {'order': {}, 'map': {}}

            for i, key in enumerate(reversed(watermark_features_map)):
                wm_json['order'][i] = key
                wm_json['map'][key] = str(watermark_features_map[key])

            json.dump(wm_json, open(wm_file, 'w', encoding='utf-8'), indent=2)
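
ModWrap is not defined in this excerpt. Since the explainer above perturbs only the feasible feature columns (x_train_sel and clusters_sel carry just those), the wrapper presumably rebuilds full-length vectors before querying the model. A minimal sketch under that assumption (the attribute names and the background-row choice are illustrative, not the repository's actual implementation):

import numpy as np


class ModWrap:
    """Adapter so shap.KernelExplainer can query the model with only the
    feasible feature columns; the remaining columns are filled in from a
    fixed background row (here, the first k-means cluster center)."""

    def __init__(self, original_model, clusters, nsamples, feas_feat):
        self.model = original_model
        self.background = clusters[0]
        self.nsamples = nsamples  # kept for parity with the call site above
        self.feas_feat = feas_feat

    def predict(self, x_sel):
        # Tile the background row and overwrite the feasible columns with
        # the perturbed values generated by KernelSHAP
        full = np.tile(self.background, (x_sel.shape[0], 1))
        full[:, self.feas_feat] = x_sel
        return self.model.predict(full)
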
def evaluate_backdoor():
    # ## Config

    cfg = common_utils.read_config('configs/ogcontagio_fig5.json',
                                   atk_def=True)

    cfg['seed'] = 42
    print(cfg)

    model_id = cfg['model']
    seed = cfg['seed']
    to_save = cfg.get('save', '')
    target = cfg['target_features']
    dataset = cfg['dataset']
    k_perc = cfg['k_perc']
    k_data = cfg['k_data']
    poison_sizes = cfg['poison_size']
    iterations = cfg['iterations']
    watermark_size = cfg['watermark_size'][0]

    # Data

    x_train_orig, y_train_orig, x_test_orig, y_test_orig = data_utils.load_dataset(
        dataset=dataset)
    train_files, test_files = data_utils.load_pdf_train_test_file_names()

    print(x_train_orig.shape, x_test_orig.shape)

    wm_name = 'ogcontagio__pdfrf__combined_shap__combined_shap__feasible__30'

    watermark = dict(
        attack_utils.load_watermark(wm_file='configs/watermark/' + wm_name,
                                    wm_size=16))

    bdr_gw_df = pd.read_csv(
        os.path.join(constants.SAVE_FILES_DIR,
                     'bdr_{}_{}'.format('gw', wm_name)))
    bdr_mw_df = pd.read_csv(
        os.path.join(constants.SAVE_FILES_DIR,
                     'bdr_{}_{}'.format('mw', wm_name)))

    # Model

    original_model = model_utils.load_model(
        model_id=model_id,
        data_id=dataset,
        save_path=constants.SAVE_MODEL_DIR,
        file_name=dataset + '_' + model_id,
    )

    # Poisoning candidates

    mw_poisoning_candidates, mw_poisoning_candidates_idx = attack_utils.get_poisoning_candidate_samples(
        original_model, x_test_orig, y_test_orig)

    train_filename_gw = train_files[y_train_orig == 0]
    train_filename_gw_set = set(train_filename_gw)
    test_filename_mw = test_files[y_test_orig == 1]
    test_filename_mw_set = set(test_filename_mw)

    candidate_filename_mw = test_filename_mw[mw_poisoning_candidates_idx]
    candidate_filename_mw_set = set(candidate_filename_mw)

    ind_train_filenames = dict(
        zip(train_filename_gw.tolist(), range(train_filename_gw.shape[0])))
    ind_test_filenames = dict(
        zip(test_filename_mw.tolist(), range(test_filename_mw.shape[0])))

    # From the set of PDF files that were correctly poisoned we need to keep
    # only the benign points that are present in the training set and only the
    # malicious points that are present in the test set.

    # Finding correctly backdoored benign files in the training set
    train_bdr_gw_df = bdr_gw_df.copy()
    to_drop = []

    for index, row in bdr_gw_df.iterrows():
        if row['filename'] not in train_filename_gw_set:
            to_drop.append(index)

    train_bdr_gw_df.drop(index=to_drop, inplace=True)

    print(train_bdr_gw_df.shape)

    # Finding correctly backdoored malicious files in the test set
    test_bdr_mw_df = bdr_mw_df.copy()
    to_drop = []

    for index, row in bdr_mw_df.iterrows():
        if row['filename'] not in test_filename_mw_set:
            to_drop.append(index)
        elif row['filename'] not in candidate_filename_mw_set:
            to_drop.append(index)

    test_bdr_mw_df.drop(index=to_drop, inplace=True)

    print(test_bdr_mw_df.shape)

    # We also need to drop the malware candidates that were not correctly poisoned
    correctly_poisoned = set(test_bdr_mw_df['filename'].to_list())
    to_keep = [
        filename in correctly_poisoned
        for filename in candidate_filename_mw
    ]

    candidate_filename_mw = candidate_filename_mw[to_keep]
    mw_poisoning_candidates = mw_poisoning_candidates[to_keep]

    print(mw_poisoning_candidates.shape)

    # Finally, we need a mapping between the names of the poisoned files and
    # their indices in the training and test set arrays, respectively.

    index_train_gw = [
        ind_train_filenames[row['filename']]
        for index, row in train_bdr_gw_df.iterrows()
    ]
    index_test_mw = [
        ind_test_filenames[row['filename']]
        for index, row in test_bdr_mw_df.iterrows()
    ]

    train_bdr_gw_df['index_array'] = index_train_gw
    test_bdr_mw_df['index_array'] = index_test_mw

    # Attack

    # We need to substitute the feature vectors of the benign files used during
    # the attack with the ones obtained by directly poisoning the PDF files.
    # The new data can then be used to train a classifier, which will therefore
    # be poisoned. Finally, the exact same backdoor trigger (watermark) is
    # applied to previously correctly classified malicious files to test
    # whether the attack was successful.

    f_s = 'combined_shap'
    v_s = 'combined_shap'

    current_exp_name = common_utils.get_exp_name(dataset, model_id, f_s, v_s,
                                                 target)
    print('{}\nCurrent experiment: {}\n{}\n'.format('-' * 80, current_exp_name,
                                                    '-' * 80))

    # Create experiment directories
    current_exp_dir = os.path.join('results', current_exp_name)
    current_exp_img_dir = os.path.join(current_exp_dir, 'images')
    if not os.path.exists(current_exp_img_dir):
        os.makedirs(current_exp_img_dir)

    summaries = []

    for poison_size in poison_sizes:
        for iteration in range(iterations):

            # Create copies of the original data
            x_train = np.copy(x_train_orig)
            y_train = np.copy(y_train_orig)
            x_test = np.copy(x_test_orig)
            y_test = np.copy(y_test_orig)
            x_orig_mw_only_test = np.copy(mw_poisoning_candidates)

            x_train_gw = x_train[y_train == 0]
            y_train_gw = y_train[y_train == 0]
            x_train_mw = x_train[y_train == 1]
            y_train_mw = y_train[y_train == 1]

            # Select points to watermark
            train_gw_to_be_watermarked_df = train_bdr_gw_df.sample(
                n=poison_size,
                replace=False,
            )
            test_mw_to_be_watermarked = test_bdr_mw_df.sample(
                n=len(index_test_mw), replace=False)

            # Get the watermarked vectors
            train_gw_to_be_watermarked = train_gw_to_be_watermarked_df[
                'index_array'].to_numpy()
            x_train_gw_to_be_watermarked = train_gw_to_be_watermarked_df.drop(
                labels=['index_array', 'filename'], axis=1).to_numpy()
            y_train_gw_to_be_watermarked = np.zeros_like(
                train_gw_to_be_watermarked)

            x_test_mw = test_mw_to_be_watermarked.drop(
                labels=['index_array', 'filename'], axis=1).to_numpy()

            # Remove old goodware vectors from data matrix
            x_train_gw_no_watermarks = np.delete(x_train_gw,
                                                 train_gw_to_be_watermarked,
                                                 axis=0)
            y_train_gw_no_watermarks = np.delete(y_train_gw,
                                                 train_gw_to_be_watermarked,
                                                 axis=0)

            # Generate final training set
            x_train_watermarked = np.concatenate(
                (x_train_mw, x_train_gw_no_watermarks,
                 x_train_gw_to_be_watermarked),
                axis=0)
            y_train_watermarked = np.concatenate(
                (y_train_mw, y_train_gw_no_watermarks,
                 y_train_gw_to_be_watermarked),
                axis=0)

            # Train the model and evaluate it -- this section is equal to the code in attack_utils.py
            start_time = time.time()
            backdoor_model = model_utils.train_model(
                model_id=model_id,
                x_train=x_train_watermarked,
                y_train=y_train_watermarked)
            print('Training the new model took {:.2f} seconds'.format(
                time.time() - start_time))

            orig_origts_predictions = original_model.predict(
                x_orig_mw_only_test)
            orig_mwts_predictions = original_model.predict(x_test_mw)
            orig_gw_predictions = original_model.predict(
                x_train_gw_no_watermarks)
            orig_wmgw_predictions = original_model.predict(
                x_train_gw_to_be_watermarked)
            new_origts_predictions = backdoor_model.predict(
                x_orig_mw_only_test)
            new_mwts_predictions = backdoor_model.predict(x_test_mw)

            # Binarize the raw scores at the 0.5 decision threshold
            def binarize(predictions):
                return np.array(
                    [1 if pred > 0.5 else 0 for pred in predictions])

            orig_origts_predictions = binarize(orig_origts_predictions)
            orig_mwts_predictions = binarize(orig_mwts_predictions)
            orig_gw_predictions = binarize(orig_gw_predictions)
            orig_wmgw_predictions = binarize(orig_wmgw_predictions)
            new_origts_predictions = binarize(new_origts_predictions)
            new_mwts_predictions = binarize(new_mwts_predictions)

            assert len(x_test_mw) == x_orig_mw_only_test.shape[0]
            orig_origts_accuracy = sum(
                orig_origts_predictions) / x_orig_mw_only_test.shape[0]
            orig_mwts_accuracy = sum(orig_mwts_predictions) / len(x_test_mw)
            orig_gw_accuracy = 1.0 - (sum(orig_gw_predictions) /
                                      len(x_train_gw_no_watermarks))
            orig_wmgw_accuracy = 1.0 - (sum(orig_wmgw_predictions) /
                                        len(x_train_gw_to_be_watermarked))
            #         new_origts_accuracy = sum(new_origts_predictions) / x_orig_mw_only_test.shape[0]
            new_mwts_accuracy = sum(new_mwts_predictions) / len(x_test_mw)

            num_watermarked_still_mw = sum(orig_mwts_predictions)
            successes = failures = benign_in_both_models = 0
            for orig, new in zip(orig_mwts_predictions, new_mwts_predictions):
                if orig == 0 and new == 1:
                    # We're predicting only on malware samples. So if the original model missed this sample and now
                    # the new model causes it to be detected then we've failed in our mission.
                    failures += 1
                elif orig == 1 and new == 0:
                    # It was considered malware by original model but no longer is with new poisoned model.
                    # So we've succeeded in our mission.
                    successes += 1
                elif new == 0:
                    benign_in_both_models += 1

            # Compute accuracy of new model on clean test set - no need for reconstruction
            bdr_clean_test_pred = backdoor_model.predict(x_test_orig)
            bdr_clean_test_pred = binarize(bdr_clean_test_pred)
            new_origts_accuracy = accuracy_score(y_test_orig,
                                                 bdr_clean_test_pred)

            # Compute false positives and negatives for both models
            start_time = time.time()
            orig_origts_fpr_fnr = attack_utils.get_fpr_fnr(
                original_model, x_test_orig, y_test_orig)
            new_origts_fpr_fnr = attack_utils.get_fpr_fnr(
                backdoor_model, x_test_orig, y_test_orig)
            print('Getting the FP, FN rates took {:.2f} seconds'.format(
                time.time() - start_time))

            # Save the results
            wm_config = {
                'num_gw_to_watermark': poison_size,
                'num_mw_to_watermark': x_test_mw.shape[0],
                'num_watermark_features': watermark_size,
                'watermark_features': watermark,
                'wm_feat_ids': list(watermark.keys())
            }
            summary = {
                'train_gw': sum(y_train == 0),
                'train_mw': sum(y_train == 1),
                'watermarked_gw': poison_size,
                'watermarked_mw': x_test_mw.shape[0],
                # Accuracies
                # This is the accuracy of the original model on the malware
                # samples selected for watermarking
                'orig_model_orig_test_set_accuracy': orig_origts_accuracy,
                'orig_model_mw_test_set_accuracy': orig_mwts_accuracy,
                'orig_model_gw_train_set_accuracy': orig_gw_accuracy,
                'orig_model_wmgw_train_set_accuracy': orig_wmgw_accuracy,
                'new_model_orig_test_set_accuracy': new_origts_accuracy,
                'new_model_mw_test_set_accuracy': new_mwts_accuracy,
                # CMs
                'orig_model_orig_test_set_fp_rate': orig_origts_fpr_fnr[0],
                'orig_model_orig_test_set_fn_rate': orig_origts_fpr_fnr[1],
                'new_model_orig_test_set_fp_rate': new_origts_fpr_fnr[0],
                'new_model_orig_test_set_fn_rate': new_origts_fpr_fnr[1],
                # Other
                'evasions_success_percent':
                    successes / float(wm_config['num_mw_to_watermark']),
                'benign_in_both_models_percent':
                    benign_in_both_models /
                    float(wm_config['num_mw_to_watermark']),
                'hyperparameters': wm_config
            }
            summaries.append(summary)

            notebook_utils.print_experiment_summary(summary, 'combined_shap',
                                                    None)

            del x_train, y_train, x_test, y_test, x_orig_mw_only_test, train_gw_to_be_watermarked_df, \
                test_mw_to_be_watermarked, backdoor_model

    summary_rows = []
    for s in summaries:
        s_c = copy.deepcopy(s)
        s_h = s_c.pop('hyperparameters')
        s_c['num_watermark_features'] = s_h['num_watermark_features']
        summary_rows.append(s_c)

    summaries_df = pd.DataFrame(summary_rows)

    summaries_df.to_csv(
        os.path.join(current_exp_dir, current_exp_name + '__summary_df.csv'))

    # Plotting

    palette1 = sns.color_palette(
        ['#3B82CE', '#FFCC01', '#F2811D', '#DA4228', '#3BB3A9'])

    to_plot_rows = []
    for s in summaries:
        wm_gw_pct = '{:.1f}%'.format(s['watermarked_gw'] * 100 /
                                     constants.OGCONTAGIO_TRAIN_SIZE)
        to_plot_rows.append({
            constants.human_mapping['watermarked_gw']: wm_gw_pct,
            constants.human_mapping['watermarked_mw']: s['watermarked_mw'],
            constants.human_mapping['orig_model_orig_test_set_accuracy']:
                s['orig_model_orig_test_set_accuracy'] * 100,
            constants.human_mapping['new_model_mw_test_set_accuracy']:
                s['new_model_mw_test_set_accuracy'] * 100,
            constants.human_mapping['num_watermark_features']:
                s['hyperparameters']['num_watermark_features'],
        })
    to_plot_df = pd.DataFrame(to_plot_rows)

    fig = plt.figure(figsize=(12, 8))
    sns.set(style='whitegrid', font_scale=1.4)

    x_col = constants.human_mapping['watermarked_gw']
    y_col = constants.human_mapping['new_model_mw_test_set_accuracy']
    hue_col = constants.human_mapping['num_watermark_features']

    bplt = sns.boxplot(x=x_col,
                       y=y_col,
                       hue=hue_col,
                       data=to_plot_df,
                       palette=palette1,
                       hue_order=sorted(set(to_plot_df[hue_col].to_list())),
                       dodge=True,
                       linewidth=2.5)

    axes = bplt.axes
    axes.set_ylim(-5, 105)

    hline_col = constants.human_mapping['orig_model_orig_test_set_accuracy']
    temp_vals = to_plot_df[hline_col].to_numpy()
    assert np.all(temp_vals == temp_vals[0])
    hline = temp_vals[0]
    axes.axhline(hline,
                 ls='--',
                 color='red',
                 linewidth=2,
                 label='Clean model baseline')

    fixed_col = 'fixed_num_watermark_features'

    fig.savefig(os.path.join(current_exp_img_dir, fixed_col + '.png'),
                bbox_inches='tight')