Пример #1
0
def draw_histograms(data):
    """Write a histogram image and a boxplot image for every tracked metric.

    Metrics flagged ``True`` are plotted from ``data`` as given; all other
    metrics are plotted from a new dict holding only ``data['JIT']``.
    Images are written to ``<output_base>/histogram/<metric>.png`` and
    ``<output_base>/boxplot/<metric>.png``.
    """
    # (metric name, plot from all of `data` instead of only its 'JIT' entry)
    metrics = [
        ('time', True),
        ('blocks', False),
        ('blocks executed', False),
        ('host instructions', False),
        ('source instructions', False),
        ('host instructions executed', False),
        ('source instructions emulated', False),
        ('mips', True),
        ('hotness', False),
        ('host block size', False),
        ('source block size', False),
        ('compilation inefficiency', False),
        ('execution inefficiency', False)
    ]
    histogram_dir = "%s/histogram" % output_base
    boxplot_dir = "%s/boxplot" % output_base

    for metric, use_all_entries in metrics:
        subset = data if use_all_entries else {'JIT': data['JIT']}
        plot.histogram(subset, metric, '%s/%s.png' % (histogram_dir, metric))
        plot.boxplot(subset, metric, '%s/%s.png' % (boxplot_dir, metric))
Пример #2
0
def plot_factor_in_subtype(cad_factors, factor_id, factor_annotation,
                           clinical_annotation, clinical_var, out, stats_out):
    """Boxplot one factor per clinical group and run a Kruskal-Wallis test.

    Args:
        cad_factors: Passed to ``xr.open_dataset``; the ``'factors'``
            variable of that dataset is used.
        factor_id: Value matched against the ``'id'`` field of the entries
            in the factor annotation.
        factor_annotation: Path of a YAML file holding a mapping from factor
            index to a mapping that contains an ``'id'`` key.
        clinical_annotation: Passed to ``xr.open_dataset``.
        clinical_var: Name of the variable to read from the clinical
            dataset; also the key into ``clin_display_names``.
        out: Destination handed to ``fig.savefig`` (written as SVG).
        stats_out: Path of the text file that receives the ``h`` and ``p``
            values of the test.
    """
    factor_da = xr.open_dataset(cad_factors)['factors']
    with open(factor_annotation) as f:
        # yaml.load() without an explicit Loader raises TypeError on
        # PyYAML >= 6 and can build arbitrary objects on older versions;
        # safe_load() covers plain mapping files.
        # NOTE(review): confirm the annotation file uses no Python-specific
        # YAML tags.
        annotation = yaml.safe_load(f)
    # `only` is a project helper; presumably it returns the single match
    # and rejects zero or several matches -- verify against its definition.
    factor_index = only(
        [i for i, v in annotation.items() if v['id'] == factor_id])
    factor = factor_da.sel(factor=factor_index).load()
    factor.name = factor_id

    clin = xr.open_dataset(clinical_annotation)[clinical_var].load()
    # Drop nulls from each array, then align them with xr.align's defaults.
    factor, clin = xr.align(factor[factor.notnull()], clin[clin.notnull()])

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        plot.boxplot(
            clin,
            factor,
            title="",
            xlabel=clin_display_names[clinical_var],
            # The axis label shows factor_index + 1.
            ylabel=f"Factor {factor_index+1}",
            ax=ax,
        )
        fig.savefig(out, format='svg')

    factor_by_clin = split_by(factor.values, clin.values)
    h, p = scipy.stats.kruskal(*factor_by_clin.values())
    with open(stats_out, 'w') as f:
        f.write(f"h: {h:.6e}\n")
        f.write(f"p: {p:.6e}\n")
Пример #3
0
def plot_factor_in_subtype(mri_features, feature_id, clinical_annotation,
                           clinical_var, out, stats_out):
    """Boxplot one MRI feature per clinical group and run Kruskal-Wallis.

    The feature ``feature_id`` is read from the dataset opened from
    ``mri_features`` and the variable ``clinical_var`` from the dataset
    opened from ``clinical_annotation``. The figure is saved to ``out`` as
    SVG, and the ``h`` and ``p`` values of the test are written to the text
    file ``stats_out``.
    """
    values = xr.open_dataset(mri_features)[feature_id]
    labels = xr.open_dataset(clinical_annotation)[clinical_var].load()

    # Drop nulls from each array, then align them with xr.align's defaults.
    values, labels = xr.align(values[values.notnull()],
                              labels[labels.notnull()])

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        boxplot_options = dict(
            title="",
            xlabel=clin_display_names[clinical_var],
            ylabel=feature_display_names[feature_id],
            ax=ax,
        )
        plot.boxplot(labels, values, **boxplot_options)
        fig.savefig(out, format='svg')

    # One group of feature values per clinical label, as built by split_by.
    groups = split_by(values.values, labels.values)
    statistic, p_value = scipy.stats.kruskal(*groups.values())
    with open(stats_out, 'w') as stats_file:
        stats_file.writelines([
            f"h: {statistic:.6e}\n",
            f"p: {p_value:.6e}\n",
        ])
Пример #4
0
        _all_player_shaps = []
        _cf_shaps = calc_n_shapley_values(N_FEATS, N_SAMPLES, N_ITER,
                DATA_TYPE, _cf, data_dir=DATA_DIR)

        # --- Group shapley decompositions per player
        # --- Normalised:
        all_cf_shaps_per_player.append([normalise(numpy.array(_cf_shaps))[:,_player] for _player in PLAYERS])
        # --- Non-normalized:
        #all_cf_shaps_per_player.append([numpy.array(_cf_shaps)[:,_player] for _player in PLAYERS])
        # ---
        print("Done with {0}.".format(_cf))
    # ---

    cf_labels = [CF_DICT.get(_cf, 0) for _cf in cfs]
    violinplot(all_cf_shaps_per_player, PLAYERS, labels=cf_labels, multi=True)
    boxplot(all_cf_shaps_per_player, PLAYERS, labels=cf_labels, multi=True)

    # --- Plot average Shapley decomposition per player
    #SHAPS_AVG = [sum(x)/len(x) for x in zip(*all_shaps)]
    #barplot_all(PLAYERS, SHAPS_AVG)
    # ---

    try:
        plt.show()
    except Exception:
        pass


# NOTES
    #plt.title("Distance correlation")
    #plt.title(r"$R^2$")
Пример #5
0
def average_varimp(X,
                   y,
                   ntrees,
                   replace,
                   mtry,
                   max_depth,
                   missing_branch,
                   balance,
                   vitype='err',
                   vimissing=True,
                   ntimes=25,
                   select=True,
                   printvi=False,
                   plotvi=False,
                   cutpoint=0.0,
                   mean=False,
                   title=None,
                   missing_rate=False,
                   random_subspace=False):
    """Collect variable importances over repeated random-forest fits.

    A new ``rf.RandomForest`` is fitted on ``(X, y)`` with a random seed for
    each repetition, and the importance reported for every variable is
    recorded per repetition.

    Args:
        X: Feature table. ``X.shape`` is always read; ``X.columns`` is read
            when ``printvi``, ``plotvi`` or ``missing_rate`` is set.
        y: Targets, passed to ``clf.fit``.
        ntrees, replace, mtry, max_depth, missing_branch, balance,
        random_subspace: Forwarded unchanged to ``rf.RandomForest``.
        vitype: Forwarded to ``clf.variable_importance``.
        vimissing: Forwarded to ``clf.variable_importance``.
        ntimes: Requested number of fits; the number actually run is also
            capped by the number of rows in ``X``.
        select: Return the sorted indices of the variables whose mean
            importance is at least ``cutpoint``.
        printvi: Print every feature with its mean importance, highest first.
        plotvi: Draw a boxplot of the per-fit importances of the variables
            whose mean importance is at least ``cutpoint``.
        cutpoint: Threshold on the mean importance (see above).
        mean: When ``select`` is false, return ``(index, mean importance)``
            pairs sorted by decreasing importance.
        title: Passed to ``plot.boxplot`` as its third argument.
        missing_rate: Multiply each importance by
            ``utils.notNanProportion`` of the corresponding column.

    Returns:
        A sorted list of selected variable indices when ``select`` is true;
        otherwise ``(index, mean importance)`` pairs when ``mean`` is true;
        otherwise ``(index, list of per-fit importances)`` pairs sorted by
        index.
    """
    vi = {a: [] for a in range(X.shape[1])}

    # The number of fits is bounded by both ntimes and the row count of X.
    for _ in range(min(ntimes, X.shape[0])):
        seed = np.random.randint(0, 10000)
        clf = rf.RandomForest(ntrees=ntrees,
                              oob_error=True,
                              random_state=seed,
                              mtry=mtry,
                              missing_branch=missing_branch,
                              prob_answer=False,
                              max_depth=max_depth,
                              replace=replace,
                              balance=balance,
                              random_subspace=random_subspace)
        clf.fit(X, y)
        # Forward the caller's choices: previously `vimissing` was passed as
        # `vitype` and `vimissing` was hard-coded to True, so both
        # parameters of this function were effectively ignored.
        varimps = clf.variable_importance(vitype=vitype, vimissing=vimissing)
        for var, importance in varimps.items():
            if missing_rate:
                importance = importance * utils.notNanProportion(
                    X[X.columns[var]])
            vi[var].append(importance)

    # With no fit there is nothing to average; use NaN directly rather than
    # letting np.mean warn about an empty slice.
    vimean = {
        var: (np.mean(values) if values else float('nan'))
        for var, values in vi.items()
    }

    if printvi:
        ranked = sorted(vimean.items(), key=lambda x: x[1], reverse=True)
        for v, i in ranked:
            print('feature: %r importance: %r' % (X.columns[v], i))

    if plotvi:
        print(cutpoint)
        kept = [var for var in sorted(vi) if vimean[var] >= cutpoint]
        importance_values = [vi[var] for var in kept]
        features = [X.columns[var] for var in kept]
        import plot
        plot.boxplot(importance_values, features, title)

    if select:
        return sorted(var for var, avg in vimean.items() if avg >= cutpoint)
    if mean:
        return sorted(vimean.items(), key=lambda x: x[1], reverse=True)

    return sorted(vi.items(), key=lambda x: x[0])
Пример #6
0
     RejectOptionsLogisticLearner([privileged_group], [unprivileged_group],
                                  exclude_protected=False)),
    ("average odds",
     RejectOptionsLogisticLearner([privileged_group], [unprivileged_group],
                                  metric_name='Average odds difference',
                                  exclude_protected=False))
]

if C_EXECUTE:
    # execute
    rss = list(map(lambda x: (x[0], do_sim(x[1], max_it=50)), learners))
    # save
    save(rss, "notions_of_fairness_lr_1_bf_" + str(COST_CONST))
# -

plot.boxplot(rss, unprivileged_group, privileged_group, name='sdffsdfdsfd')
#display(rss[0][1].feature_table([unprivileged_group, privileged_group]))

# +
filename = "notions_of_fairness_bf_8"
rss = load(filename)
plot.boxplot(rss, unprivileged_group, privileged_group, name=filename)

plot.plot_all_mutable_features_combined(rss,
                                        unprivileged_group,
                                        privileged_group,
                                        dataset,
                                        'purpose',
                                        filename=filename,
                                        kind='cdf',
                                        select_group='1',