def feature_importance(spn, target_id, rang=None, value_dict=None, numeric_prec=50):
    """Compare each feature's distribution across all values of a target feature.

    For every value index ``v`` of the target, the SPN is passed to
    ``fn.marg_rang`` with a range list that sets only the target entry to
    ``NominalRange([v])``. The overall population of the resulting SPN is
    collected together with the probability returned by ``fn.marg_rang``.
    Afterwards, for every feature in the scope of the resulting SPN, the
    per-target-value distributions are handed to ``_compare_distributions``.

    Args:
        spn: SPN to analyse.
        target_id: Feature id of the target. ``value_dict[target_id][2]``
            is used as the list of target values.
        rang: Optional range list applied to ``spn`` (via ``fn.marg_rang``)
            before the analysis. Its entry for ``target_id`` must be ``None``.
        value_dict: Feature description lookup; generated with
            ``fn.generate_adhoc_value_dict(spn)`` when omitted.
        numeric_prec: Forwarded to ``fn.get_overall_population``.

    Returns:
        list: One ``_compare_distributions`` result per feature in the scope
        of the SPN obtained for the last target value, in scope order.
        An empty list when the target has no values.

    Raises:
        AssertionError: If ``rang`` already constrains the target feature.
    """
    if value_dict is None:
        value_dict = fn.generate_adhoc_value_dict(spn)

    if rang is not None:
        assert rang[target_id] is None, "rang must not constrain the target feature"
        _, spn = fn.marg_rang(spn, rang)

    n_vals = len(value_dict[target_id][2])
    if n_vals == 0:
        # Nothing to compare; previously this fell through to an unbound
        # loop variable and crashed with UnboundLocalError.
        return []

    # The range list is indexed by feature id, so it must reach the largest id.
    rang_len = np.max(spn.scope) + 1

    overall_pops = []
    feature_scope = None
    for v in range(n_vals):
        tmp_rang = [None] * rang_len
        tmp_rang[target_id] = NominalRange([v])
        p, cond_spn = fn.marg_rang(spn, tmp_rang)
        overall_pop = fn.get_overall_population(
            cond_spn, value_dict=value_dict, numeric_prec=numeric_prec)
        overall_pops.append([p, overall_pop])
        # Keep the scope explicitly instead of relying on a leaked loop variable.
        feature_scope = cond_spn.scope

    return [
        _compare_distributions(
            [[p, overall_pop[f_id]] for p, overall_pop in overall_pops],
            value_dict[f_id])
        for f_id in feature_scope
    ]
def visualize_overall_distribution(spn, value_dict=None, rang=None, numeric_prec=50, save_path=None):
    """Plot the overall distribution of every feature of an SPN in a single row.

    The figure has one column per feature in ``spn.scope``. The entries
    returned by ``fn.get_overall_population`` are drawn in ascending feature-id
    order: features whose ``"feature_type"`` is ``"discrete"`` as bar plots,
    ``"numeric"`` ones as line plots. The square root of ``"y_vars"`` is
    passed to the plot helpers as the error values.

    Args:
        spn: SPN to visualise.
        value_dict: Feature description lookup; ``value_dict[f_id][1]`` is
            used as the column title. Generated with
            ``fn.generate_adhoc_value_dict(spn)`` when omitted.
        rang: Optional range list applied to ``spn`` via ``fn.marg_rang``
            before plotting.
        numeric_prec: Forwarded to ``fn.get_overall_population``.
        save_path: When ``None`` the figure is shown, otherwise it is saved
            to this path.

    Raises:
        Exception: If a distribution reports a feature type other than
            ``"discrete"`` or ``"numeric"``.
    """
    if value_dict is None:
        value_dict = fn.generate_adhoc_value_dict(spn)

    if rang is not None:
        _, spn = fn.marg_rang(spn, rang)

    overall_population = fn.get_overall_population(
        spn, value_dict=value_dict, numeric_prec=numeric_prec)

    ncols = len(spn.scope)
    nrows = 1
    # squeeze=False keeps `axes` two-dimensional even for a single column.
    fig, axes = plt.subplots(
        nrows, ncols, figsize=(ncols * 3, nrows * 3), squeeze=False)

    for i, f_id in enumerate(sorted(overall_population)):
        dist = overall_population[f_id]
        feature_type = dist["feature_type"]
        if feature_type == "discrete":
            viz_helper.bar_plot(
                axes[0][i], dist["y_means"], dist["x_labels"],
                y_err=np.sqrt(dist["y_vars"]),
                y_label="probability", ylim=[0, 1])
        elif feature_type == "numeric":
            viz_helper.line_plot(
                axes[0][i], dist["x_vals"], dist["y_means"],
                y_errs=np.sqrt(dist["y_vars"]), y_label="density")
        else:
            # `dist` is accessed as a mapping and has no `.scope`; look the
            # feature up by its id so the intended message is actually raised.
            raise Exception("Unknown attribute-type: " + str(value_dict[f_id]))

    # Column titles, placed slightly above each axes.
    pad_col = 5
    feature_names = [value_dict[x][1] for x in sorted(spn.scope)]
    for ax, col in zip(axes[0], feature_names):
        ax.annotate(col, xy=(0.5, 1), xytext=(0, pad_col),
                    xycoords='axes fraction', textcoords='offset points',
                    size='large', ha='center', va='baseline')

    plt.tight_layout()
    fig.subplots_adjust(top=0.9)  # leave room for the column titles

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
def visualize_target_based_conds_overall_distribution_compact(
        spn, target_conds, value_dict=None, rang=None, target_names=None,
        numeric_prec=50, save_path=None):
    """Overlay per-condition feature distributions in one row of plots.

    For every entry of ``target_conds`` a range list is built from its
    ``feature id -> range`` items and passed to ``fn.marg_rang``; the overall
    population of the resulting SPN is collected per feature. Each feature of
    ``spn.scope`` that is not used as a target gets one column: discrete
    features (``value_dict[f_id][0] == "discrete"``) are drawn with
    ``viz_helper.multiple_bar_plot``, numeric ones as one line per condition.
    Variances (``"y_vars"``) are not drawn.

    Args:
        spn: SPN to visualise.
        target_conds: Sequence of mappings ``feature id -> range``; one
            mapping per condition to compare.
        value_dict: Feature description lookup; generated with
            ``fn.generate_adhoc_value_dict(spn)`` when omitted.
        rang: Optional range list applied to ``spn`` via ``fn.marg_rang``
            first. Its entries for all target ids must be ``None``.
        target_names: Optional legend labels, one per entry of
            ``target_conds``. When ``None`` the line plots get no label.
        numeric_prec: Forwarded to ``fn.get_overall_population``.
        save_path: When ``None`` the figure is shown, otherwise it is saved
            to this path.

    Raises:
        AssertionError: If ``rang`` already constrains a target feature.
        Exception: If ``value_dict`` reports a feature type other than
            ``"discrete"`` or ``"numeric"``.
    """
    if value_dict is None:
        value_dict = fn.generate_adhoc_value_dict(spn)

    target_ids = {target_id for conds in target_conds for target_id in conds}

    if rang is not None:
        for target_id in target_ids:
            assert rang[target_id] is None, "rang must not constrain a target feature"
        _, spn = fn.marg_rang(spn, rang)

    # One list of distributions (one per condition) for every non-target feature.
    plot_data = {f_id: [] for f_id in spn.scope if f_id not in target_ids}
    # A single sorted ordering drives both the plots and their titles, so a
    # title can never end up above another feature's plot.
    feature_ids = sorted(plot_data)

    # Size the grid by the features actually plotted rather than assuming
    # exactly one target feature.
    ncols = len(feature_ids)
    nrows = 1
    fig, axes = plt.subplots(
        nrows, ncols, figsize=(ncols * 3, nrows * 3), squeeze=False)

    # The range list is indexed by feature id, so it must reach the largest id.
    rang_len = np.max(spn.scope) + 1
    for conds in target_conds:
        tmp_rang = [None] * rang_len
        for target_id, cond in conds.items():
            tmp_rang[target_id] = cond
        _, cond_spn = fn.marg_rang(spn, tmp_rang)
        overall_population = fn.get_overall_population(
            cond_spn, value_dict=value_dict, numeric_prec=numeric_prec)
        for f_id in cond_spn.scope:
            plot_data[f_id].append(overall_population[f_id])

    for ax, f_id in zip(axes[0], feature_ids):
        dists = plot_data[f_id]
        feature_type = value_dict[f_id][0]
        if feature_type == "discrete":
            if not dists:
                continue  # no condition produced data for this feature
            y_means = [dist["y_means"] for dist in dists]
            viz_helper.multiple_bar_plot(
                ax, y_means, dists[-1]["x_labels"],
                legend_labels=target_names,
                y_label="probability", ylim=[0, 1])
        elif feature_type == "numeric":
            for j, dist in enumerate(dists):
                # Only pass a label when names were supplied; indexing the
                # default `None` would raise TypeError.
                label_kwargs = {} if target_names is None else {"label": target_names[j]}
                viz_helper.line_plot(
                    ax, dist["x_vals"], dist["y_means"],
                    y_label="density", **label_kwargs)
        else:
            raise Exception("Unknown attribute-type: " + str(value_dict[f_id]))

    # Column titles, placed slightly above each axes.
    pad_col = 5
    for ax, f_id in zip(axes[0], feature_ids):
        ax.annotate(value_dict[f_id][1], xy=(0.5, 1), xytext=(0, pad_col),
                    xycoords='axes fraction', textcoords='offset points',
                    size='large', ha='center', va='baseline')

    # A single legend below the first plot serves the whole row.
    axes[0][0].legend(loc='upper center', bbox_to_anchor=(0.5, -0.25))
    plt.tight_layout()

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
def visualize_target_based_overall_distribution_single(spn, target_id, value_dict=None, rang=None, numeric_prec=50, save_path=None):
    """Plot feature distributions in a grid: one row per target value.

    For every value index ``v`` of the target, the SPN is passed to
    ``fn.marg_rang`` with a range list that sets only the target entry to
    ``NominalRange([v])``. Row ``v`` of the figure then shows the overall
    population of the resulting SPN, one column per feature in ascending
    feature-id order (bar plots for ``"discrete"``, line plots for
    ``"numeric"``). Each row is labelled on its left with the probability
    returned by ``fn.marg_rang`` (as a percentage) and the target value.

    Args:
        spn: SPN to visualise.
        target_id: Feature id of the target. ``value_dict[target_id][1]`` is
            used as its name and ``value_dict[target_id][2]`` as its values.
        value_dict: Feature description lookup; generated with
            ``fn.generate_adhoc_value_dict(spn)`` when omitted.
        rang: Optional range list applied to ``spn`` via ``fn.marg_rang``
            first. Its entry for ``target_id`` must be ``None``.
        numeric_prec: Forwarded to ``fn.get_overall_population``.
        save_path: When ``None`` the figure is shown, otherwise it is saved
            to this path.

    Raises:
        AssertionError: If ``rang`` already constrains the target feature.
        Exception: If a distribution reports a feature type other than
            ``"discrete"`` or ``"numeric"``.
    """
    if value_dict is None:
        value_dict = fn.generate_adhoc_value_dict(spn)

    if rang is not None:
        assert rang[target_id] is None, "rang must not constrain the target feature"
        _, spn = fn.marg_rang(spn, rang)

    target_name = value_dict[target_id][1]
    target_values = value_dict[target_id][2]

    n_vals = len(target_values)
    ncols = len(spn.scope) - 1  # every feature except the target
    nrows = n_vals
    # squeeze=False keeps `axes` two-dimensional for any grid shape.
    fig, axes = plt.subplots(
        nrows, ncols, figsize=(ncols * 3, nrows * 2), squeeze=False)

    # The range list is indexed by feature id, so it must reach the largest id.
    rang_len = np.max(spn.scope) + 1

    ps = []
    feature_ids = []
    for v in range(n_vals):
        tmp_rang = [None] * rang_len
        tmp_rang[target_id] = NominalRange([v])
        p, cond_spn = fn.marg_rang(spn, tmp_rang)
        ps.append(p)
        overall_population = fn.get_overall_population(
            cond_spn, value_dict=value_dict, numeric_prec=numeric_prec)

        feature_ids = sorted(cond_spn.scope)
        for i, f_id in enumerate(feature_ids):
            dist = overall_population[f_id]
            feature_type = dist["feature_type"]
            if feature_type == "discrete":
                viz_helper.bar_plot(
                    axes[v][i], dist["y_means"], dist["x_labels"],
                    y_err=np.sqrt(dist["y_vars"]),
                    y_label="probability", ylim=[0, 1])
            elif feature_type == "numeric":
                viz_helper.line_plot(
                    axes[v][i], dist["x_vals"], dist["y_means"],
                    y_errs=np.sqrt(dist["y_vars"]), y_label="density")
            else:
                # `dist` is accessed as a mapping and has no `.scope`; look the
                # feature up by its id so the intended message is raised.
                raise Exception("Unknown attribute-type: " + str(value_dict[f_id]))

    # Column titles above the first row.
    pad_col = 5
    feature_names = [value_dict[x][1] for x in feature_ids]
    for ax, col in zip(axes[0], feature_names):
        ax.annotate(col, xy=(0.5, 1), xytext=(0, pad_col),
                    xycoords='axes fraction', textcoords='offset points',
                    size='large', ha='center', va='baseline')

    # Row labels to the left of the first column's y-axis label.
    pad_row = 5
    for i, p in enumerate(ps):
        row_ax = axes[i][0]
        # str() keeps this working when names or values are not strings.
        row_label = (str(round(p * 100, 4)) + "%\n"
                     + str(target_name) + "=" + str(target_values[i]))
        row_ax.annotate(row_label, xy=(0, 0.5),
                        xytext=(-row_ax.yaxis.labelpad - pad_row, 0),
                        xycoords=row_ax.yaxis.label,
                        textcoords='offset points',
                        size='large', ha='right', va='center')

    plt.tight_layout()
    fig.subplots_adjust(left=0.15, top=0.9)  # room for row labels and titles

    if save_path is None:
        plt.show()
    else:
        plt.savefig(save_path)
def test_get_overall_population(): spn = example_spns.get_gender_spn() overall_pop = fn.get_overall_population(spn) print(overall_pop)
feature_scope = {2} data = np.array([np.nan, np.nan, np.nan]) expect = fn.expect_spnflow(spn, feature_scope, data) print(expect) #Sub-population sub_pops = fn.get_sub_populations(spn) print(sub_pops) #Value_dict val_dict = fn.generate_adhoc_value_dict(spn) print(val_dict) #overall_population overall_pop = fn.get_overall_population(spn) #Titanic spn, value_dict, _ = spn_handler.load_spn(dataset_name, rdc_threshold, min_instances_slice) #Classify ranges = np.array([[ NominalRange([1]), NominalRange([1]), None, None, None, None, None, None ], [ NominalRange([1]), NominalRange([0]), None, None, None, None, None, None ],