Exemplo n.º 1
0
def mi(x, y, bins=10):
    """Return the mutual information of x and y, scaled by their entropies.

    Each sample is discretised with ``np.histogram`` and the pair with
    ``np.histogram2d`` (all using ``bins``). The mutual information of the
    joint histogram is divided by ``sqrt(H_x * H_y)``, i.e. the geometric
    mean of the two marginal entropies.

    :param x: first sample of values
    :param y: second sample, paired with ``x`` for the joint histogram
    :param bins: bin specification forwarded unchanged to the histogram calls
    :return: ``MI(x, y) / sqrt(H_x * H_y)``
    """
    counts_x, _ = np.histogram(x, bins)
    entropy_x = u.compute_entropy(counts_x)
    counts_y, _ = np.histogram(y, bins)
    entropy_y = u.compute_entropy(counts_y)

    joint_counts, _, _ = np.histogram2d(x, y, bins)
    # Labels are None because the joint histogram itself is supplied as the
    # contingency table.
    mutual_information = skm.mutual_info_score(
        None, None, contingency=joint_counts)

    # NOTE(review): mutual_info_score reports nats; confirm that
    # u.compute_entropy uses the same log base. A zero marginal entropy makes
    # the normaliser zero and the division is not guarded.
    normaliser = np.sqrt(entropy_x * entropy_y)
    return mutual_information / normaliser
Exemplo n.º 2
0
def mi(x, y, bins=10):
    """Return the mutual information of x and y, scaled by their entropies.

    Each sample is discretised with ``np.histogram`` and the pair with
    ``np.histogram2d`` (all using ``bins``). The mutual information of the
    joint histogram is divided by ``sqrt(H_x * H_y)``, i.e. the geometric
    mean of the two marginal entropies.

    :param x: first sample of values
    :param y: second sample, paired with ``x`` for the joint histogram
    :param bins: bin specification forwarded unchanged to the histogram calls
    :return: ``MI(x, y) / sqrt(H_x * H_y)``
    """
    counts_x, _ = np.histogram(x, bins)
    entropy_x = u.compute_entropy(counts_x)
    counts_y, _ = np.histogram(y, bins)
    entropy_y = u.compute_entropy(counts_y)

    joint_counts, _, _ = np.histogram2d(x, y, bins)
    # Labels are None because the joint histogram itself is supplied as the
    # contingency table.
    mutual_information = skm.mutual_info_score(
        None, None, contingency=joint_counts)

    # NOTE(review): mutual_info_score reports nats; confirm that
    # u.compute_entropy uses the same log base. A zero marginal entropy makes
    # the normaliser zero and the division is not guarded.
    normaliser = np.sqrt(entropy_x * entropy_y)
    return mutual_information / normaliser
Exemplo n.º 3
0
def get_branch_lengths_estimates(tree):
	"""
	Summarise the branch lengths of a tree.

	:param tree: Tree node or tree file or newick tree string;
	:return: tuple (max, min, mean, std, entropy) computed over the values
		returned by get_branch_lengths(tree); note that max precedes min
	"""
	branch_lengths = get_branch_lengths(tree)
	branch_entropy = compute_entropy(branch_lengths)

	longest = max(branch_lengths)
	shortest = min(branch_lengths)
	average = np.mean(branch_lengths)
	spread = np.std(branch_lengths)
	return longest, shortest, average, spread, branch_entropy
Exemplo n.º 4
0
def get_diameters_estimates(tree_filepath, actual_bl=True):
	"""
	Summarise the pairwise leaf-to-leaf distances ("diameters") of a tree.

	With actual_bl=False the dist of every descendant node is overwritten
	with 1.0, so distances count branches instead of summing lengths. This
	changes the loaded tree, which is why only a string (file path or newick
	string) is accepted in that mode.

	:param tree_filepath: tree file or newick tree string;
	:param actual_bl: True to sum actual dists, False for num of branches
	:return: max, min, mean, std and entropy of the pairwise leaf distances,
		in that order
	"""
	if not actual_bl:
		assert isinstance(tree_filepath, str)
	# The tree is deliberately not deep-copied: on large trees deepcopy
	# exceeds the recursion depth.
	root = get_newick_tree(tree_filepath).get_tree_root()
	if not actual_bl:
		for descendant in root.iter_descendants():
			descendant.dist = 1.0

	tips = list(root.iter_leaves())
	pairwise_dists = [
		first.get_distance(second)
		for first, second in itertools.combinations(tips, 2)
	]
	dist_entropy = compute_entropy(pairwise_dists)

	return (
		max(pairwise_dists),
		min(pairwise_dists),
		np.mean(pairwise_dists),
		np.std(pairwise_dists),
		dist_entropy,
	)
Exemplo n.º 5
0
def time_entropy(visits):
    """Return the entropy of a venue's check-in hours, divided by log(24).

    :param visits: iterable of objects exposing an integer ``hour`` attribute,
        one per check-in
    :return: ``u.compute_entropy`` of the per-hour counts (at least 24 bins)
        divided by ``np.log(24.0)``
    """
    checkin_hours = [visit.hour for visit in visits]
    per_hour = np.bincount(checkin_hours, minlength=24).astype(float)
    # log(24) is the natural-log entropy of a uniform spread over 24 hours;
    # presumably this scales the result to [0, 1] -- confirm that
    # u.compute_entropy also uses the natural log.
    return u.compute_entropy(per_hour) / np.log(24.0)
Exemplo n.º 6
0
def venue_entropy(visitors):
    """Compute the entropy of venue given the list of its `visitors`.

    Each distinct visitor is counted with ``Counter`` and the resulting
    per-visitor counts are handed to ``u.compute_entropy`` as a float array.

    :param visitors: iterable of hashable visitor identifiers, one entry per
        visit
    :return: whatever ``u.compute_entropy`` returns for the count array
    """
    # pylint: disable=E1101
    # On Python 3 dict.values() is a view, which np.array cannot convert with
    # dtype=float; materialise it as a list first (also valid on Python 2).
    visit_counts = list(Counter(visitors).values())
    return u.compute_entropy(np.array(visit_counts, dtype=float))
Exemplo n.º 7
0
def jensen_shannon_divergence(P, Q):
    """Compute JSD(P || Q) as defined in
    https://en.wikipedia.org/wiki/Jensen–Shannon_divergence

    The result is the entropy of the equal-weight mixture ``0.5 * (P + Q)``
    minus the mean of the entropies of ``P`` and ``Q``.

    :param P: first distribution; must support ``+`` with ``Q`` and scaling by
        a float (presumably a numpy array -- confirm against callers)
    :param Q: second distribution, same shape as ``P``
    :return: ``H(mixture) - 0.5 * (H(P) + H(Q))`` with ``H`` being
        ``u.compute_entropy``
    """
    mixture = 0.5 * (P + Q)
    entropy_p = u.compute_entropy(P)
    entropy_q = u.compute_entropy(Q)
    mean_entropy = 0.5 * (entropy_p + entropy_q)
    return u.compute_entropy(mixture) - mean_entropy
Exemplo n.º 8
0
def jensen_shannon_divergence(P, Q):
    """Compute JSD(P || Q) as defined in
    https://en.wikipedia.org/wiki/Jensen–Shannon_divergence

    The result is the entropy of the equal-weight mixture ``0.5 * (P + Q)``
    minus the mean of the entropies of ``P`` and ``Q``.

    :param P: first distribution; must support ``+`` with ``Q`` and scaling by
        a float (presumably a numpy array -- confirm against callers)
    :param Q: second distribution, same shape as ``P``
    :return: ``H(mixture) - 0.5 * (H(P) + H(Q))`` with ``H`` being
        ``u.compute_entropy``
    """
    mixture = 0.5 * (P + Q)
    entropy_p = u.compute_entropy(P)
    entropy_q = u.compute_entropy(Q)
    mean_entropy = 0.5 * (entropy_p + entropy_q)
    return u.compute_entropy(mixture) - mean_entropy
Exemplo n.º 9
0
def make_bias_noise_figure(bias_cp_dfs,
                           entropy_cp_dfs,
                           noisy_cp_dfs,
                           noise_levels,
                           noiseless_cp_df,
                           analytical_cp_df,
                           bias_cp_df_labels=None,
                           entropy_cp_df_labels=None,
                           base_figure_scale=3,
                           include_binomial_null=False):
    """Build the combined bias / entropy / noise-comparison figure.

    All panels are placed with ``plt.subplot2grid`` on one shared grid:

    * top row: a wide bias panel and, in the last column block, an entropy
      panel;
    * below, one column per entry of ``noisy_cp_dfs``: a noise-comparison
      panel with a shorter histogram panel underneath it.

    Args:
        bias_cp_dfs: data frames for the bias panel; each is grouped by its
            ``index_set`` column and the ``final_loss`` counts are used.
        entropy_cp_dfs: data frames for the entropy panel.
        noisy_cp_dfs: one data frame per noise column.
        noise_levels: noise level for each noisy data frame (paired with
            ``noisy_cp_dfs`` via ``zip``).
        noiseless_cp_df: forwarded to every noise-comparison panel.
        analytical_cp_df: forwarded to every noise-comparison panel and
            every histogram panel.
        bias_cp_df_labels: passed as ``labels`` to the bias panel.
        entropy_cp_df_labels: labels passed to the entropy panel.
        base_figure_scale: the figure size is
            ``(len(noisy_cp_dfs) * scale, 2 * scale)``.
        include_binomial_null: forwarded to the bias panel.

    Returns:
        ``(f, ax_grid, bias_ax, noise_axs)``: the figure and axes array
        returned by ``plt.subplots``, the bias axis, and the list of
        noise-comparison axes. The entropy and histogram axes are not
        returned.
    """
    n_columns = len(noisy_cp_dfs)

    # Grid cell counts: a full panel spans 5 cells, a histogram 2, a gap 1.
    panel_cells = 5
    histogram_cells = 2
    gap_cells = 1
    column_stride = panel_cells + gap_cells

    total_rows = 2 * panel_cells + histogram_cells + gap_cells
    total_cols = n_columns * panel_cells + (n_columns - 1) * gap_cells
    gridsize = [total_rows, total_cols]

    f, ax_grid = plt.subplots(
        total_rows,
        total_cols,
        figsize=(n_columns * base_figure_scale, 2 * base_figure_scale))

    # Top row, left: the bias panel fills everything up to the gap that
    # precedes the entropy panel.
    entropy_col = total_cols - panel_cells
    bias_ax = plt.subplot2grid(gridsize, (0, 0),
                               rowspan=panel_cells,
                               colspan=entropy_col - gap_cells)

    counts = [
        frame.groupby("index_set").count().final_loss
        for frame in bias_cp_dfs
    ]
    marginal_counts = [
        utils.compute_marginal_counts(per_set) for per_set in counts
    ]
    panels.make_bias_panel(bias_cp_dfs,
                           counts,
                           marginal_counts,
                           labels=bias_cp_df_labels,
                           ax=bias_ax,
                           include_binomial_null=include_binomial_null)

    # Top row, right: the entropy panel occupies the last column block.
    entropy_ax = plt.subplot2grid(gridsize, (0, entropy_col),
                                  rowspan=panel_cells,
                                  colspan=panel_cells)

    entropies = [utils.compute_entropy(frame) for frame in entropy_cp_dfs]
    entropy_sds = [
        utils.bootstrap_entropy_sd(frame) for frame in entropy_cp_dfs
    ]
    panels.make_entropy_panel(entropy_cp_dfs,
                              entropies,
                              entropy_sds,
                              entropy_cp_df_labels,
                              ax=entropy_ax)

    # Lower rows: one noise-comparison panel plus histogram per noisy frame.
    noise_row = panel_cells + gap_cells
    histogram_row = 2 * panel_cells + gap_cells
    last_column = n_columns - 1

    noise_axs = []
    for ii, (noisy_cp_df,
             noise_level) in enumerate(zip(noisy_cp_dfs, noise_levels)):
        column = column_stride * ii
        is_first = ii == 0

        # Every panel after the first shares axes with the first noise panel.
        # NOTE(review): the first column's histogram therefore does not share
        # x with its own noise panel -- confirm this is intended.
        if noise_axs:
            noise_share = {"sharex": noise_axs[0], "sharey": noise_axs[0]}
            histogram_share = {"sharex": noise_axs[0]}
        else:
            noise_share = {}
            histogram_share = {}

        noise_ax = plt.subplot2grid(gridsize, (noise_row, column),
                                    rowspan=panel_cells,
                                    colspan=panel_cells,
                                    **noise_share)

        # Only the first column gets a y label and only the last a legend;
        # the x label is left to the histogram underneath.
        panels.make_noise_comparison_panel(noisy_cp_df,
                                           analytical_cp_df,
                                           noiseless_cp_df,
                                           ax=noise_ax,
                                           include_legend=ii == last_column,
                                           include_x_label=False,
                                           include_y_label=is_first,
                                           noise_level=noise_level)

        histogram_ax = plt.subplot2grid(gridsize, (histogram_row, column),
                                        rowspan=histogram_cells,
                                        colspan=panel_cells,
                                        **histogram_share)

        panels.make_histogram_comparison_panel(
            [noisy_cp_df, analytical_cp_df],
            [NUMERICAL_CPS_COLOR, ANALYTICAL_CPS_COLOR],
            ax=histogram_ax,
            include_x_label=True,
            include_y_label=is_first)

        noise_axs.append(noise_ax)

    return f, ax_grid, bias_ax, noise_axs