def evaluate(p_t, p_yt_co_occur):
    # beta, p_x, p_yx, bp_x, free_energy, entropy and assigns come from the
    # enclosing scope (reassigning `assigns` needs a nonlocal/global declaration there).
    p_yt = norm(p_yt_co_occur)                                     # p(y|t) from co-occurrence counts
    p_tx = membership_probability(p_t, p_yt, beta, p_x, p_yx)      # soft memberships p(t|x)
    fe = get_free_energy(bp_x.value, p_tx, p_yx, p_t, p_yt, beta)
    free_energy.append(fe)                                         # track free energy per evaluation
    assigns = join_assigns(hardcluster(p_tx), assigns)             # accumulate hard assignments
    entropy.append(compute_entropy(assigns))                       # entropy of the assignment traces
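# `norm` is not defined in this excerpt. A minimal sketch, assuming it only
# rescales the co-occurrence counts into a conditional distribution p(y|t);
# the normalization axis is an assumption, not taken from the source:
import numpy as np

def norm(p_yt_co_occur, axis=0):
    totals = p_yt_co_occur.sum(axis=axis, keepdims=True)
    return p_yt_co_occur / np.where(totals == 0, 1.0, totals)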
import numpy as np

def split_entropy(init_beta, converge_dist, split_dist, numOfX,
                  p_x, p_yx, p_yx_co_occur, trials=10):
    # `log` is the module-level logger.
    entropy = []
    traces = ['' for k in range(numOfX)]
    loop = 0
    while loop < trials:
        loop += 1
        log.info("Loop %d" % loop)
        # Re-initialize two centroids, then search for the critical beta.
        init_p_tx, assignments = hartigan_twoCentroids(p_x, p_yx, numOfX)
        beta, p_tx = search_beta(init_p_tx, init_beta, converge_dist, split_dist,
                                 p_x, p_yx, p_yx_co_occur)
        # Append each point's hard label to its trace, then record the trace entropy.
        for k, c in zip(range(numOfX), np.argmax(p_tx, axis=1)):
            traces[k] = traces[k] + "(%d)" % c
        entropy.append(compute_entropy(traces))
    return traces, entropy
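# `compute_entropy` is likewise not shown. Since it is called on a list of trace
# strings, a plausible sketch is the Shannon entropy (in bits) of the empirical
# distribution over distinct traces; a hypothetical implementation:
from collections import Counter
import numpy as np

def compute_entropy(traces):
    counts = np.array(list(Counter(traces).values()), dtype=float)
    probs = counts / counts.sum()
    return float(-np.sum(probs * np.log2(probs)))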
y = df_results["Best model ROC"] y = y.sample(frac=1) entropies = [] #Compute Shanon's entropy for i in range(len(y)): #0,1,...,201 pd_series = pd.Series(y[:i + 1]) count = pd_series.value_counts() probabilities = count / (i + 1) entropy = [] for prob in probabilities: entropy.append(prob * log2(prob)) entropies.append(-sum(entropy)) # Plot the entropy vs. the number of training datasets training_datasets = list(range(1, 203)) plt.figure(dpi=1200) plt.xlabel('Number of training datasets', fontsize=16) plt.ylabel("Shannon's entropy [bits]", fontsize=16) plt.yticks([0, 1, 2, 3, 4], [0, 1, 2, 3, 4], fontsize=14) plt.ylim(0, 3.5) plt.xticks(fontsize=14) plt.title("Entropy vs. Number of training datasets", fontsize=16, fontweight='bold')