import numpy as np

# `utils` provides generate_batch_factor_code, discrete_mutual_info,
# discrete_entropy, and make_discretizer; the import path below assumes the
# disentanglement_lib layout.
from disentanglement_lib.evaluation.metrics import utils


def compute_mig(ground_truth_data, model, random_state, num_train,
                batch_size=16):
    """Computes the MIG, maximized over a range of histogram bin sizes."""
    mus_train, ys_train = utils.generate_batch_factor_code(
        ground_truth_data, model, num_train, random_state, batch_size)
    # assert mus_train.shape[1] == num_train
    mig_scores = []
    for num_bins in range(2, 42, 4):
        discretized_mus = _histogram_discretize(mus_train, num_bins=num_bins)
        m = utils.discrete_mutual_info(discretized_mus, ys_train)
        assert m.shape[0] == mus_train.shape[0]
        assert m.shape[1] == ys_train.shape[0]
        # m is [num_latents, num_factors].
        entropy = utils.discrete_entropy(ys_train)
        sorted_m = np.sort(m, axis=0)[::-1]
        gap = sorted_m[0, :] - sorted_m[1, :]
        # The first factor (and its entropy) is excluded from the average.
        gap = np.delete(gap, 0, 0)
        entropy = np.delete(entropy, 0, 0)
        mig_scores.append(np.mean(np.divide(gap, entropy)))
    return max(mig_scores)
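# `_histogram_discretize` is not defined in this snippet. Below is a minimal
# sketch, assuming the conventional histogram-based discretizer (as in
# disentanglement_lib's utils): each latent dimension is binned independently
# against its own histogram edges.
def _histogram_discretize(target, num_bins=20):
    """Discretizes each row of `target` into `num_bins` histogram bins."""
    discretized = np.zeros_like(target)
    for i in range(target.shape[0]):
        discretized[i, :] = np.digitize(
            target[i, :], np.histogram(target[i, :], num_bins)[1][:-1])
    return discretized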
def aggregation_mig(m, ys_train):
    """Aggregation function of the MIG: mean score plus per-factor scores."""
    score = {}
    entropy = utils.discrete_entropy(ys_train)
    sorted_m = np.sort(m, axis=0)[::-1]
    mig_per_factor = np.divide(sorted_m[0, :] - sorted_m[1, :], entropy)
    score["mig"] = np.mean(mig_per_factor)
    assert len(mig_per_factor) == m.shape[1], "Wrong length."
    for i, factor_score in enumerate(mig_per_factor):
        score["mig.factor_{}".format(i)] = factor_score
    return score
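# For reference, minimal self-contained sketches of the `utils` helpers used
# throughout this file, assuming sklearn-based implementations (as in
# disentanglement_lib); the project's actual `utils` module is authoritative.
import sklearn.metrics


def _discrete_mutual_info_sketch(mus, ys):
    """Mutual information between every (latent, factor) pair."""
    m = np.zeros([mus.shape[0], ys.shape[0]])
    for i in range(mus.shape[0]):
        for j in range(ys.shape[0]):
            m[i, j] = sklearn.metrics.mutual_info_score(ys[j, :], mus[i, :])
    return m


def _discrete_entropy_sketch(ys):
    """Entropy of each factor, computed as the MI of a variable with itself."""
    h = np.zeros(ys.shape[0])
    for j in range(ys.shape[0]):
        h[j] = sklearn.metrics.mutual_info_score(ys[j, :], ys[j, :])
    return h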
def _compute_mig(mus_train, ys_train):
    """Computes the discrete MIG score from training codes and factors."""
    score_dict = {}
    # This variant of make_discretizer also returns the bin edges, which are
    # not needed here.
    discretized_mus, _ = utils.make_discretizer(mus_train)
    m = utils.discrete_mutual_info(discretized_mus, ys_train)
    assert m.shape[0] == mus_train.shape[0]
    assert m.shape[1] == ys_train.shape[0]
    # m is [num_latents, num_factors].
    entropy = utils.discrete_entropy(ys_train)
    sorted_m = np.sort(m, axis=0)[::-1]
    score_dict["discrete_mig"] = np.mean(
        np.divide(sorted_m[0, :] - sorted_m[1, :], entropy))
    return score_dict
def compute_mi_matrix(mus_train, ys_train, need_discretized_1=False,
                      need_discretized_2=False):
    """Returns the [num_latents, num_factors] MI matrix and factor entropies."""
    if need_discretized_1:
        mus_train = utils.make_discretizer(mus_train)
    if need_discretized_2:
        ys_train = utils.make_discretizer(ys_train)
    m = utils.discrete_mutual_info(mus_train, ys_train)
    assert m.shape[0] == mus_train.shape[0]
    assert m.shape[1] == ys_train.shape[0]
    # m is [num_latents, num_factors].
    entropy = utils.discrete_entropy(ys_train)
    return m, entropy
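# Hypothetical usage of compute_mi_matrix on synthetic discrete data, assuming
# the `utils` helpers behave as sketched above. Names and shapes here are
# illustrative only.
def _mi_matrix_demo():
    rng = np.random.RandomState(0)
    ys = rng.randint(0, 3, size=(2, 1000))             # [num_factors, num_points]
    mus = np.vstack([ys[0], rng.randint(0, 3, 1000)])  # latent 0 mirrors factor 0
    m, entropy = compute_mi_matrix(mus, ys)
    # m[0, 0] is close to entropy[0] (~log 3); the other entries are near 0.
    return m / entropy[None, :]  # entropy-normalized MI matrix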
def _compute_mig(mus_train, ys_train):
    """Computes the discrete MIG score, tolerating zero-entropy factors.

    Note: this redefinition supersedes the `_compute_mig` above; it differs
    only in how zero-entropy factors are handled.
    """
    score_dict = {}
    discretized_mus = utils.make_discretizer(mus_train)
    m = utils.discrete_mutual_info(discretized_mus, ys_train)
    assert m.shape[0] == mus_train.shape[0]
    assert m.shape[1] == ys_train.shape[0]
    # m is [num_latents, num_factors].
    entropy = utils.discrete_entropy(ys_train)
    sorted_m = np.sort(m, axis=0)[::-1]
    # With local sampling, some factors of ys are never varied, so their
    # entropies are zero. Divide only where the entropy is non-zero, fill the
    # remaining slots with NaN, and average with np.nanmean so those factors
    # are ignored. np.full_like builds the NaN-initialized output array
    # (ndarray.fill would return None rather than the array).
    score_dict["discrete_mig"] = np.nanmean(
        np.divide(sorted_m[0, :] - sorted_m[1, :], entropy,
                  out=np.full_like(entropy, np.nan),
                  where=entropy != 0))
    return score_dict
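# A quick, pure-numpy illustration of the safe-divide / nanmean pattern used
# above (synthetic numbers, added for clarity):
def _safe_divide_example():
    gap = np.array([0.5, 0.3, 0.2])
    entropy = np.array([1.0, 0.0, 2.0])  # factor 1 was never varied
    ratio = np.divide(gap, entropy,
                      out=np.full_like(entropy, np.nan),
                      where=entropy != 0)
    # ratio == [0.5, nan, 0.1]; the zero-entropy factor drops out of the mean.
    return np.nanmean(ratio)  # == 0.3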
def test_discrete_entropy(self):
    target = np.array([[1, 1, 2, 2, 3, 3],
                       [3, 3, 2, 2, 1, 1]])
    result = utils.discrete_entropy(target)
    # Each row takes three values with equal frequency, so the entropy of
    # each factor is log(3) nats.
    shouldbe = np.log(3)
    np.testing.assert_allclose(result, [shouldbe, shouldbe])