Example #1
def preprocess_discretizations(mus_train, cum_distribution):
    """Discretizes mus_train once per cumulative distribution, for reuse downstream."""
    discretizations = []
    for j_cum_distribution in cum_distribution:
        discretized_mus, bins = utils.make_discretizer(mus_train,
                                                       j_cum_distribution)
        discretizations.append([discretized_mus, bins])
    return discretizations
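The `utils.make_discretizer` helper is project-specific and not shown here. Assuming it performs per-dimension histogram discretization as in upstream disentanglement_lib, a minimal self-contained stand-in looks like the sketch below (the names `histogram_discretize` and `num_bins` are illustrative, not from this codebase):

import numpy as np

def histogram_discretize(target, num_bins=20):
    # Bin each row (one code dimension) by its own histogram edges.
    discretized = np.zeros_like(target, dtype=np.int64)
    bins = []
    for i in range(target.shape[0]):
        edges = np.histogram(target[i, :], num_bins)[1][:-1]
        discretized[i, :] = np.digitize(target[i, :], edges)
        bins.append(edges)
    return discretized, bins

codes = np.random.RandomState(0).randn(5, 1000)  # [num_codes, num_points]
discretized, bins = histogram_discretize(codes)
print(discretized.shape, len(bins))              # (5, 1000) 5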
Example #2
def _compute_mig_sup(dataholder, mus_train, ys_train):
    """Computes the MIG-sup score from training codes and factors."""
    score_dict = {}
    m = np.zeros((mus_train.shape[0], ys_train.shape[0]))
    for j, y_train in enumerate(ys_train):
        discretized_mus, bins = utils.make_discretizer(
            mus_train, dataholder.cumulative_dist[j])
        m[:, j] = utils.discrete_mutual_info(
            discretized_mus, y_train.reshape((1, -1))).flatten()

    # m is [num_latents, num_factors].
    assert m.shape[0] == discretized_mus.shape[0]
    assert m.shape[1] == ys_train.shape[0]

    # For each code, take the gap between its top two factor MIs; the
    # normalized variant divides each MI by the entropy of its factor.
    entropy = utils.discrete_entropy(ys_train)
    dis = np.zeros((m.shape[0],))
    norm_dis = np.zeros((m.shape[0],))
    for i, code_MI in enumerate(m):
        j_i, j_k = (-code_MI).argsort()[:2]
        dis[i] = code_MI[j_i] - code_MI[j_k]
        norm_dis[i] = code_MI[j_i] / entropy[j_i] - code_MI[j_k] / entropy[j_k]

    score_dict["MIG_sup_score"] = np.mean(norm_dis)
    score_dict["MIG_sup_unnormalized"] = np.mean(dis)
    return score_dict
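To make the arithmetic concrete, here is a toy MIG-sup computation on a hand-made MI matrix (the numbers are invented, not from any dataset):

import numpy as np

# Toy MI matrix: m[i, j] = I(code_i; factor_j), plus made-up factor entropies.
m = np.array([[0.9, 0.1, 0.0],
              [0.2, 0.7, 0.1]])
entropy = np.array([1.0, 1.0, 0.5])

idx = np.argsort(-m, axis=1)[:, :2]  # top two factors per code
rows = np.arange(m.shape[0])
dis = m[rows, idx[:, 0]] - m[rows, idx[:, 1]]
norm_dis = (m[rows, idx[:, 0]] / entropy[idx[:, 0]]
            - m[rows, idx[:, 1]] / entropy[idx[:, 1]])
print(np.mean(norm_dis))  # MIG_sup_score: 0.65
print(np.mean(dis))       # MIG_sup_unnormalized: 0.65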
Example #3
def get_MI_matrix(dataholder, mus, ys):
    """Returns the [num_latents, num_factors] mutual-information matrix."""
    m = np.zeros((mus.shape[0], ys.shape[0]))
    for j, y in enumerate(ys):
        discretized_mus, bins = utils.make_discretizer(
            mus, dataholder.cumulative_dist[j])
        m[:, j] = utils.discrete_mutual_info(
            discretized_mus, y.reshape((1, -1))).flatten()
    return m
Example #4
def unsupervised_metrics(ground_truth_data,
                         representation_function,
                         random_state,
                         artifact_dir=None,
                         num_train=gin.REQUIRED,
                         batch_size=16):
    """Computes unsupervised scores based on covariance and mutual information.

  Args:
    ground_truth_data: GroundTruthData to be sampled from.
    representation_function: Function that takes observations as input and
      outputs a dim_representation sized representation for each observation.
    random_state: Numpy random state used for randomness.
    artifact_dir: Optional path to directory where artifacts can be saved.
    num_train: Number of points used for training.
    batch_size: Batch size for sampling.

  Returns:
    Dictionary with scores.
  """
    del artifact_dir
    scores = {}
    logging.info("Generating training set.")
    mus_train, _ = utils.generate_batch_factor_code(ground_truth_data,
                                                    representation_function,
                                                    num_train, random_state,
                                                    batch_size)
    num_codes = mus_train.shape[0]
    cov_mus = np.cov(mus_train)
    assert num_codes == cov_mus.shape[0]

    # Gaussian total correlation.
    scores["gaussian_total_correlation"] = gaussian_total_correlation(cov_mus)

    # Gaussian Wasserstein correlation.
    scores["gaussian_wasserstein_correlation"] = (
        gaussian_wasserstein_correlation(cov_mus))
    scores["gaussian_wasserstein_correlation_norm"] = (
        scores["gaussian_wasserstein_correlation"] / np.sum(np.diag(cov_mus)))

    # Compute average mutual information between different latent codes.
    mus_discrete, bins = utils.make_discretizer(mus_train)
    mutual_info_matrix = utils.discrete_mutual_info(mus_discrete, mus_discrete)
    np.fill_diagonal(mutual_info_matrix, 0)
    mutual_info_score = np.sum(mutual_info_matrix) / (num_codes**2 - num_codes)
    scores["mutual_info_score"] = mutual_info_score
    return scores
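The helpers gaussian_total_correlation and gaussian_wasserstein_correlation are not shown above. For a Gaussian fit to the codes, total correlation has a closed form; the sketch below follows the upstream disentanglement_lib definition and is an assumption about this fork's implementation:

import numpy as np

def gaussian_total_correlation(cov):
    # TC of a Gaussian: 0.5 * (sum of log marginal variances - log det covariance).
    return 0.5 * (np.sum(np.log(np.diag(cov))) - np.linalg.slogdet(cov)[1])

cov = np.array([[1.0, 0.5],
                [0.5, 1.0]])
print(gaussian_total_correlation(cov))  # 0.5 * (0 - log 0.75) ≈ 0.1438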
Example #5
def _compute_dcimig(dataholder, mus_train, ys_train):
    """Computes the DCIMIG score from training codes and factors."""
    score_dict = {}
    m = np.zeros((mus_train.shape[0], ys_train.shape[0]))
    for j, y_train in enumerate(ys_train):
        discretized_mus, bins = utils.make_discretizer(
            mus_train, dataholder.cumulative_dist[j])
        m[:, j] = utils.discrete_mutual_info(
            discretized_mus, y_train.reshape((1, -1))).flatten()

    # m is [num_latents, num_factors].
    assert m.shape[0] == discretized_mus.shape[0]
    assert m.shape[1] == ys_train.shape[0]

    # For score normalization.
    entropy = utils.discrete_entropy(ys_train)

    # For each code, the gap between its top two factor MIs, plus the id of
    # the factor it informs most.
    Dis = np.zeros((m.shape[0],))
    jis = []
    for i, code_MI in enumerate(m):
        j_i, j_k = (-code_MI).argsort()[:2]
        jis.append(j_i)
        Dis[i] = code_MI[j_i] - code_MI[j_k]

    # For each factor, keep the largest gap among the codes that inform it
    # most; a factor claimed by no code contributes 0.
    Djz = []
    for j in range(m.shape[1]):
        II_j = [i for i in range(m.shape[0]) if jis[i] == j]
        if II_j:
            Djz.append(Dis[II_j[np.argmax(Dis[II_j])]])
        else:
            Djz.append(0)

    score_dict["DCIMIG_unnormalized"] = np.mean(Djz)
    score_dict["DCIMIG_normalized"] = np.mean(np.divide(Djz, entropy))

    return score_dict
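A toy walkthrough of the same DCIMIG logic on an invented MI matrix:

import numpy as np

m = np.array([[0.9, 0.1],    # code 0 informs factor 0 most
              [0.6, 0.2],    # code 1 also informs factor 0 most
              [0.1, 0.3]])   # code 2 informs factor 1 most
entropy = np.array([1.0, 0.8])

best = np.argmax(m, axis=1)                                   # [0, 0, 1]
gaps = np.sort(m, axis=1)[:, -1] - np.sort(m, axis=1)[:, -2]  # [0.8, 0.4, 0.2]
Djz = [max((gaps[i] for i in range(len(gaps)) if best[i] == j), default=0.0)
       for j in range(m.shape[1])]                            # [0.8, 0.2]
print(np.mean(Djz))                      # DCIMIG_unnormalized: 0.5
print(np.mean(np.divide(Djz, entropy)))  # DCIMIG_normalized: 0.525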
Example #6
def compute_irs(dataholder,
                random_state,
                artifact_dir=None,
                diff_quantile=0.99,
                num_train=gin.REQUIRED,
                batch_size=gin.REQUIRED):
    """Computes the Interventional Robustness Score.

    Args:
      dataholder: Holder providing observations and representations to sample.
      random_state: Numpy random state used for randomness.
      artifact_dir: Optional path to directory where artifacts can be saved.
      diff_quantile: Float value between 0 and 1 to decide what quantile of
        diffs to select (use 1.0 for the version in the paper).
      num_train: Number of points used for training.
      batch_size: Batch size for sampling.

    Returns:
      Dict with IRS and number of active dimensions.
    """
    del artifact_dir
    logging.info("Generating training set.")
    mus, ys = utils.generate_batch_factor_code(dataholder, num_train,
                                               random_state, batch_size)
    assert mus.shape[1] == num_train

    ys_discrete, bins = utils.make_discretizer(ys, dataholder.cumulative_dist)
    active_mus = _drop_constant_dims(mus)

    if not active_mus.any():
        # Keep the same dict shape as scalable_disentanglement_score so the
        # lookups below do not fail when every code dimension is constant.
        irs_score = {"avg_score": 0.0, "disentanglement_scores": []}
    else:
        irs_score = scalable_disentanglement_score(ys_discrete.T, active_mus.T,
                                                   diff_quantile)

    score_dict = {}
    score_dict["IRS"] = irs_score["avg_score"]
    score_dict["IRS_disentanglement_scores"] = irs_score["disentanglement_scores"]
    # Count the non-constant code dimensions rather than summing their values.
    score_dict["num_active_dims"] = active_mus.shape[0]
    return score_dict
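`_drop_constant_dims` is not shown in this example; upstream disentanglement_lib defines it roughly as follows (a sketch, assuming this fork kept the same behavior):

import numpy as np

def _drop_constant_dims(ys):
    # Keep only rows (code dimensions) whose variance is non-zero.
    ys = np.asarray(ys)
    if ys.ndim != 2:
        raise ValueError("Expecting a matrix.")
    active_mask = ys.var(axis=1) > 0.0
    return ys[active_mask, :]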
Example #7
def _compute_mig(dataholder, mus_train, ys_train):
    """Computes the MIG score from training codes and factors."""
    score_dict = {}
    m = np.zeros((mus_train.shape[0], ys_train.shape[0]))
    for j, y_train in enumerate(ys_train):
        discretized_mus, bins = utils.make_discretizer(
            mus_train, dataholder.cumulative_dist[j])
        m[:, j] = utils.discrete_mutual_info(
            discretized_mus, y_train.reshape((1, -1))).flatten()

    # m is [num_latents, num_factors]. For each factor, take the gap between
    # the two codes that share the most information with it.
    entropy = utils.discrete_entropy(ys_train)
    sorted_m = np.sort(m, axis=0)[::-1]

    score_dict["MIG_score"] = np.mean(
        np.divide(sorted_m[0, :] - sorted_m[1, :], entropy))
    score_dict["MIG_unnormalized"] = np.mean(sorted_m[0, :] - sorted_m[1, :])
    return score_dict
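The same MIG computation on a toy matrix, as a quick sanity check (numbers invented):

import numpy as np

m = np.array([[0.9, 0.1],
              [0.2, 0.7]])      # m[i, j] = I(code_i; factor_j)
entropy = np.array([1.0, 0.8])  # H(factor_j)

sorted_m = np.sort(m, axis=0)[::-1]  # per factor, MIs sorted best-first
print(np.mean((sorted_m[0, :] - sorted_m[1, :]) / entropy))  # MIG_score: 0.725
print(np.mean(sorted_m[0, :] - sorted_m[1, :]))              # MIG_unnormalized: 0.65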