import numpy as np
from pyitlib import discrete_random_variable as drv
from sklearn import linear_model
from sklearn.preprocessing import minmax_scale

# NOTE: the discretization and mutual-information helpers (get_bin_index, get_mutual_information)
# and the private helpers used further down (_implementation_1, _implementation_2, _sap_regression,
# _sap_classification, _drop_constant_dims, _compute_variances, _prune_dims, _prepare_datasets,
# _compute_votes) are assumed to be provided elsewhere in this repository.


def modularity(factors, codes, continuous_factors=True, nb_bins=10):
    ''' Modularity metric from K. Ridgeway and M. C. Mozer,
        “Learning deep disentangled embeddings with the f-statistic loss,” in NeurIPS, 2018.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    '''
    # count the number of factors and latent codes
    nb_factors = factors.shape[1]
    nb_codes = codes.shape[1]

    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # quantize latent codes
    codes = minmax_scale(codes)                    # normalize in [0, 1] all columns
    codes = get_bin_index(codes, nb_bins)          # quantize values and get indexes

    # compute mutual information matrix
    mi_matrix = np.zeros((nb_factors, nb_codes))
    for f in range(nb_factors):
        for c in range(nb_codes):
            mi_matrix[f, c] = get_mutual_information(factors[:, f], codes[:, c], normalize=False)

    # compute the score for all codes
    sum_score = 0
    for c in range(nb_codes):
        # find the index of the factor with the maximum MI
        max_mi_idx = np.argmax(mi_matrix[:, c])

        # compute numerator
        numerator = 0
        for f, mi_f in enumerate(mi_matrix[:, c]):
            if f != max_mi_idx:
                numerator += mi_f**2

        # get the score for this code
        s = 1 - numerator / (mi_matrix[max_mi_idx, c]**2 * (nb_factors - 1))
        sum_score += s

    # compute the mean score over all codes
    modularity_score = sum_score / nb_codes

    return modularity_score
def mig(factors, codes, continuous_factors=True, nb_bins=10):
    ''' MIG metric from R. T. Q. Chen, X. Li, R. B. Grosse, and D. K. Duvenaud,
        “Isolating sources of disentanglement in variational autoencoders,” in NeurIPS, 2018.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    '''
    # count the number of factors and latent codes
    nb_factors = factors.shape[1]
    nb_codes = codes.shape[1]

    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # quantize latent codes
    codes = minmax_scale(codes)                    # normalize in [0, 1] all columns
    codes = get_bin_index(codes, nb_bins)          # quantize values and get indexes

    # compute mutual information matrix
    mi_matrix = np.zeros((nb_factors, nb_codes))
    for f in range(nb_factors):
        for c in range(nb_codes):
            mi_matrix[f, c] = get_mutual_information(factors[:, f], codes[:, c])

    # compute the gap for all factors
    sum_gap = 0
    for f in range(nb_factors):
        mi_f = np.sort(mi_matrix[f, :])
        # get diff between highest and second highest term and add it to the total gap
        sum_gap += mi_f[-1] - mi_f[-2]

    # compute the mean gap
    mig_score = sum_gap / nb_factors

    return mig_score
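# Illustrative usage sketch (not part of the original metrics): the functions above expect
# `factors` and `codes` as 2-D arrays with one row per data point and one column per factor
# or latent dimension.  The synthetic data below is hypothetical and only meant to show the
# expected call signature and input layout.
def _demo_mi_metrics(nb_points=5000, seed=0):
    rng = np.random.default_rng(seed)
    factors = rng.uniform(size=(nb_points, 3))           # 3 ground-truth continuous factors
    noise = rng.uniform(size=(nb_points, 3))             # 3 uninformative latent dimensions
    codes = np.concatenate([factors, noise], axis=1)     # 6 latent codes, one row per point

    # both scores should be high when each factor is captured by exactly one code
    print('modularity:', modularity(factors, codes, continuous_factors=True, nb_bins=10))
    print('mig:       ', mig(factors, codes, continuous_factors=True, nb_bins=10))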
def mig_sup(factors, codes, continuous_factors=True, nb_bins=10):
    ''' MIG-SUP metric from Z. Li, J. V. Murkute, P. K. Gyawali, and L. Wang,
        “Progressive learning and disentanglement of hierarchical representations,” in ICLR, 2020.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    '''
    # count the number of factors and latent codes
    nb_factors = factors.shape[1]
    nb_codes = codes.shape[1]

    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # quantize latent codes
    codes = minmax_scale(codes)                    # normalize in [0, 1] all columns
    codes = get_bin_index(codes, nb_bins)          # quantize values and get indexes

    # compute mutual information matrix
    mi_matrix = np.zeros((nb_factors, nb_codes))
    for f in range(nb_factors):
        for c in range(nb_codes):
            mi_matrix[f, c] = get_mutual_information(factors[:, f], codes[:, c])

    # compute the gap for all codes
    sum_gap = 0
    for c in range(nb_codes):
        mi_c = np.sort(mi_matrix[:, c])
        # get diff between highest and second highest term and add it to the total gap
        sum_gap += mi_c[-1] - mi_c[-2]

    # compute the mean gap
    mig_sup_score = sum_gap / nb_codes

    return mig_sup_score
def explicitness(factors, codes, continuous_factors=True, nb_bins=10, scale=True, impl=1):
    ''' Explicitness metric from K. Ridgeway and M. C. Mozer,
        “Learning deep disentangled embeddings with the f-statistic loss,” in NeurIPS, 2018.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    :param scale:               if True, the output will be scaled from 0 to 1 instead of 0.5 to 1
    :param impl:                implementation to use for explicitness score computation
    '''
    # count the number of factors and latent codes
    nb_factors = factors.shape[1]
    nb_codes = codes.shape[1]

    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # normalize latent codes in [0, 1] (all columns)
    codes = minmax_scale(codes)

    # compute score using one of the two implementations
    if impl == 1:
        return _implementation_1(factors, codes, nb_factors, scale)
    elif impl == 2:
        return _implementation_2(factors, codes, nb_factors, scale)
    else:
        raise ValueError(f'ERROR -- argument "impl" is {impl} but must be either 1 or 2')
def sap(factors, codes, continuous_factors=True, nb_bins=10, regression=True):
    ''' SAP metric from A. Kumar, P. Sattigeri, and A. Balakrishnan,
        “Variational inference of disentangled latent concepts from unlabeled observations,” in ICLR, 2018.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    :param regression:          True:   compute score using regression algorithms
                                False:  compute score using classification algorithms
    '''
    # count the number of factors and latent codes
    nb_factors = factors.shape[1]
    nb_codes = codes.shape[1]

    # perform regression
    if regression:
        assert continuous_factors, 'Cannot perform SAP regression with discrete factors.'
        return _sap_regression(factors, codes, nb_factors, nb_codes)

    # perform classification
    else:
        # quantize factors if they are continuous
        if continuous_factors:
            factors = minmax_scale(factors)            # normalize in [0, 1] all columns
            factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

        # normalize latent codes in [0, 1] (all columns)
        codes = minmax_scale(codes)

        # compute score using classification algorithms
        return _sap_classification(factors, codes, nb_factors, nb_codes)
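# Hypothetical calls illustrating the two SAP modes above: with continuous factors the regression
# path can be used directly, whereas discrete (or discretized) factors require regression=False so
# that per-dimension classifiers are used instead.  `factors` and `codes` follow the same layout as
# in _demo_mi_metrics.
def _demo_sap(factors, codes):
    score_reg = sap(factors, codes, continuous_factors=True, regression=True)
    score_clf = sap(factors, codes, continuous_factors=True, regression=False, nb_bins=10)
    return score_reg, score_clf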
def dcimig(factors, codes, continuous_factors=True, nb_bins=10):
    ''' DCIMIG metric from A. Sepliarskaia, J. Kiseleva, and M. de Rijke,
        “Evaluating disentangled representations,” arXiv:1910.05587, 2020.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    '''
    # count the number of factors and latent codes
    nb_factors = factors.shape[1]
    nb_codes = codes.shape[1]

    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # quantize latent codes
    codes = minmax_scale(codes)                    # normalize in [0, 1] all columns
    codes = get_bin_index(codes, nb_bins)          # quantize values and get indexes

    # compute mutual information matrix
    mi_matrix = np.zeros((nb_factors, nb_codes))
    for f in range(nb_factors):
        for c in range(nb_codes):
            mi_matrix[f, c] = get_mutual_information(factors[:, f], codes[:, c], normalize=False)

    # compute the gap for all codes
    for c in range(nb_codes):
        mi_c = np.sort(mi_matrix[:, c])
        max_idx = np.argmax(mi_matrix[:, c])

        # get diff between highest and second highest term
        gap = mi_c[-1] - mi_c[-2]

        # replace the best by the gap and the rest by 0
        mi_matrix[:, c] = mi_matrix[:, c] * 0
        mi_matrix[max_idx, c] = gap

    # find the best gap for each factor
    gap_sum = 0
    for f in range(nb_factors):
        gap_sum += np.max(mi_matrix[f, :])

    # sum the entropy of each factor
    factor_entropy = 0
    for f in range(nb_factors):
        factor_entropy += drv.entropy(factors[:, f])

    # normalize the summed gaps by the total factor entropy
    dcimig_score = gap_sum / factor_entropy

    return dcimig_score
def irs(factors, codes, continuous_factors=True, nb_bins=10, diff_quantile=1.):
    ''' IRS metric from R. Suter, D. Miladinovic, B. Schölkopf, and S. Bauer,
        “Robustly disentangled causal mechanisms: Validating deep representations for
        interventional robustness,” in ICML, 2019.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    :param diff_quantile:       float value between 0 and 1 to decide what quantile of diffs to select
                                use 1.0 for the version in the paper
    '''
    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # remove constant dimensions
    codes = _drop_constant_dims(codes)

    if not codes.any():
        irs_score = 0.0
    else:
        # count the number of factors and latent codes
        nb_factors = factors.shape[1]
        nb_codes = codes.shape[1]

        # compute normalizer
        max_deviations = np.max(np.abs(codes - codes.mean(axis=0)), axis=0)
        cum_deviations = np.zeros([nb_codes, nb_factors])
        for i in range(nb_factors):
            unique_factors = np.unique(factors[:, i], axis=0)
            assert unique_factors.ndim == 1
            nb_distinct_factors = unique_factors.shape[0]

            for k in range(nb_distinct_factors):
                # compute E[Z | g_i]
                match = factors[:, i] == unique_factors[k]
                e_loc = np.mean(codes[match, :], axis=0)

                # difference of each value within that group of constant g_i to its mean
                diffs = np.abs(codes[match, :] - e_loc)
                max_diffs = np.percentile(diffs, q=diff_quantile * 100, axis=0)
                cum_deviations[:, i] += max_diffs
            cum_deviations[:, i] /= nb_distinct_factors

        # normalize value of each latent dimension with its maximal deviation
        normalized_deviations = cum_deviations / max_deviations[:, np.newaxis]
        irs_matrix = 1.0 - normalized_deviations
        disentanglement_scores = irs_matrix.max(axis=1)

        if np.sum(max_deviations) > 0.0:
            irs_score = np.average(disentanglement_scores, weights=max_deviations)
        else:
            irs_score = np.mean(disentanglement_scores)

    return irs_score
def jemmig(factors, codes, continuous_factors=True, nb_bins=10):
    ''' JEMMIG metric from K. Do and T. Tran,
        “Theory and evaluation metrics for learning disentangled representations,” in ICLR, 2020.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    '''
    # count the number of factors and latent codes
    nb_factors = factors.shape[1]
    nb_codes = codes.shape[1]

    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # quantize latent codes
    codes = minmax_scale(codes)                    # normalize in [0, 1] all columns
    codes = get_bin_index(codes, nb_bins)          # quantize values and get indexes

    # compute mutual information matrix
    mi_matrix = np.zeros((nb_factors, nb_codes))
    for f in range(nb_factors):
        for c in range(nb_codes):
            mi_matrix[f, c] = get_mutual_information(factors[:, f], codes[:, c], normalize=False)

    # compute joint entropy matrix
    je_matrix = np.zeros((nb_factors, nb_codes))
    for f in range(nb_factors):
        for c in range(nb_codes):
            X = np.stack((factors[:, f], codes[:, c]), 0)
            je_matrix[f, c] = drv.entropy_joint(X)

    # compute the normalized score for all factors
    sum_gap = 0
    for f in range(nb_factors):
        mi_f = np.sort(mi_matrix[f, :])
        je_idx = np.argsort(mi_matrix[f, :])[-1]

        # compute unnormalized JEMMIG
        jemmig_not_normalized = je_matrix[f, je_idx] - mi_f[-1] + mi_f[-2]

        # normalize by H(f) + log(#bins)
        jemmig_f = jemmig_not_normalized / (drv.entropy_joint(factors[:, f]) + np.log2(nb_bins))
        jemmig_f = 1 - jemmig_f
        sum_gap += jemmig_f

    # compute the mean score over all factors
    jemmig_score = sum_gap / nb_factors

    return jemmig_score
def z_max_var(factors, codes, continuous_factors=True, nb_bins=3, batch_size=200,
              nb_training=800, nb_eval=800, nb_variance_estimate=10000,
              std_threshold=0.05, scale=True, verbose=False):
    ''' Z-max Variance metric from M. Kim, Y. Wang, P. Sahu, and V. Pavlovic,
        “Relevance Factor VAE: Learning and identifying disentangled factors,” arXiv:1902.01568, 2019.

    :param factors:                 dataset of factors
                                    each column is a factor and each line is a data point
    :param codes:                   latent codes associated to the dataset of factors
                                    each column is a latent code and each line is a data point
    :param continuous_factors:      True:   factors are described as continuous variables
                                    False:  factors are described as discrete variables
    :param nb_bins:                 number of bins to use for discretization
    :param batch_size:              size of batch
    :param nb_training:             number of training points
    :param nb_eval:                 number of evaluation points
    :param nb_variance_estimate:    number of points to use for global variance estimation
    :param std_threshold:           minimum accepted standard deviation
    :param scale:                   if True, the output will be scaled from 0 to 1
    :param verbose:                 if True, print warnings
    '''
    # count the number of factors and latent codes
    nb_factors = factors.shape[1]
    nb_codes = codes.shape[1]

    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # compute an estimation of the empirical variance for each latent dimension
    lines_idx = np.arange(codes.shape[0])
    np.random.shuffle(lines_idx)
    var_codes = codes[lines_idx][:nb_variance_estimate]
    emp_variances = _compute_variances(var_codes, axis=0)

    # identify active latent dimensions
    active_dims = _prune_dims(emp_variances, threshold=std_threshold, verbose=verbose)

    # prepare Z-max-var datasets for training and evaluation
    train_set, eval_set = _prepare_datasets(factors=factors, codes=codes, batch_size=batch_size,
                                            nb_training=nb_training, nb_eval=nb_eval, verbose=verbose,
                                            variances=emp_variances, active_dims=active_dims)

    # discretization is too fine grained -- score cannot be computed correctly
    if train_set is np.nan and eval_set is np.nan:
        return np.nan

    # compute training accuracy
    training_votes = _compute_votes(inputs=train_set[:, 0], targets=train_set[:, 1],
                                    nb_codes=nb_codes, nb_factors=nb_factors)
    latent_idx = np.arange(nb_codes)                # (nb_codes, )
    classifier = np.argmax(training_votes, axis=1)  # (nb_codes, )
    train_accuracy = np.sum(training_votes[latent_idx, classifier]) * 1. / np.sum(training_votes)

    # compute evaluation accuracy
    eval_votes = _compute_votes(inputs=eval_set[:, 0], targets=eval_set[:, 1],
                                nb_codes=nb_codes, nb_factors=nb_factors)
    eval_accuracy = np.sum(eval_votes[latent_idx, classifier]) * 1. / np.sum(eval_votes)

    # scale scores in [0, 1]
    if scale:
        # min value corresponds to a classifier that chooses at random
        min_val, max_val = 1. / nb_factors, 1.
        train_accuracy = (train_accuracy - min_val) / (max_val - min_val)
        eval_accuracy = (eval_accuracy - min_val) / (max_val - min_val)

    return eval_accuracy
def z_diff(factors, codes, continuous_factors=True, nb_bins=10, batch_size=200,
           nb_training=10000, nb_eval=5000, nb_max_iterations=10000, scale=True):
    ''' Z-diff metric from I. Higgins, L. Matthey, A. Pal, C. Burgess, X. Glorot, M. Botvinick,
        S. Mohamed, and A. Lerchner, “β-VAE: Learning basic visual concepts with a constrained
        variational framework,” in ICLR, 2017.

    :param factors:             dataset of factors
                                each column is a factor and each line is a data point
    :param codes:               latent codes associated to the dataset of factors
                                each column is a latent code and each line is a data point
    :param continuous_factors:  True:   factors are described as continuous variables
                                False:  factors are described as discrete variables
    :param nb_bins:             number of bins to use for discretization
    :param batch_size:          size of batch
    :param nb_training:         number of training points
    :param nb_eval:             number of evaluation points
    :param nb_max_iterations:   number of training iterations for the linear model
    :param scale:               if True, the output will be scaled from 0 to 1
    '''
    # count the number of factors
    nb_factors = factors.shape[1]

    # quantize factors if they are continuous
    if continuous_factors:
        factors = minmax_scale(factors)            # normalize in [0, 1] all columns
        factors = get_bin_index(factors, nb_bins)  # quantize values and get indexes

    # prepare Z-diff datasets for training and evaluation
    train_set, eval_set = _prepare_datasets(factors=factors, codes=codes, batch_size=batch_size,
                                            nb_training=nb_training, nb_eval=nb_eval)

    # discretization is too fine grained -- score cannot be computed correctly
    if train_set is np.nan and eval_set is np.nan:
        return np.nan

    # train model
    inputs, targets = train_set
    model = linear_model.LogisticRegression(max_iter=nb_max_iterations)
    model.fit(inputs, targets)

    # compute training accuracy
    train_accuracy = model.score(inputs, targets)

    # compute evaluation accuracy
    inputs, targets = eval_set
    eval_accuracy = model.score(inputs, targets)

    # scale scores in [0, 1]
    if scale:
        # min value corresponds to a classifier that chooses at random
        min_val, max_val = 1. / nb_factors, 1.
        train_accuracy = (train_accuracy - min_val) / (max_val - min_val)
        eval_accuracy = (eval_accuracy - min_val) / (max_val - min_val)

    return eval_accuracy
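# Illustrative calls for the two classifier-based metrics above (z_max_var and z_diff); the
# arguments shown are simply the defaults documented in their docstrings, and `factors` / `codes`
# follow the same layout as in _demo_mi_metrics.  With scale=True the returned accuracy is
# rescaled so that 0 corresponds to a chance-level classifier (1 / nb_factors) and 1 to a perfect one.
def _demo_classifier_metrics(factors, codes):
    zmv_score = z_max_var(factors, codes, continuous_factors=True, nb_bins=3,
                          batch_size=200, nb_training=800, nb_eval=800, scale=True)
    zdiff_score = z_diff(factors, codes, continuous_factors=True, nb_bins=10,
                         batch_size=200, nb_training=10000, nb_eval=5000, scale=True)
    return zmv_score, zdiff_score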