def on_plot(self, cfg, output_dir):
  # average the (train, test) matrix pair returned by the Criticizer
  average = lambda train_test: (train_test[0] + train_test[1]) / 2.
  path = os.path.join(output_dir, 'matrix.png')
  # reorder the latent codes (rows) for the best diagonal code-factor match
  decode = lambda mat: search.diagonal_beam_search(mat.T)
  stats = lambda mat: " mean:%.2f mean(max):%.2f" % (
      np.mean(mat), np.mean(np.max(mat, axis=0)))
  crt = Criticizer(vae=self.model)
  crt.sample_batch(inputs=self.test,
                   n_samples=[10000, 5000],
                   factor_names=self.ds.labels,
                   verbose=True)
  n_codes = crt.n_codes
  n_factors = crt.n_factors
  mi = average(crt.create_mutualinfo_matrix())
  spearman = average(crt.create_correlation_matrix(method='spearman'))
  pearson = average(crt.create_correlation_matrix(method='pearson'))
  height = 16
  fig = plt.figure(figsize=(height * n_factors / n_codes * 3 + 2, height + 2))
  kw = dict(cbar=True, annotation=True, fontsize=8)
  ids = decode(mi)
  vs.plot_heatmap(mi[ids],
                  xticklabels=crt.factor_names,
                  yticklabels=crt.code_names[ids],
                  cmap="Blues",
                  ax=(1, 3, 1),
                  title="[MutualInformation]" + stats(mi),
                  **kw)
  ids = decode(spearman)
  vs.plot_heatmap(spearman[ids],
                  xticklabels=crt.factor_names,
                  yticklabels=crt.code_names[ids],
                  cmap="bwr",
                  ax=(1, 3, 2),
                  title="[Spearman]" + stats(spearman),
                  **kw)
  ids = decode(pearson)
  vs.plot_heatmap(pearson[ids],
                  xticklabels=crt.factor_names,
                  yticklabels=crt.code_names[ids],
                  cmap="bwr",
                  ax=(1, 3, 3),
                  title="[Pearson]" + stats(pearson),
                  **kw)
  fig.tight_layout()
  fig.savefig(path, dpi=120)
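# Illustration of the `decode` helper above: `diagonal_beam_search` is run on
# the transposed matrix, so the returned indices reorder the latent codes
# (rows) to put the strongest code-factor pairs on the diagonal. A minimal
# sketch with a made-up 2x2 matrix, assuming `search` is `odin.search`:
import numpy as np
from odin import search

mat = np.array([[0.1, 0.9],   # code 0 matches factor 1
                [0.8, 0.2]])  # code 1 matches factor 0
ids = search.diagonal_beam_search(mat.T)
print(mat[ids])  # rows reordered so the large values sit on the diagonal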
import os

import numpy as np
import tensorflow as tf

from odin.search import (diagonal_beam_search, diagonal_bruteforce_search,
                         diagonal_greedy_search, diagonal_hillclimb_search)
from odin.utils import UnitTimer

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

tf.random.set_seed(8)
np.random.seed(8)

shape = (8, 8)
mat = np.random.randint(0, 88, size=shape)
print(mat)

# beam search: keeps the best partial permutations at each step
with UnitTimer():
  ids = diagonal_beam_search(mat)
print(ids)
print(mat[:, ids])
print(np.sum(np.diag(mat[:, ids])))

# hill-climbing search: local improvements from an initial permutation
with UnitTimer():
  ids = diagonal_hillclimb_search(mat)
print(ids)
print(mat[:, ids])
print(np.sum(np.diag(mat[:, ids])))

# greedy search: picks the best remaining column for each row in turn
with UnitTimer():
  ids = diagonal_greedy_search(mat)
print(ids)
print(mat[:, ids])
print(np.sum(np.diag(mat[:, ids])))
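# `diagonal_bruteforce_search` is imported above but never exercised; a hedged
# addition timing it on the same matrix, assuming it shares the call signature
# of the other search functions. Brute force enumerates all column
# permutations, so it is exact but only feasible for small matrices like 8x8.
with UnitTimer():
  ids = diagonal_bruteforce_search(mat)
print(ids)
print(mat[:, ids])
print(np.sum(np.diag(mat[:, ids])))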
def create_divergence_matrix(self,
                             n_samples=1000,
                             lognorm=True,
                             n_components=2,
                             normalize_per_code=True,
                             decode=False):
  r""" Fit a Gaussian mixture model (GMM) on each ground-truth factor, then
  estimate the divergence from each latent code to that factor, i.e. the
  divergence `DKL(q(z|x)||p(y))`, where:

  - `q(z|x)` is a latent code with Gaussian distribution
  - `p(y)` is a factor modeled by a Gaussian mixture with `n_components`

  The calculation is repeated for every (code, factor) pair. This method is
  recommended for continuous-valued factors.

  Returns:
    train, test : matrices of shape `[n_codes, n_factors]` for training and
      testing data.
    (optional) the row indices from the diagonal beam search, if
      `decode=True`.
  """
  n_samples = int(n_samples)
  n_codes = self.n_codes
  n_factors = self.n_factors
  matrices = []
  for qZ, y in zip(self.representations, self.original_factors):
    ### normalizing the factors
    if lognorm:
      y = np.log1p(y)
    # standardizing for each factor
    y = (y - np.mean(y, axis=0, keepdims=True)) / (
        np.std(y, axis=0, keepdims=True) + 1e-10)
    ### train the Gaussian mixture on the factors
    f_gmm = []
    for fidx, (f, fname) in enumerate(zip(y.T, self.factors_name)):
      gmm = tfd.GaussianMixture.init(f[:, np.newaxis],
                                     n_components=n_components,
                                     covariance_type='diag',
                                     batch_shape=None,
                                     dtype=tf.float64,
                                     name=fname)
      f_gmm.append(gmm)
    ### the code Gaussian
    dist_type = type(qZ)
    if isinstance(qZ, tfd.Independent):
      dist_type = type(qZ.distribution)
    support_type = (tfd.MultivariateNormalDiag, tfd.Normal)
    if dist_type not in support_type:
      raise RuntimeError(
          "Unsupported posterior distribution: %s, the supported distributions are: %s"
          % (str(dist_type), str(support_type)))
    z_gau = []
    for mean, stddev, code_name in zip(tf.transpose(qZ.mean()),
                                       tf.transpose(qZ.stddev()),
                                       self.codes_name):
      mean = tf.cast(mean, tf.float64)
      stddev = tf.cast(stddev, tf.float64)
      z_gau.append(
          tfd.Independent(tfd.Normal(loc=mean, scale=stddev, name=code_name),
                          reinterpreted_batch_ndims=1))
    ### calculate the KL divergence
    density_matrix = np.empty(shape=(n_codes, n_factors), dtype=np.float64)
    for zidx, gau in enumerate(z_gau):
      for fidx, gmm in enumerate(f_gmm):
        # non-analytic KL(q=gau||p=gmm), estimated by Monte Carlo sampling
        samples = gau.sample(n_samples)
        qllk = gau.log_prob(samples)
        pllk = tf.reduce_sum(tf.reshape(
            gmm.log_prob(tf.reshape(samples, (-1, 1))), (n_samples, -1)),
                             axis=1)
        kl = tf.reduce_mean(qllk - pllk)
        density_matrix[zidx, fidx] = kl.numpy()
    # normalize each row (code) to sum to one across the factors
    if bool(normalize_per_code):
      density_matrix = density_matrix / np.sum(
          density_matrix, axis=1, keepdims=True)
    matrices.append(density_matrix)
  ### decoding and return
  train, test = matrices
  if decode:
    ids = search.diagonal_beam_search(train.T)
    train = train[ids]
    test = test[ids]
    return train, test, ids
  return train, test
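# The KL term above has no closed form (Gaussian vs. Gaussian mixture), so it
# is estimated by Monte Carlo: KL(q||p) ~= mean(log q(s) - log p(s)) for
# s ~ q. A standalone sketch of the same estimator, using
# tensorflow_probability directly; the two distributions here are hypothetical
# stand-ins for one latent code and one factor GMM:
import tensorflow as tf
import tensorflow_probability as tfp

tfd = tfp.distributions

q = tfd.Normal(loc=0.5, scale=1.0)  # q(z|x): posterior of one latent code
p = tfd.MixtureSameFamily(          # p(y): 2-component GMM of one factor
    mixture_distribution=tfd.Categorical(probs=[0.3, 0.7]),
    components_distribution=tfd.Normal(loc=[-1.0, 2.0], scale=[0.5, 0.8]))

samples = q.sample(1000)
kl = tf.reduce_mean(q.log_prob(samples) - p.log_prob(samples))
print(float(kl))  # Monte Carlo estimate of DKL(q||p)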
def create_correlation_matrix(self, mean=True, method='spearman', decode=False):
  r""" Correlation matrix of `latent codes` (row) and `groundtruth factors`
  (column).

  Arguments:
    mean : a Boolean. If True, use the mean of the latent posterior as the
      representation statistic, otherwise, use a sample.
    method : {'spearman', 'pearson', 'lasso', 'average'}
      spearman - rank or monotonic correlation
      pearson - linear correlation
      lasso - lasso regression
      average (aliases: 'avg', 'avr') - compute all of the methods above,
        then take the average
    decode : a Boolean. If True, reorder the rows of the correlation matrix
      for the best match between code and factor (i.e. the largest diagonal
      sum). Note: the decoding is performed on the train matrix, then applied
      to the test matrix.

  Returns:
    train, test : correlation matrices `[n_codes, n_factors]` for both
      training and testing data. Entries are in `[-1, 1]` for 'spearman' and
      'pearson'; 'lasso' entries are non-negative.
    (optional) an OrderedDict mapping from decoded factor index to latent
      code index.
  """
  method = str(method).strip().lower()
  if method in ('avg', 'avr', 'average'):
    method = 'average'
  all_corr = ['spearman', 'lasso', 'pearson', 'average']
  assert isinstance(mean, bool), "mean is boolean but given: %s" % mean
  assert method in all_corr, \
    "Support %s correlation but given method='%s'" % (str(all_corr), method)
  # special average mode: recursively compute each method, then average
  if method == 'average':
    mat = [
        self.create_correlation_matrix(mean=mean, method=corr, decode=False)
        for corr in all_corr[:-1]
    ]
    n = len(all_corr) - 1
    train = sum(i[0] for i in mat) / n
    test = sum(i[1] for i in mat) / n
  else:
    # start from the correlation matrix
    z_train, z_test = self._latent_codes(mean)
    f_train, f_test = self.factors

    # helper function
    def fn_corr(x1, x2):
      if method == 'lasso':
        model = Lasso(random_state=self.randint, alpha=0.1)
        model.fit(x1, x2)
        # coef_ is [n_target, n_features], so we need transpose here
        corr_mat = np.transpose(np.absolute(model.coef_))
      else:
        corr_mat = np.empty(shape=(self.n_representations, self.n_factors),
                            dtype=np.float64)
        for code in range(self.n_representations):
          for fact in range(self.n_factors):
            x, y = x1[:, code], x2[:, fact]
            if method == 'spearman':
              corr = sp.stats.spearmanr(x, y, nan_policy="omit")[0]
            elif method == 'pearson':
              corr = sp.stats.pearsonr(x, y)[0]
            corr_mat[code, fact] = corr
      return corr_mat

    train, test = fn_corr(z_train, f_train), fn_corr(z_test, f_test)
  ## decoding and return
  if decode:
    ids = search.diagonal_beam_search(train.T)
    train = train[ids, :]
    test = test[ids, :]
    return train, test, OrderedDict(zip(range(self.n_factors), ids))
  return train, test
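# Hypothetical usage of the method above (`crt` stands for a Criticizer that
# has already sampled its batches, as in `on_plot`):
train, test, ids = crt.create_correlation_matrix(mean=True,
                                                 method='average',
                                                 decode=True)
print(train.shape)  # (n_codes, n_factors)
print(ids)          # OrderedDict: factor index -> best-matching code index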