def train_from_torch(self, batch):
    obs = batch[self._obs_key]
    action = batch[self._action_key]
    next_obs = batch[self._next_obs_key]
    goal = batch[self._goal_key]

    achieved = self._state_to_goal_fn(next_obs)
    not_reached = (
        torch.norm(achieved - goal, dim=1) > self.equality_threshold
    )
    not_reached_logit = self.model(obs, action, return_logits=True)
    not_reached_logit = not_reached_logit[:, 0]
    loss = self._criterion(not_reached_logit, not_reached.to(torch.float))

    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()

    if self._need_to_update_eval_statistics:
        self._need_to_update_eval_statistics = False
        self._eval_statistics['loss'] = np.mean(ptu.get_numpy(loss))
        not_reached_predicted = torch.sigmoid(not_reached_logit)
        self._eval_statistics.update(create_stats_ordered_dict(
            'discount_predicted',
            ptu.get_numpy(not_reached_predicted),
        ))
        self._eval_statistics.update(create_stats_ordered_dict(
            'not_reached/mean',
            np.mean(ptu.get_numpy(not_reached)),
        ))

def _statistics_from_batch(self, batch, stat_prefix):
    statistics = OrderedDict()

    train_dict = self.get_train_dict(batch)
    for name in [
        'Policy Loss',
    ]:
        tensor = train_dict[name]
        statistics_name = "{} {} Mean".format(stat_prefix, name)
        statistics[statistics_name] = np.mean(ptu.get_numpy(tensor))

    for name in [
        'QF Outputs',
        'Policy Actions',
    ]:
        tensor = train_dict[name]
        statistics.update(create_stats_ordered_dict(
            '{} {}'.format(stat_prefix, name),
            ptu.get_numpy(tensor),
        ))

    statistics.update(create_stats_ordered_dict(
        "{} Env Actions".format(stat_prefix),
        ptu.get_numpy(batch['actions']),
    ))

    return statistics

def log_diagnostics(self, paths):
    final_values = []
    final_unclipped_rewards = []
    final_rewards = []
    for path in paths:
        final_value = path["actions"][-1][0]
        final_values.append(final_value)
        score = path["observations"][0][0] * final_value
        final_unclipped_rewards.append(score)
        final_rewards.append(clip_magnitude(score, 1))

    last_statistics = OrderedDict()
    last_statistics.update(create_stats_ordered_dict(
        'Final Value',
        final_values,
    ))
    last_statistics.update(create_stats_ordered_dict(
        'Unclipped Final Rewards',
        final_unclipped_rewards,
    ))
    last_statistics.update(create_stats_ordered_dict(
        'Final Rewards',
        final_rewards,
    ))

    for key, value in last_statistics.items():
        logger.record_tabular(key, value)
    return final_unclipped_rewards

def log_diagnostics(self, paths, logger=default_logger):
    statistics = OrderedDict()

    for name_in_env_infos, name_to_log in [
        ('distance_to_target', 'Distance to Target'),
        ('speed', 'Speed'),
        ('distance_reward', 'Distance Reward'),
        ('action_reward', 'Action Reward'),
    ]:
        stat = get_stat_in_paths(paths, 'env_infos', name_in_env_infos)
        statistics.update(create_stats_ordered_dict(
            name_to_log,
            stat,
        ))

    distances = get_stat_in_paths(paths, 'env_infos', 'distance_to_target')
    statistics.update(create_stats_ordered_dict(
        "Final Distance to Target",
        [ds[-1] for ds in distances],
    ))
    statistics.update(create_stats_ordered_dict(
        "Path Lengths",
        get_path_lengths(paths),
    ))

    for key, value in statistics.items():
        logger.record_tabular(key, value)

def _statistics_from_paths(self, paths, stat_prefix):
    eval_replay_buffer = UpdatableSubtrajReplayBuffer(
        len(paths) * (self.max_path_length + 1),
        self.env,
        self.subtraj_length,
        self.memory_dim,
    )
    for path in paths:
        eval_replay_buffer.add_trajectory(path)
    raw_subtraj_batch = eval_replay_buffer.get_all_valid_subtrajectories()
    assert raw_subtraj_batch is not None
    subtraj_batch = create_torch_subtraj_batch(raw_subtraj_batch)
    if self.save_memory_gradients:
        subtraj_batch['memories'].requires_grad = True
    statistics = self._statistics_from_subtraj_batch(
        subtraj_batch, stat_prefix=stat_prefix)
    statistics.update(eval_util.get_generic_path_information(
        paths, stat_prefix="Test",
    ))
    # Split the action columns into environment actions and memory writes.
    env_actions = np.vstack(
        [path["actions"][:, :self.action_dim] for path in paths])
    writes = np.vstack(
        [path["actions"][:, self.action_dim:] for path in paths])
    statistics.update(create_stats_ordered_dict(
        'Env Actions', env_actions, stat_prefix=stat_prefix))
    statistics.update(create_stats_ordered_dict(
        'Writes', writes, stat_prefix=stat_prefix))
    return statistics

def debug_statistics(self):
    r"""
    Given an image $$x$$, sample a bunch of latents $$z_i$$ from the prior
    and decode them into $$\hat x_i$$. Compare these to $$\hat x$$, the
    reconstruction of $$x$$.

    Ideally
    - All the $$\hat x_i$$ do worse than $$\hat x$$ (makes sure the VAE
      isn't ignoring the latent)
    - Some $$\hat x_i$$ do better than other $$\hat x_i$$ (tests for
      coverage)
    """
    debug_batch_size = 64
    data = self.get_batch(train=False)
    reconstructions, _, _ = self.model(data)
    img = data[0]
    recon_mse = ((reconstructions[0] - img) ** 2).mean().view(-1)
    img_repeated = img.expand((debug_batch_size, img.shape[0]))

    samples = ptu.randn(debug_batch_size, self.representation_size)
    random_imgs, _ = self.model.decode(samples)
    random_mses = (random_imgs - img_repeated) ** 2
    mse_improvement = ptu.get_numpy(random_mses.mean(dim=1) - recon_mse)
    stats = create_stats_ordered_dict(
        'debug/MSE improvement over random',
        mse_improvement,
    )
    stats.update(create_stats_ordered_dict(
        'debug/MSE of random decoding',
        ptu.get_numpy(random_mses),
    ))
    stats['debug/MSE of reconstruction'] = ptu.get_numpy(recon_mse)[0]
    return stats

def log_diagnostics(self, paths, **kwargs):
    list_of_rewards, terminals, obs, actions, next_obs = split_paths(paths)

    returns = []
    for rewards in list_of_rewards:
        returns.append(np.sum(rewards))
    statistics = OrderedDict()
    statistics.update(create_stats_ordered_dict(
        'Undiscounted Returns',
        returns,
    ))
    statistics.update(create_stats_ordered_dict(
        'Rewards',
        list_of_rewards,
    ))
    statistics.update(create_stats_ordered_dict(
        'Actions',
        actions,
    ))
    fraction_of_time_on_platform = [o[1] for o in obs]
    statistics['Fraction of time on platform'] = np.mean(
        fraction_of_time_on_platform)

    for key, value in statistics.items():
        logger.record_tabular(key, value)
    return returns

def log_diagnostics(self, paths, **kwargs):
    list_of_rewards, terminals, obs, actions, next_obs = split_paths(paths)

    returns = []
    for rewards in list_of_rewards:
        returns.append(np.sum(rewards))
    last_statistics = OrderedDict()
    last_statistics.update(create_stats_ordered_dict(
        'UndiscountedReturns',
        returns,
    ))
    last_statistics.update(create_stats_ordered_dict(
        'Rewards',
        list_of_rewards,
    ))
    last_statistics.update(create_stats_ordered_dict(
        'Actions',
        actions,
    ))

    for key, value in last_statistics.items():
        logger.record_tabular(key, value)
    return returns

def __call__(self, paths: List[Path], contexts: List[Context]) -> Diagnostics:
    goals = [c[self._desired_goal_key] for c in contexts]
    achieved_goals = [
        np.array([o[self._achieved_goal_key] for o in path['observations']])
        for path in paths
    ]

    statistics = OrderedDict()
    stat_to_lists = defaultdict(list)
    for achieved, goal in zip(achieved_goals, goals):
        difference = achieved - goal
        x_difference = difference[..., :1]
        y_difference = difference[..., 1:2]
        z_difference = difference[..., 2:3]
        joint_difference = difference[..., 3:6]
        stat_to_lists['x/distance'].append(
            np.linalg.norm(x_difference, axis=-1))
        stat_to_lists['y/distance'].append(
            np.linalg.norm(y_difference, axis=-1))
        stat_to_lists['z/distance'].append(
            np.linalg.norm(z_difference, axis=-1))
        stat_to_lists['joint/distance'].append(
            np.linalg.norm(joint_difference, axis=-1))
        stat_to_lists['x/success'].append(
            np.linalg.norm(x_difference, axis=-1) <= self.success_threshold)
        stat_to_lists['y/success'].append(
            np.linalg.norm(y_difference, axis=-1) <= self.success_threshold)
        stat_to_lists['z/success'].append(
            np.linalg.norm(z_difference, axis=-1) <= self.success_threshold)
        stat_to_lists['joint/success'].append(
            np.linalg.norm(joint_difference, axis=-1)
            <= self.success_threshold)

    for stat_name, stat_list in stat_to_lists.items():
        statistics.update(create_stats_ordered_dict(
            stat_name,
            stat_list,
            always_show_all_stats=True,
        ))
        statistics.update(create_stats_ordered_dict(
            '{}/final'.format(stat_name),
            [s[-1:] for s in stat_list],
            always_show_all_stats=True,
            exclude_max_min=True,
        ))
    return statistics

def compute_loss(
        self,
        batch,
        skip_statistics=False,
) -> Tuple[Loss, LossStatistics]:
    vae_terms = compute_vae_terms(self.vae, batch[self.data_key])
    kl = vae_terms.kl
    likelihood = vae_terms.likelihood
    set_loss = compute_set_loss(self.vae, batch[self.set_key])
    total_loss = (
        -likelihood
        + self._beta * kl
        + self.set_loss_weight * set_loss
    )

    eval_statistics = OrderedDict()
    if not skip_statistics:
        eval_statistics['log_prob'] = np.mean(ptu.get_numpy(likelihood))
        eval_statistics['kl'] = np.mean(ptu.get_numpy(kl))
        eval_statistics['set_loss'] = np.mean(ptu.get_numpy(set_loss))
        eval_statistics['loss'] = np.mean(ptu.get_numpy(total_loss))
        eval_statistics['beta'] = self._beta
        for k, v in vae_terms.p_x_given_z.get_diagnostics().items():
            eval_statistics['p_x_given_z/{}'.format(k)] = v
        for k, v in vae_terms.q_z.get_diagnostics().items():
            eval_statistics['q_z_given_x/{}'.format(k)] = v

        for name, set_list in [
            ('eval', self.eval_sets),
            ('train', self.train_sets),
        ]:
            for set_i, set in enumerate(set_list):
                vae_terms = compute_vae_terms(self.vae, set)
                kl = vae_terms.kl
                likelihood = vae_terms.likelihood
                set_loss = compute_set_loss(self.vae, set)
                eval_statistics['{}/set{}/log_prob'.format(name, set_i)] = (
                    np.mean(ptu.get_numpy(likelihood)))
                eval_statistics['{}/set{}/kl'.format(name, set_i)] = (
                    np.mean(ptu.get_numpy(kl)))
                eval_statistics['{}/set{}/set_loss'.format(name, set_i)] = (
                    np.mean(ptu.get_numpy(set_loss)))
                set_prior = compute_prior(self.vae.encoder(set))
                eval_statistics.update(create_stats_ordered_dict(
                    '{}/set{}/learned_prior/mean'.format(name, set_i),
                    ptu.get_numpy(set_prior.mean),
                ))
                eval_statistics.update(create_stats_ordered_dict(
                    '{}/set{}/learned_prior/stddev'.format(name, set_i),
                    ptu.get_numpy(set_prior.stddev),
                ))
                for k, v in vae_terms.p_x_given_z.get_diagnostics().items():
                    eval_statistics['{}/set{}/p_x_given_z/{}'.format(
                        name, set_i, k)] = v
                for k, v in vae_terms.q_z.get_diagnostics().items():
                    eval_statistics['{}/set{}/q_z_given_x/{}'.format(
                        name, set_i, k)] = v
    return total_loss, eval_statistics

def _do_training(self):
    tmp_batch = self.get_batch()
    random_state = tmp_batch['observations']
    losses = []

    batch = self.get_batch()
    obs = batch['observations']
    actions = batch['actions']
    next_obs = batch['next_observations']

    ob_deltas_pred = self.model(obs, actions)
    next_obs_pred = obs + ob_deltas_pred
    if self.vectorized:
        distance_to_random_state_pred = (next_obs_pred - random_state) ** 2
        distance_to_random_state = (next_obs - random_state) ** 2
        squared_errors = (
            distance_to_random_state_pred - distance_to_random_state
        ) ** 2
        loss = squared_errors.mean()
    else:
        distance_to_random_state_pred = (
            (next_obs_pred - random_state) ** 2
        ).sum(1, keepdim=True)
        distance_to_random_state = (
            (next_obs - random_state) ** 2
        ).sum(1, keepdim=True)
        squared_errors = (
            distance_to_random_state_pred - distance_to_random_state
        ) ** 2
        loss = squared_errors.mean()

    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    losses.append(ptu.get_numpy(loss))

    if self.eval_statistics is None:
        self.eval_statistics = OrderedDict()
        self.eval_statistics.update(create_stats_ordered_dict(
            'Model Loss',
            losses,
            always_show_all_stats=True,
            exclude_max_min=True,
        ))
        self.eval_statistics.update(create_stats_ordered_dict(
            'Distance To Random State',
            ptu.get_numpy(distance_to_random_state),
        ))
        self.eval_statistics.update(create_stats_ordered_dict(
            'Distance To Random State Predicted',
            ptu.get_numpy(distance_to_random_state_pred),
        ))

def _do_training(self): batch = self.get_batch() """ Optimize Critic/Actor. """ rewards = batch['rewards'] terminals = batch['terminals'] obs = batch['observations'] actions = batch['actions'] next_obs = batch['next_observations'] _, _, v_pred = self.target_policy(next_obs, None) y_target = self.reward_scale * rewards + ( 1. - terminals) * self.discount * v_pred y_target = y_target.detach() mu, y_pred, v = self.policy(obs, actions) policy_loss = self.policy_criterion(y_pred, y_target) self.policy_optimizer.zero_grad() policy_loss.backward() self.policy_optimizer.step() """ Update Target Networks """ if self.use_soft_update: ptu.soft_update_from_to(self.policy, self.target_policy, self.tau) else: if self._n_train_steps_total % self.target_hard_update_period == 0: ptu.copy_model_params_from_to(self.policy, self.target_policy) if self.need_to_update_eval_statistics: self.need_to_update_eval_statistics = False self.eval_statistics['Policy Loss'] = np.mean( ptu.get_numpy(policy_loss)) self.eval_statistics.update( create_stats_ordered_dict( 'Policy v', ptu.get_numpy(v), )) self.eval_statistics.update( create_stats_ordered_dict( 'Policy mu', ptu.get_numpy(mu), )) self.eval_statistics.update( create_stats_ordered_dict( 'Y targets', ptu.get_numpy(y_target), )) self.eval_statistics.update( create_stats_ordered_dict( 'Y predictions', ptu.get_numpy(y_pred), ))
def get_diagnostics(self):
    stats = OrderedDict()
    stats.update(create_stats_ordered_dict(
        'mean',
        ptu.get_numpy(self.mean),
        # exclude_max_min=True,
    ))
    stats.update(create_stats_ordered_dict(
        'std',
        ptu.get_numpy(self.distribution.stddev),
    ))
    return stats

def _do_training(self):
    batch = self.get_batch()
    obs = batch['observations']
    actions = batch['actions']
    next_obs = batch['next_observations']

    """
    Policy operations.
    """
    inputs = torch.cat(
        (obs, self.env.convert_obs_to_goals(next_obs)),
        dim=1,
    )
    policy_actions = self.policy(inputs)
    policy_loss = self.policy_criterion(policy_actions, actions)

    """
    Update Networks
    """
    self.policy_optimizer.zero_grad()
    policy_loss.backward()
    self.policy_optimizer.step()

    if self.need_to_update_eval_statistics:
        self.need_to_update_eval_statistics = False
        """
        This way, these statistics are only computed for one batch.
        """
        self.eval_statistics = OrderedDict()
        self.eval_statistics['Policy Loss'] = np.mean(
            ptu.get_numpy(policy_loss))
        self.eval_statistics.update(create_stats_ordered_dict(
            'Policy Action',
            ptu.get_numpy(policy_actions),
        ))

def get_diagnostics(self):
    stats = OrderedDict()
    stats.update(create_stats_ordered_dict(
        'mean',
        ptu.get_numpy(self.mean),
    ))
    stats.update(create_stats_ordered_dict(
        'normal/std',
        ptu.get_numpy(self.normal_std),
    ))
    stats.update(create_stats_ordered_dict(
        'normal/log_std',
        ptu.get_numpy(torch.log(self.normal_std)),
    ))
    return stats

def log_diagnostics(self, paths, logger=default_logger):
    lms = get_stat_in_paths(paths, 'agent_infos', 'lagrange_multiplier')
    for key, value in create_stats_ordered_dict(
        "TDM LBFGS Lagrange Multiplier",
        lms,
    ).items():
        logger.record_tabular(key, value)

def _statistics_from_subtraj_batch(self, subtraj_batch, stat_prefix=''):
    statistics = OrderedDict()

    critic_dict = self.get_critic_output_dict(subtraj_batch)
    for name, tensor in critic_dict.items():
        statistics.update(create_stats_ordered_dict(
            '{} QF {}'.format(stat_prefix, name),
            ptu.get_numpy(tensor),
        ))

    policy_dict = self.get_policy_output_dict(subtraj_batch)
    for name, tensor in policy_dict.items():
        statistics.update(create_stats_ordered_dict(
            '{} Policy {}'.format(stat_prefix, name),
            ptu.get_numpy(tensor),
        ))
    return statistics

def save_gradient_norm(gradient):
    # Closure used as a gradient hook: `self` and `key` are expected to come
    # from the enclosing scope where this function is defined.
    if self.need_to_update_eval_statistics:
        self.extra_eval_statistics.update(create_stats_ordered_dict(
            key,
            ptu.get_numpy(gradient.data.norm(p=2, dim=1)),
            always_show_all_stats=True,
        ))

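# Hypothetical illustration (not from the original source): a minimal sketch of
# how a closure like `save_gradient_norm` is typically registered as a PyTorch
# gradient hook. The names below (`_example_register_gradient_norm_hook`, `x`,
# `norms`) are made up for this example; only `Tensor.register_hook`, which
# calls the hook with the gradient during backward(), is standard PyTorch.
def _example_register_gradient_norm_hook():
    import torch

    norms = []

    def save_gradient_norm(gradient):
        # Record the per-row L2 norm of the incoming gradient.
        norms.append(gradient.norm(p=2, dim=1))

    x = torch.randn(4, 3, requires_grad=True)
    x.register_hook(save_gradient_norm)
    (x ** 2).sum().backward()
    return norms[0]  # shape: torch.Size([4])
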
def __call__(self, paths: List[Path], contexts: List[Context]) -> Diagnostics:
    goals = [c[self._desired_goal_key] for c in contexts]
    achieved_goals = [
        np.array([o[self._achieved_goal_key] for o in path['observations']])
        for path in paths
    ]

    statistics = OrderedDict()
    stat_to_lists = defaultdict(list)
    for achieved, goal in zip(achieved_goals, goals):
        difference = achieved - goal
        distance = np.linalg.norm(difference, axis=-1)
        stat_to_lists['distance'].append(distance)
        stat_to_lists['success'].append(distance <= self._success_threshold)

    for stat_name, stat_list in stat_to_lists.items():
        statistics.update(create_stats_ordered_dict(
            stat_name,
            stat_list,
            always_show_all_stats=True,
        ))
        statistics.update(create_stats_ordered_dict(
            '{}/final'.format(stat_name),
            [s[-1:] for s in stat_list],
            always_show_all_stats=True,
            exclude_max_min=True,
        ))
    statistics.update(create_stats_ordered_dict(
        'distance/initial',
        [s[:1] for s in stat_to_lists['distance']],
        always_show_all_stats=True,
        exclude_max_min=True,
    ))
    statistics.update(create_stats_ordered_dict(
        'success/any',
        [any(s) for s in stat_to_lists['success']],
        always_show_all_stats=True,
        exclude_max_min=True,
    ))
    return statistics

def get_diagnostics(self):
    stats = OrderedDict()
    stats.update(create_stats_ordered_dict(
        'probability',
        ptu.get_numpy(self.probs),
    ))
    return stats

def log_diagnostics(self, paths, logger=default_logger):
    statistics = OrderedDict()

    for name_in_env_infos, name_to_log in [
        ('reward_dist', 'Distance Reward'),
        ('reward_ctrl', 'Action Reward'),
    ]:
        stat = get_stat_in_paths(paths, 'env_infos', name_in_env_infos)
        statistics.update(create_stats_ordered_dict(
            name_to_log,
            stat,
        ))

    distances = get_stat_in_paths(paths, 'env_infos', 'reward_dist')
    statistics.update(create_stats_ordered_dict(
        "Final Distance Reward",
        [ds[-1] for ds in distances],
    ))
    for key, value in statistics.items():
        logger.record_tabular(key, value)

def get_diagnostics(self):
    stats = OrderedDict()
    stats.update(create_stats_ordered_dict(
        'alpha',
        ptu.get_numpy(self.concentration0),
    ))
    stats.update(create_stats_ordered_dict(
        'beta',
        ptu.get_numpy(self.concentration1),
    ))
    stats.update(create_stats_ordered_dict(
        'entropy',
        ptu.get_numpy(self.entropy()),
    ))
    return stats

def log_diagnostics(self, paths, logger=default_logger):
    statistics = OrderedDict()

    for name_in_env_infos, name_to_log in [
        ('posafter', 'Position'),
        ('height', 'Height'),
        ('angle', 'Angle'),
    ]:
        stats = get_stat_in_paths(paths, 'env_infos', name_in_env_infos)
        statistics.update(create_stats_ordered_dict(
            name_to_log,
            stats,
        ))
        statistics.update(create_stats_ordered_dict(
            "Final " + name_to_log,
            [s[-1] for s in stats],
        ))
    for key, value in statistics.items():
        logger.record_tabular(key, value)

def _compute_target_q_value(
        self,
        discount,
        rewards,
        terminals,
        bootstrap_value,
        statistics_log,
        update_statistics,
):
    scaled_rewards = rewards * self.reward_scale
    del rewards
    if self.reward_type == self.NORMAL_REWARD:
        reward_target = scaled_rewards
    elif self.reward_type == self.DISCOUNTED_REWARD:
        reward_target = scaled_rewards * (1 - discount)
    elif self.reward_type == self.DISCOUNTED_PLUS_TIME_KL:
        kl_reward = kl_divergence(
            Bernoulli(discount),
            self.prior_on_discount,
        )
        reward_target = scaled_rewards * (1 - discount) + kl_reward
        if update_statistics:
            statistics_log.update(create_stats_ordered_dict(
                'time_kl_reward',
                ptu.get_numpy(kl_reward),
            ))
            statistics_log.update(create_stats_ordered_dict(
                'inferred_discount',
                ptu.get_numpy(discount),
            ))
    else:
        raise ValueError(
            "Unknown reward type: {}".format(self.reward_type))
    if self._multiply_bootstrap_by_prior_discount:
        bootstrap_target = (
            (1. - terminals) * discount * bootstrap_value * self.discount
        )
    else:
        bootstrap_target = (1. - terminals) * discount * bootstrap_value
    q_target = reward_target + bootstrap_target
    return q_target

def log_diagnostics(self, paths):
    target_onehots = []
    for path in paths:
        first_observation = path["observations"][0][:self.n + 1]
        target_onehots.append(first_observation)

    final_predictions = []  # each element has shape (dim,)
    nonfinal_predictions = []  # each element has shape (seq_length - 1, dim)
    for path in paths:
        actions = path["actions"]
        if self._softmax_action:
            actions = softmax(actions, axis=-1)
        final_predictions.append(actions[-1])
        nonfinal_predictions.append(actions[:-1])
    nonfinal_predictions_sequence_dimension_flattened = np.vstack(
        nonfinal_predictions
    )  # shape = N x dim
    nonfinal_prob_zero = [
        prediction[0]
        for prediction in nonfinal_predictions_sequence_dimension_flattened
    ]
    final_probs_correct = []
    for final_prediction, target_onehot in zip(final_predictions,
                                               target_onehots):
        correct_pred_idx = np.argmax(target_onehot)
        final_probs_correct.append(final_prediction[correct_pred_idx])
    final_prob_zero = [prediction[0] for prediction in final_predictions]

    last_statistics = OrderedDict()
    last_statistics.update(create_stats_ordered_dict(
        'Final P(correct)', final_probs_correct))
    last_statistics.update(create_stats_ordered_dict(
        'Non-final P(zero)', nonfinal_prob_zero))
    last_statistics.update(create_stats_ordered_dict(
        'Final P(zero)', final_prob_zero))
    for key, value in last_statistics.items():
        logger.record_tabular(key, value)
    return final_probs_correct

def get_diagnostics(self, paths):
    statistics = OrderedDict()

    for stat_name_in_paths, stat_name_to_print in [
        ('arm_object_distance', 'Distance hand to object'),
        ('arm_goal_distance', 'Distance hand to goal'),
    ]:
        stats = get_stat_in_paths(paths, 'env_infos', stat_name_in_paths)
        statistics.update(create_stats_ordered_dict(
            stat_name_to_print,
            stats,
            always_show_all_stats=True,
        ))
        final_stats = [s[-1] for s in stats]
        statistics.update(create_stats_ordered_dict(
            "Final " + stat_name_to_print,
            final_stats,
            always_show_all_stats=True,
        ))
    return statistics

def compute_loss(self, batch, epoch=-1, test=False):
    prefix = "test/" if test else "train/"

    positives = self.positives.random_batch(self.batch_size)["observations"]
    P, feature_size = positives.shape
    positives = ptu.from_numpy(positives)
    negatives = batch['observations']
    N, feature_size = negatives.shape
    X = torch.cat((positives, negatives))
    Y = np.zeros((P + N, 1))
    Y[:P, 0] = 1  # positives are labeled 1; negatives stay labeled 0
    Y = ptu.from_numpy(Y)

    y_pred = self.GAIL_discriminator_logits(X)
    loss = self.loss_fn(y_pred, Y)

    y_pred_class = (y_pred > 0).float()
    self.update_with_classification_stats(y_pred_class, Y, prefix)
    self.eval_statistics.update(create_stats_ordered_dict(
        "y_pred_positives",
        ptu.get_numpy(y_pred[:P]),
    ))
    self.eval_statistics.update(create_stats_ordered_dict(
        "y_pred_negatives",
        ptu.get_numpy(y_pred[P:]),
    ))
    self.eval_statistics['epoch'] = epoch
    self.eval_statistics[prefix + "losses"].append(loss.item())
    return loss

def get_diagnostics(self):
    path_lens = [len(path['actions']) for path in self._epoch_paths]
    stats = OrderedDict([
        ('num steps total', self._num_steps_total),
        ('num paths total', self._num_paths_total),
    ])
    stats.update(create_stats_ordered_dict(
        "path length",
        path_lens,
        always_show_all_stats=True,
    ))
    return stats

def get_diagnostics(self):
    if self._vae_sample_probs is None or self._vae_sample_priorities is None:
        stats = create_stats_ordered_dict(
            'VAE Sample Weights',
            np.zeros(self._size),
        )
        stats.update(create_stats_ordered_dict(
            'VAE Sample Probs',
            np.zeros(self._size),
        ))
    else:
        vae_sample_priorities = self._vae_sample_priorities[:self._size]
        vae_sample_probs = self._vae_sample_probs[:self._size]
        stats = create_stats_ordered_dict(
            'VAE Sample Weights',
            vae_sample_priorities,
        )
        stats.update(create_stats_ordered_dict(
            'VAE Sample Probs',
            vae_sample_probs,
        ))
    return stats

def get_diagnostics(self):
    stats = OrderedDict()
    stats.update(create_stats_ordered_dict(
        'mean',
        ptu.get_numpy(self.mean),
    ))
    stats.update(create_stats_ordered_dict(
        'std',
        ptu.get_numpy(self.distribution.stddev),
    ))
    stats.update(create_stats_ordered_dict(
        'log_std',
        ptu.get_numpy(torch.log(self.distribution.stddev)),
    ))
    stats.update(create_stats_ordered_dict(
        'entropy',
        ptu.get_numpy(self.entropy()),
    ))
    return stats

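# Assumed-behavior sketch (not the actual rlkit implementation): every snippet
# above passes an array of values to `create_stats_ordered_dict` and merges the
# result into its statistics dict. The helper below approximates the keys it is
# assumed to produce ('<name> Mean', '<name> Std', '<name> Max', '<name> Min');
# the real helper's exact key names and options may differ, e.g. it may collapse
# single-element data to one entry when always_show_all_stats is False.
def create_stats_ordered_dict_sketch(name, data, always_show_all_stats=True,
                                     exclude_max_min=False):
    from collections import OrderedDict

    import numpy as np

    # `data` may be a scalar, an array, or a list/tuple of arrays; flatten it.
    if isinstance(data, (list, tuple)):
        data = np.concatenate([np.ravel(np.asarray(d)) for d in data])
    else:
        data = np.ravel(np.asarray(data))
    stats = OrderedDict([
        (name + ' Mean', np.mean(data)),
        (name + ' Std', np.std(data)),
    ])
    if not exclude_max_min:
        stats[name + ' Max'] = np.max(data)
        stats[name + ' Min'] = np.min(data)
    return stats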