def plot_experiment_set(experiment_names,
                        metric_names='loss',
                        experiments_base='experiments',
                        file_name=None):
    r"""Plots one or more metrics for multiple experiments on shared axes.

    Training curves are drawn with solid lines and validation curves with
    dashed lines; each experiment is assigned a colour from the 'Set1'
    colour map. If `file_name` is given, the figure is saved under 'plots'.
    """
    experiment_names = utils.listify(experiment_names)
    metric_names = utils.listify(metric_names)

    # One subplot per metric, laid out in a single row.
    n_axes = len(metric_names)
    fig, axs = plt.subplots(1, n_axes, figsize=(1 + n_axes * (4 + 1), 4))
    if len(metric_names) == 1:
        axs = [axs]

    cmap = plt.get_cmap('Set1')

    for i, experiment_name in enumerate(experiment_names):
        colour = cmap(float(i) / len(experiment_names))
        plot_experiment(experiment_name, metric_names, experiments_base,
                        axs=axs, colour=colour, add_labels=i == 0)

    # Add an invisible handle so the legend can carry a line-style key.
    handles, labels = axs[0].get_legend_handles_labels()
    extra = Rectangle((0, 0), 1, 1, fc='w', fill=False, edgecolor='none',
                      linewidth=0)

    # 0.5 per subplot plus 0.1 between each subplot.
    lgd_pos = ((0.5 + 0.1) * n_axes - 0.1, -0.1)
    lgd = axs[0].legend([extra] + handles,
                        ['solid = train, dashed = valid'] + labels,
                        loc='upper center',
                        bbox_to_anchor=lgd_pos,
                        fancybox=True,
                        shadow=True,
                        ncol=min(3, len(experiment_names) + 1))
    plt.setp(lgd.get_lines(), linewidth=4.)

    if file_name:
        os.makedirs(os.path.join('plots', os.path.dirname(file_name)),
                    exist_ok=True)
        save_path = os.path.join('plots', file_name)
        logger.info(
            'Saving plot of metrics for multiple experiments to {}'.format(
                save_path))
        plt.savefig(save_path, bbox_extra_artists=(lgd,), bbox_inches='tight')

    plt.close(fig)
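# A minimal usage sketch (not part of the original module). The experiment
# names and the file name below are hypothetical; the figure is written under
# the 'plots' directory.
#
#     plot_experiment_set(['baseline', 'proposed'],
#                         metric_names=['loss', 'accuracy'],
#                         file_name='comparison/set1.pdf')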
def save_batched_seqs(sequence_features,
                      names,
                      out_dir,
                      seq_len=None,
                      feat_names=None):
    r"""Saves multiple sequence features for multiple sentences, handling
    sequence length clipping and device detachment.

    Parameters
    ----------
    sequence_features : dict[str, torch.Tensor], list[torch.Tensor], or torch.Tensor, shape (batch_size, max_seq_len, feat_dim)
        Batched sequence features to be saved. If it is a dict, the keys are
        used as the subdirectory names and `feat_names` can be used to select
        a subset. If it is a list (or a single tensor), `feat_names` must be
        provided in order to determine the subdirectory names.
    names : list[str], shape (batch_size, )
        Utterance names used to save each item in the batch.
    out_dir : str
        Path of the directory under which to save each feature type.
    seq_len : np.ndarray or torch.Tensor, shape (batch_size, )
        Sequence lengths used to remove padding from each batch item.
    feat_names : list[str]
        Names of the features to be saved; these determine the subdirectory
        names for saving under `out_dir`. If `sequence_features` is a dict,
        this can be used to select a subset of the features for saving.

    Notes
    -----
    Each feature for each sentence is saved at,
    {out_dir}/feats/{feat_name}/{name}.npy
    """
    pred_dir = os.path.join(out_dir, 'feats')
    os.makedirs(pred_dir, exist_ok=True)

    if isinstance(sequence_features, dict):
        if feat_names is None:
            feat_names = sequence_features.keys()
        sequence_features = [
            sequence_features[feat_name] for feat_name in feat_names
        ]
    else:
        if feat_names is None:
            raise ValueError(
                'If sequence_features is not a dictionary, then feat_names '
                'must be provided.')

    # Detach from the computation graph/device and clip padding to `seq_len`.
    sequence_features = utils.detach_batched_seqs(*sequence_features,
                                                  seq_len=seq_len)
    sequence_features = utils.listify(sequence_features)

    for feat_name, values in zip(feat_names, sequence_features):
        if isinstance(values[0], np.ndarray):
            tdt.file_io.save_dir(tdt.file_io.save_bin,
                                 path=os.path.join(pred_dir, feat_name),
                                 data=values,
                                 file_ids=names)
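# A hedged usage sketch: the feature names ('lf0', 'mgc'), utterance names,
# and tensors below are hypothetical, with shapes following the
# (batch_size, max_seq_len, feat_dim) convention documented above. Each
# feature would be written to out/feats/{feat_name}/{name}.npy.
#
#     features = {'lf0': lf0_batch, 'mgc': mgc_batch}
#     save_batched_seqs(features,
#                       names=['utt_001', 'utt_002', 'utt_003'],
#                       out_dir='out',
#                       seq_len=seq_lens)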
def fetch_params(self, speaker_ids, data_type=np.ndarray, deltas=False):
    r"""Gets the speaker-dependent normalisation parameters, taking into
    account the delta flag and the type of the data.

    Parameters
    ----------
    speaker_ids : list[str]
        Names of the speakers for each batch item.
    data_type : type
        Typically `torch.Tensor` for batched features, or `np.ndarray` for
        single sentences or visualisation code.
    deltas : bool
        Whether the features being normalised are delta features, and should
        be normalised using the delta parameters.

    Returns
    -------
    sd_params : dict[str, torch.Tensor] or dict[str, np.ndarray], shape (batch_size, feat_dim) or (feat_dim, )
        The speaker-dependent parameters.
    """
    speaker_ids = utils.listify(speaker_ids)
    speaker_params = super(_SpeakerDependentNormaliser,
                           self).fetch_params(data_type=data_type,
                                              deltas=deltas)

    sd_params = {}
    for speaker_id in speaker_ids:
        params = speaker_params[speaker_id]

        for name, param in params.items():
            # For the current speaker_id (item in the batch) and the current
            # parameter (e.g. the mean), concatenate along dim=0.
            param = param[None, ...]

            if name not in sd_params:
                sd_params[name] = param
            else:
                if data_type == torch.Tensor:
                    sd_params[name] = torch.cat((sd_params[name], param))
                else:
                    sd_params[name] = np.concatenate(
                        (sd_params[name], param))

    for name, sd_param in sd_params.items():
        # Remove the batch dimension for single-speaker batches. Indexing is
        # used instead of `squeeze(0)`, which raises for numpy arrays when
        # the leading dimension is larger than one.
        if sd_param.shape[0] == 1:
            sd_params[name] = sd_param[0]

    return sd_params
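# A hedged usage sketch, assuming `normaliser` is an instance of
# `_SpeakerDependentNormaliser` whose parameters include a mean; the speaker
# ids are hypothetical. Each parameter is stacked along the batch dimension,
# one row per speaker id.
#
#     sd_params = normaliser.fetch_params(['speaker_1', 'speaker_2'],
#                                         data_type=torch.Tensor)
#     mean = sd_params['mean']  # shape (2, feat_dim)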
def plot_experiment(experiment_name,
                    metric_names='loss',
                    experiments_base='experiments',
                    axs=None,
                    colour=None,
                    add_labels=True,
                    save=False):
    r"""Plots training (solid) and validation (dashed) curves for one
    experiment, using one subplot per metric. Returns the axes used."""
    metric_names = utils.listify(metric_names)

    results_train = load_experiment_results(experiment_name, metric_names,
                                            'train', experiments_base)
    results_valid = load_experiment_results(experiment_name, metric_names,
                                            'valid', experiments_base)

    if axs is None:
        n_axes = len(metric_names)
        fig, axs = plt.subplots(1, n_axes, figsize=(1 + n_axes * (4 + 1), 4))
        if len(metric_names) == 1:
            axs = [axs]

    for ax, metric_name in zip(axs, metric_names):
        metric_values_train = results_train[metric_name]
        ax.plot(list(metric_values_train.keys()),
                list(metric_values_train.values()),
                label=experiment_name,
                c=colour)

        metric_values_valid = results_valid[metric_name]
        ax.plot(list(metric_values_valid.keys()),
                list(metric_values_valid.values()),
                '--',
                c=colour)

        if add_labels:
            ax.set_xlabel('Epoch number')
            ax.set_ylabel(metric_name)

    if save:
        save_path = os.path.join(experiments_base, experiment_name,
                                 'metrics.pdf')
        logger.info('Saving plot of metrics to {}'.format(save_path))
        plt.savefig(save_path, bbox_inches='tight')

    return axs
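# A minimal sketch with a hypothetical experiment name: plot the loss curves
# and save them alongside the experiment as experiments/baseline/metrics.pdf.
#
#     plot_experiment('baseline', metric_names='loss', save=True)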
def load_experiment_results(experiment_name,
                            metric_names='loss',
                            mode='train',
                            experiments_base='experiments'):
    r"""Loads metrics from an experiment.

    Returns
    -------
    results : dict[str, collections.OrderedDict[int, float]]
        Dictionary of results with the following structure,

        .. code:: python

            {
                metric_name: OrderedDict(
                    epoch: metric_value
                )
            }
    """
    metric_names = utils.listify(metric_names)
    results = {metric_name: {} for metric_name in metric_names}

    model_path = os.path.join(experiments_base, experiment_name, mode)
    for epoch_str in os.listdir(model_path):
        metric_path = os.path.join(model_path, epoch_str, 'metrics.json')

        if os.path.isfile(metric_path):
            # Load metrics for this epoch from file.
            metrics = file_io.load_json(metric_path)

            epoch = int(epoch_str.split('_')[-1])
            for metric_name in metric_names:
                if metric_name in metrics:
                    results[metric_name][epoch] = metrics[metric_name]

    # Sort the keys of each metric by the epochs, as `os.listdir` will not
    # use the correct numerical order.
    results = {
        metric_name: OrderedDict(sorted(result.items()))
        for metric_name, result in results.items()
    }

    return results
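# A hedged usage sketch with a hypothetical experiment name, assuming epoch
# directories such as 'epoch_3' that follow the '<prefix>_<number>' naming
# parsed above.
#
#     results = load_experiment_results('baseline', metric_names='loss')
#     for epoch, loss in results['loss'].items():
#         print(epoch, loss)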
def accumulate(self, collection, **kwargs):
    r"""Accumulates to all metrics in kwargs.

    Parameters
    ----------
    collection : str
        Metrics in this collection will be updated.
    kwargs : dict[str, tuple]
        Names of metrics, and inputs to each metric's accumulate function,
        e.g. a list of :class:`torch.Tensor`.
    """
    for metric_name, inputs in kwargs.items():
        # Allow multiple inputs to be specified (or one).
        inputs = utils.listify(inputs)

        # If a kwargs dict is specified for this metric.
        if isinstance(inputs[-1], dict):
            inputs, kwinputs = inputs[:-1], inputs[-1]
        else:
            kwinputs = dict()

        self[collection][metric_name].accumulate(*inputs, **kwinputs)
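# A hedged usage sketch, assuming `metrics_handler` is an instance of this
# class with a 'losses' collection containing a 'loss' metric; the names and
# tensors are hypothetical. A trailing dict in the tuple is split off and
# passed as keyword arguments to the metric's accumulate function.
#
#     metrics_handler.accumulate('losses',
#                                loss=(predictions, targets,
#                                      {'seq_len': seq_len}))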
def __init__(self, z_dim, kld_weight, input_dim, input_names):
    super(VAMPPriorDataVAE, self).__init__(z_dim, kld_weight, input_dim)
    self.input_names = utils.listify(input_names)
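# A hedged construction sketch; the dimensions, KLD weight, and input name
# below are hypothetical, and the rest of VAMPPriorDataVAE's behaviour is
# defined elsewhere.
#
#     model = VAMPPriorDataVAE(z_dim=16, kld_weight=0.01, input_dim=80,
#                              input_names='acoustic_features')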