Exemplo n.º 1
0
def plot_experiment_set(experiment_names,
                        metric_names='loss',
                        experiments_base='experiments',
                        file_name=None):
    """Plots the metrics of several experiments on a shared row of axes, with one legend for the figure.

    Parameters
    ----------
    experiment_names : str or list[str]
        Experiments to plot. Each one is drawn by `plot_experiment` in its own colour from the 'Set1' colour map.
    metric_names : str or list[str]
        Metrics to plot, one subplot is created per metric.
    experiments_base : str
        Directory passed on to `plot_experiment` as the location of the experiments.
    file_name : str, optional
        If given (and non-empty), the figure is saved at plots/{file_name} and then closed.

    Notes
    -----
    Axis labels are only requested for the first experiment, as all experiments share the same axes.
    """
    experiment_names = utils.listify(experiment_names)
    metric_names = utils.listify(metric_names)

    n_axes = len(metric_names)
    fig_size = (1 + n_axes * (4 + 1), 4)
    fig, axs = plt.subplots(1, n_axes, figsize=fig_size)
    if n_axes == 1:
        # With a single subplot matplotlib returns a bare Axes, wrap it so it can be indexed and iterated.
        axs = [axs]
    cmap = plt.get_cmap('Set1')

    n_experiments = len(experiment_names)
    for idx, experiment_name in enumerate(experiment_names):
        line_colour = cmap(float(idx) / n_experiments)
        plot_experiment(experiment_name,
                        metric_names,
                        experiments_base,
                        axs=axs,
                        colour=line_colour,
                        add_labels=(idx == 0))

    handles, labels = axs[0].get_legend_handles_labels()

    # Invisible patch, used only to carry the line-style explanation as the first legend entry.
    style_key = Rectangle((0, 0), 1, 1,
                          fc='w', fill=False, edgecolor='none', linewidth=0)

    # 0.5 per subplot plus 0.1 between each subplot.
    lgd_x = (0.5 + 0.1) * n_axes - 0.1
    lgd_pos = (lgd_x, -0.1)

    legend_handles = [style_key] + handles
    legend_labels = ['solid = train, dotted = valid'] + labels
    n_columns = min(3, n_experiments + 1)
    lgd = axs[0].legend(legend_handles,
                        legend_labels,
                        loc='upper center',
                        bbox_to_anchor=lgd_pos,
                        fancybox=True,
                        shadow=True,
                        ncol=n_columns)

    plt.setp(lgd.get_lines(), linewidth=4.)

    if not file_name:
        return

    save_path = os.path.join('plots', file_name)
    save_dir = os.path.join('plots', os.path.dirname(file_name))
    os.makedirs(save_dir, exist_ok=True)

    message = 'Saving plot of metrics for multiple experiments to {}'
    logger.info(message.format(save_path))

    # The legend sits outside the axes, so it must be passed explicitly to be included in the tight bounding box.
    plt.savefig(save_path, bbox_extra_artists=(lgd, ), bbox_inches='tight')
    plt.close(fig)
Exemplo n.º 2
0
def save_batched_seqs(sequence_features,
                      names,
                      out_dir,
                      seq_len=None,
                      feat_names=None):
    """Saves multiple sequence features for multiple sentences, handling sequence length clipping and device detachment.

    Parameters
    ----------
    sequence_features : dict[str, torch.Tensor], list[torch.Tensor], or torch.Tensor
        shape (batch_size, max_seq_len, feat_dim)
        Batched sequence features to be saved.
        If it is a dict, the keys are used as the subdirectory names and `feat_names` can be used to select a subset.
        If it is a list (or singleton), `feat_names` must be provided in order to determine the subdirectory names.
        A singleton is treated as a list containing one feature.
    names : list[str], shape (batch_size, )
        Utterance names to use to save each item in the batch
    out_dir : str
        Path of directory under which to save each feature type.
    seq_len : np.ndarray or torch.Tensor, shape (batch_size,)
        Sequence length used to remove padding from each batch item.
    feat_names : str or list[str]
        Names of features to be saved, these determine the subdirectory names for saving under `out_dir`.
        If `sequence_features` is a dict, this can be used to select a subset of the features for saving.
        A single name may be given as a plain string.

    Raises
    ------
    ValueError
        If `sequence_features` is not a dict and `feat_names` is not given. This is checked before any directory is
        created.

    Notes
    -----
    Each feature is saved under the directory {out_dir}/feats/{feat_name}, with one file per entry in `names`
    (written by `tdt.file_io.save_dir` using `tdt.file_io.save_bin`).

    Features whose detached values are not `np.ndarray` are skipped.
    """
    if feat_names is not None:
        # A bare string would otherwise be iterated character by character, and a one-shot iterable would be
        # exhausted by the first pass below, leaving nothing for the `zip` in the saving loop.
        feat_names = [feat_names] if isinstance(feat_names, str) else list(feat_names)

    if isinstance(sequence_features, dict):
        if feat_names is None:
            feat_names = list(sequence_features.keys())

        sequence_features = [
            sequence_features[feat_name] for feat_name in feat_names
        ]

    else:
        if feat_names is None:
            raise ValueError(
                'If sequences features is not a dictionary, then feat_names must be provided.'
            )

        # A singleton must be wrapped, otherwise the star-unpacking below would split it along its first axis
        # (the batch axis) and treat every batch item as a separate feature.
        if not isinstance(sequence_features, (list, tuple)):
            sequence_features = [sequence_features]

    pred_dir = os.path.join(out_dir, 'feats')
    os.makedirs(pred_dir, exist_ok=True)

    sequence_features = utils.detach_batched_seqs(*sequence_features,
                                                  seq_len=seq_len)
    # NOTE(review): presumably `detach_batched_seqs` can return a single item when given one feature, hence the
    # `listify` -- confirm against its implementation.
    sequence_features = utils.listify(sequence_features)

    for feat_name, values in zip(feat_names, sequence_features):

        # Only numpy features are written, anything else is silently skipped.
        if isinstance(values[0], np.ndarray):
            tdt.file_io.save_dir(tdt.file_io.save_bin,
                                 path=os.path.join(pred_dir, feat_name),
                                 data=values,
                                 file_ids=names)
Exemplo n.º 3
0
    def fetch_params(self, speaker_ids, data_type=np.ndarray, deltas=False):
        r"""Gets the speaker-dependent normalisation parameters, taking into account the delta flag and type of data.

        Parameters
        ----------
        speaker_ids : str or list[str]
            Names of speakers for each batch item.
        data_type : type
            Typically `torch.Tensor` for batched features, or `np.ndarray` for single sentences or visualisation code.
        deltas : bool
            Whether `feature` is a delta feature, and should be normalised using the delta parameters.

        Returns
        -------
        sd_params : dict[str, torch.Tensor] or dict[str, np.ndarray], shape (batch_size, feat_dim) or (feat_dim)
            The speaker dependent parameters. Each parameter is stacked over the speakers along a new leading axis;
            that axis is removed when it has size 1 (i.e. a single speaker).

        Raises
        ------
        KeyError
            If a speaker in `speaker_ids` has no entry in the parameters returned by the parent class.
        """
        speaker_ids = utils.listify(speaker_ids)
        speaker_params = super(_SpeakerDependentNormaliser,
                               self).fetch_params(data_type=data_type,
                                                  deltas=deltas)

        # Collect each parameter (e.g. mean) for every speaker_id (item in batch), adding a leading axis so they can
        # be joined along dim=0. Joining once per parameter avoids re-copying the running result for every speaker.
        params_per_name = {}
        for speaker_id in speaker_ids:
            for name, param in speaker_params[speaker_id].items():
                params_per_name.setdefault(name, []).append(param[None, ...])

        sd_params = {}
        for name, params in params_per_name.items():
            if len(params) == 1:
                joined = params[0]
            elif data_type == torch.Tensor:
                joined = torch.cat(params)
            else:
                joined = np.concatenate(params)

            # Only drop the leading axis when it is a singleton. `torch.Tensor.squeeze(0)` is a no-op otherwise, but
            # `np.ndarray.squeeze(0)` raises a ValueError, which broke the numpy path for more than one speaker.
            if joined.shape[0] == 1:
                joined = joined.squeeze(0)

            sd_params[name] = joined

        return sd_params
Exemplo n.º 4
0
def plot_experiment(experiment_name,
                    metric_names='loss',
                    experiments_base='experiments',
                    axs=None,
                    colour=None,
                    add_labels=True,
                    save=False):
    """Plots the train and valid curves of each metric for a single experiment.

    Parameters
    ----------
    experiment_name : str
        Name of the experiment, also used as the legend label of the train curve.
    metric_names : str or list[str]
        Metrics to plot, one axes is used per metric.
    experiments_base : str
        Directory containing the experiment directories.
    axs : sequence of matplotlib axes, optional
        Axes to draw on, paired in order with `metric_names`. If not given, a new figure with one row of subplots
        is created.
    colour : optional
        Passed to matplotlib as the colour of both the train and the valid curve.
    add_labels : bool
        Whether to set the x-axis label ('Epoch number') and the y-axis label (the metric name).
    save : bool
        Whether to save pyplot's current figure at {experiments_base}/{experiment_name}/metrics.pdf

    Returns
    -------
    axs : sequence of matplotlib axes
        The axes that were drawn on.
    """
    metric_names = utils.listify(metric_names)
    train_results = load_experiment_results(experiment_name, metric_names,
                                            'train', experiments_base)
    valid_results = load_experiment_results(experiment_name, metric_names,
                                            'valid', experiments_base)

    if axs is None:
        n_axes = len(metric_names)
        _, axs = plt.subplots(1, n_axes, figsize=(1 + n_axes * (4 + 1), 4))
        if n_axes == 1:
            # A single subplot is returned as a bare Axes, wrap it so it can be zipped with the metric names.
            axs = [axs]

    for ax, metric_name in zip(axs, metric_names):
        # Train curve, drawn with the default line style and carrying the legend label.
        train_curve = train_results[metric_name]
        train_epochs = list(train_curve.keys())
        train_values = list(train_curve.values())
        ax.plot(train_epochs, train_values, label=experiment_name, c=colour)

        # Valid curve, same colour but with the '--' line style and no legend label.
        valid_curve = valid_results[metric_name]
        valid_epochs = list(valid_curve.keys())
        valid_values = list(valid_curve.values())
        ax.plot(valid_epochs, valid_values, '--', c=colour)

        if not add_labels:
            continue

        ax.set_xlabel('Epoch number')
        ax.set_ylabel(metric_name)

    if save:
        save_path = os.path.join(experiments_base, experiment_name,
                                 'metrics.pdf')

        logger.info('Saving plot of metrics to {}'.format(save_path))
        plt.savefig(save_path, bbox_inches='tight')

    return axs
Exemplo n.º 5
0
def load_experiment_results(experiment_name,
                            metric_names='loss',
                            mode='train',
                            experiments_base='experiments'):
    r"""Loads the per-epoch values of the requested metrics for one experiment.

    Every entry of the directory {experiments_base}/{experiment_name}/{mode} that contains a `metrics.json` file is
    read. The epoch number is parsed from the text after the last underscore in the entry's name.

    Parameters
    ----------
    experiment_name : str
        Name of the experiment directory under `experiments_base`.
    metric_names : str or list[str]
        Metrics to collect. A metric that is missing from an epoch's file is skipped for that epoch.
    mode : str
        Sub-directory of the experiment to read from, e.g. 'train' or 'valid'.
    experiments_base : str
        Directory containing the experiment directories.

    Returns
    -------
    results : dict[str, collections.OrderedDict[int, float]]
        Dictionary of results, ordered by epoch, with the following structure,

        .. code:: python

            {
                metric_name: OrderedDict(
                    epoch: metric_value
                )
            }
    """
    metric_names = utils.listify(metric_names)
    mode_dir = os.path.join(experiments_base, experiment_name, mode)

    collected = {name: {} for name in metric_names}
    for entry in os.listdir(mode_dir):
        json_path = os.path.join(mode_dir, entry, 'metrics.json')
        if not os.path.isfile(json_path):
            # Not an epoch directory (or no metrics were written for this epoch).
            continue

        epoch_metrics = file_io.load_json(json_path)
        epoch = int(entry.split('_')[-1])

        for name in metric_names:
            if name in epoch_metrics:
                collected[name][epoch] = epoch_metrics[name]

    # `os.listdir` does not return the entries in numerical order, so order each metric by its epoch numbers.
    results = {}
    for name, per_epoch in collected.items():
        results[name] = OrderedDict(sorted(per_epoch.items()))

    return results
Exemplo n.º 6
0
    def accumulate(self, collection, **kwargs):
        r"""Forwards inputs to the accumulate function of each metric named in kwargs.

        Parameters
        ----------
        collection : str
            Metrics in this collection will be updated.
        kwargs : dict[str, tuple]
            Names of metrics, and inputs to each metric's accumulate function, e.g. a list of :class:`torch.Tensor`.
            If the last input for a metric is a dict, it is passed to that metric as keyword arguments.
        """
        for metric_name, metric_inputs in kwargs.items():
            # A single input is allowed as well as several.
            positional = utils.listify(metric_inputs)

            # A trailing dict is split off and used as the keyword arguments for this metric.
            keyword = dict()
            if isinstance(positional[-1], dict):
                keyword = positional[-1]
                positional = positional[:-1]

            metric = self[collection][metric_name]
            metric.accumulate(*positional, **keyword)
Exemplo n.º 7
0
 def __init__(self, z_dim, kld_weight, input_dim, input_names):
     """Initialises the parent class and records the names of the model inputs.

     `z_dim`, `kld_weight` and `input_dim` are passed unchanged to the parent constructor.

     `input_names` is stored on the instance after being passed through `utils.listify`
     (presumably so that a single name can be given in place of a list -- TODO confirm).
     """
     super(VAMPPriorDataVAE, self).__init__(z_dim, kld_weight, input_dim)
     self.input_names = utils.listify(input_names)