# Example 1
def get_datastream(path,
                   which_set,
                   batch_size=1,
                   norm_path=None,
                   use_ivectors=False,
                   truncate_ivectors=False,
                   ivector_dim=100,
                   shuffled=True):
    """Build a padded WSJ data stream from an HDF5 dataset.

    Parameters
    ----------
    path : str
        Path to the HDF5 dataset file.
    which_set : str
        Name of the split to load (passed to ``H5PYDataset``).
    batch_size : int, optional
        Number of examples per batch.
    norm_path : str, optional
        Path to a numpy file holding ``'mean'`` and ``'std'`` arrays; when
        given, the stream is normalized with them.
    use_ivectors : bool, optional
        Keep the ``'ivectors'`` source in addition to features/targets.
    truncate_ivectors : bool, optional
        Truncate i-vectors to ``ivector_dim`` (only applies with
        ``use_ivectors=True``).
    ivector_dim : int, optional
        Target i-vector dimensionality when truncating.
    shuffled : bool, optional
        Shuffle example order; otherwise iterate sequentially.

    Returns
    -------
    Padding
        The final padded data stream.
    """
    dataset = H5PYDataset(path, which_sets=(which_set, ))

    # Pick the iteration order, then build the base stream on top of it.
    scheme_cls = ShuffledScheme if shuffled else SequentialScheme
    scheme = scheme_cls(batch_size=batch_size,
                        examples=dataset.num_examples)
    stream = DataStream(dataset=dataset, iteration_scheme=scheme)

    if norm_path:
        stats = numpy.load(norm_path)
        stream = Normalize(data_stream=stream,
                           means=stats['mean'],
                           stds=stats['std'])

    # Select which sources survive; optionally shorten the i-vectors.
    if use_ivectors:
        stream = FilterSources(data_stream=stream,
                               sources=['features', 'ivectors', 'targets'])
        if truncate_ivectors:
            stream = TruncateTransformer(stream, 'ivectors', ivector_dim)
    else:
        stream = FilterSources(data_stream=stream,
                               sources=['features', 'targets'])
    return Padding(stream)
# Example 2
def construct_stream(dataset, rng, pool_size, maximum_frames, window_features,
                     **kwargs):
    """Construct the TIMIT training data stream.

    Parameters
    ----------
    dataset : Dataset
        Dataset to use.
    rng : numpy.random.RandomState
        Random number generator.
    pool_size : int
        Pool size for TIMIT dataset.
    maximum_frames : int
        Maximum frames for TIMIT dataset.
    window_features : int
        Window width for ``WindowFeatures``; 1 disables windowing.
    subsample : bool, optional
        Subsample features (and the feature mask) by a factor of 5.
    pretrain_alignment : bool, optional
        Use phoneme alignment for pretraining.
    uniform_alignment : bool, optional
        Use uniform alignment for pretraining.

    Returns
    -------
    ForceFloatX
        The fully-wrapped data stream (possibly subsampled).
    """
    # setdefault both normalizes kwargs for any downstream inspection and
    # gives us the effective value in one lookup.
    subsample = kwargs.setdefault('subsample', False)
    pretrain_alignment = kwargs.setdefault('pretrain_alignment', False)
    uniform_alignment = kwargs.setdefault('uniform_alignment', False)

    stream = DataStream(dataset,
                        iteration_scheme=SequentialShuffledScheme(
                            dataset.num_examples, pool_size, rng))
    if pretrain_alignment and uniform_alignment:
        stream = AddUniformAlignmentMask(stream)
    stream = Reshape('features', 'features_shapes', data_stream=stream)
    means, stds = dataset.get_normalization_factors()
    stream = Normalize(stream, means, stds)
    if window_features != 1:  # 1 == no windowing
        stream = WindowFeatures(stream, 'features', window_features)
    if pretrain_alignment:
        stream = Reshape('alignments', 'alignments_shapes', data_stream=stream)
    # NOTE(review): `key` is a module-level name not visible in this chunk —
    # presumably a length-sorting key (cf. the lambda used in
    # timit_datastream); confirm it is defined in this module.
    stream = Mapping(stream, SortMapping(key=key))
    stream = MaximumFrameCache(max_frames=maximum_frames,
                               data_stream=stream,
                               rng=rng)
    stream = Padding(data_stream=stream, mask_sources=['features', 'phonemes'])
    # The alignment variant carries one extra source, hence the extra axes
    # permutation for it; all sources are transposed to time-major layout.
    if pretrain_alignment:
        stream = AlignmentPadding(stream, 'alignments')
        axes = [(1, 0, 2), (1, 0), (1, 0), (1, 0), (2, 1, 0)]
    else:
        axes = [(1, 0, 2), (1, 0), (1, 0), (1, 0)]
    stream = Transpose(stream, axes)

    stream = ForceFloatX(stream)
    if subsample:
        stream = Subsample(stream, 'features', 5)
        stream = Subsample(stream, 'features_mask', 5)
    return stream
# Example 3
def timit_datastream(path, which_set, local_copy, pool_size, maximum_frames):
    """Load a TIMIT split and wrap it in the standard processing pipeline.

    Parameters
    ----------
    path : str
        Dataset location (passed to ``Timit``).
    which_set : str
        Name of the split to load.
    local_copy : bool
        Forwarded to ``Timit``; presumably requests a local dataset copy —
        confirm against the ``Timit`` class.
    pool_size : int
        Batch size for the sequential-shuffled iteration scheme.
    maximum_frames : int
        Frame budget for ``MaximumFrameCache``.

    Returns
    -------
    tuple
        ``(timit_dataset, data_stream)`` — the raw dataset and the final
        float-cast, padded stream.
    """
    timit_dataset = Timit(which_set=which_set,
                          path=path,
                          local_copy=local_copy)
    data_means, data_stds = timit_dataset.get_normalization_factors()

    # Fixed seed so shuffling (and the frame cache) is reproducible.
    shuffle_rng = numpy.random.RandomState(123)

    stream = DataStream(
        dataset=timit_dataset,
        iteration_scheme=SequentialShuffledScheme(
            num_examples=timit_dataset.num_examples,
            batch_size=pool_size,
            rng=shuffle_rng))

    # Unflatten features, normalize, sort by utterance length, cache up to
    # maximum_frames, then pad and mask the variable-length sources.
    stream = Reshape(data_source='features',
                     shape_source='features_shapes',
                     data_stream=stream)
    stream = Normalize(data_stream=stream,
                       means=data_means,
                       stds=data_stds)
    stream = Mapping(data_stream=stream,
                     mapping=SortMapping(key=lambda x: x[0].shape[0]))
    stream = MaximumFrameCache(max_frames=maximum_frames,
                               data_stream=stream,
                               rng=shuffle_rng)
    stream = Padding(data_stream=stream,
                     mask_sources=['features', 'phonemes'])
    return timit_dataset, ForceFloatX(stream)
# Example 4
def get_datastream(path, norm_path, which_set='train_si84', batch_size=1):
    """Build a shuffled, normalized, padded WSJ stream.

    Parameters
    ----------
    path : str
        Path to the HDF5 dataset file.
    norm_path : str
        Numpy file holding ``'mean'`` and ``'std'`` normalization arrays.
    which_set : str, optional
        Split name (default ``'train_si84'``).
    batch_size : int, optional
        Examples per batch.

    Returns
    -------
    Padding
        The final padded data stream over features and targets.
    """
    dataset = H5PYDataset(path, which_sets=(which_set, ))
    norm_stats = numpy.load(norm_path)

    stream = DataStream(
        dataset=dataset,
        iteration_scheme=ShuffledScheme(batch_size=batch_size,
                                        examples=dataset.num_examples))
    stream = Normalize(data_stream=stream,
                       means=norm_stats['mean'],
                       stds=norm_stats['std'])
    stream = FilterSources(data_stream=stream,
                           sources=['features', 'targets'])
    return Padding(data_stream=stream)