Exemplo n.º 1
0
def test_window_stream():
    """Check the (source, target) pairs produced by ``Window``.

    Three random integer sentences of lengths 3, 5 and 7 are streamed
    through ``Window`` with source and target windows of size 4, and three
    configurations are checked:

    * first argument 0, overlapping: source and target are equal and six
      windows are produced in total;
    * first argument -2, non-overlapping: the last two items of the source
      equal the first two items of the target, two examples are produced;
    * first argument -2, overlapping: the first two items of the source
      equal the last two items of the target, two examples are produced.

    Each case counts its windows with its own counter rather than reading
    the ``enumerate`` index after the loop. A leaked loop index keeps its
    previous value when the iterator is empty, which would let a later
    count check pass vacuously (or raise ``UnboundLocalError`` in the
    first case instead of failing the assertion).
    """
    sentences = [
        list(numpy.random.randint(10, size=sentence_length))
        for sentence_length in [3, 5, 7]
    ]
    stream = DataStream(IterableDataset(sentences))

    windows = Window(0, 4, 4, True, stream)
    num_windows = 0
    for source, target in windows.get_epoch_iterator():
        assert source == target
        num_windows += 1
    assert num_windows == 6  # Total of 6 windows

    # Make sure that negative indices work
    windows = Window(-2, 4, 4, False, stream)
    num_windows = 0
    for source, target in windows.get_epoch_iterator():
        assert source[-2:] == target[:2]
        num_windows += 1
    assert num_windows == 2  # Should get 2 examples

    # Even for overlapping negative indices should work
    windows = Window(-2, 4, 4, True, stream)
    num_windows = 0
    for source, target in windows.get_epoch_iterator():
        assert source[:2] == target[-2:]
        num_windows += 1
    assert num_windows == 2  # Should get 2 examples
Exemplo n.º 2
0
def get_stream(batch_size,
               source_window=4000,
               target_window=1000,
               num_examples=5000):
    """Build a batched ('features', 'targets') stream from YouTube audio.

    The example stream of the ``YouTubeAudio`` dataset for video
    ``'XqaJ2Ol5cC4'`` is wrapped in ``ForceFloatX`` and cut into
    non-overlapping windows. The windowed stream then feeds two branches:
    the 'features' source is passed through ``mfcc`` and the 'targets'
    source is flattened. Both branches are merged back together and
    batched.

    Parameters
    ----------
    batch_size
        Forwarded as the first argument of ``ConstantScheme``.
    source_window
        Forwarded to ``Window`` as its second positional argument.
    target_window
        Forwarded to ``Window`` as its third positional argument.
    num_examples
        Forwarded as the second argument of ``ConstantScheme``.

    Returns
    -------
    The ``Batch`` transformer wrapping the merged stream, created with
    ``strictness=1``.
    """
    from fuel.datasets.youtube_audio import YouTubeAudio

    dataset = YouTubeAudio('XqaJ2Ol5cC4')
    examples = ForceFloatX(dataset.get_example_stream())
    windowed = Window(0, source_window, target_window,
                      overlapping=False, data_stream=examples)

    # Features branch: keep only 'features', then apply ``mfcc`` to it.
    features = Mapping(FilterSources(windowed, sources=('features', )), mfcc)
    # Targets branch: keep only 'targets', then flatten it.
    targets = Flatten(FilterSources(windowed, sources=('targets', )))

    merged = Merge((features, targets), sources=('features', 'targets'))

    # TODO: consider adding a random scheme here.
    return Batch(merged,
                 ConstantScheme(batch_size, num_examples),
                 strictness=1)