Exemplo n.º 1
0
# Module-level placeholders; nothing in the visible part of this file reads them.
# NOTE(review): confirm whether they are still used elsewhere before removing.
other_params = {}
some_augmentation = None


@mark.parametrize(
    [
        "recording_id",
        "channel",
        "start",
        "duration",
        "exception_expectation",
        "expected_num_frames",
    ],
    [
        ("recording-1", 0, 0.0, None, does_not_raise(), 50),  # whole recording
        (
            "recording-1",
            0,
            0.0,
            0.499,
            does_not_raise(),
            50,
        ),  # practically whole recording
        ("recording-2", 0, 0.0, 0.7, does_not_raise(), 70),
        ("recording-2", 0, 0.5, 0.5, does_not_raise(), 50),
        ("recording-2", 1, 0.25, 0.65, does_not_raise(), 65),
        ("recording-nonexistent", 0, 0.0, None, raises(KeyError),
         None),  # no recording
        ("recording-1", 1000, 0.0, None, raises(KeyError), None),  # no channel
        (
Exemplo n.º 2
0
    assert padded.num_samples == expected_num_samples

    mixed_feats = padded.load_features()
    assert mixed_feats.shape == (2000, 40)
    np.testing.assert_allclose(mixed_feats[1604:, :], PADDING_LOG_ENERGY, atol=0.8)  # Only padding after 16.04s
    np.testing.assert_array_less(PADDING_LOG_ENERGY, mixed_feats[1603, :])  # Padding didn't start before 16.04s

    pre_mixed_feats = libri_cut.load_features()
    np.testing.assert_almost_equal(pre_mixed_feats, mixed_feats[:1604, :], decimal=5)


@pytest.mark.parametrize(
    ['duration', 'num_frames', 'num_samples', 'expected_duration', 'expected_num_frames', 'expected_num_samples',
     'exception_expectation'],
    [
        (20, None, None, 20, 2000, 320000, does_not_raise()),
        (None, 2000, None, 20, 2000, 320000, pytest.raises(AssertionError)),
        (None, None, 320000, 20, 2000, 320000, does_not_raise()),
    ]
)
def test_pad_simple_cut_audio_only(
        libri_cut,
        duration,
        num_frames,
        num_samples,
        expected_duration,
        expected_num_frames,
        expected_num_samples,
        exception_expectation
):
    libri_cut.features = None
Exemplo n.º 3
0
import pickle
from tempfile import NamedTemporaryFile

import pytest

from lhotse import AudioSource, Cut, CutSet, FeatureSet, Features, Recording, RecordingSet, SupervisionSegment, \
    SupervisionSet, load_manifest, store_manifest
from lhotse.utils import fastcopy, is_module_available, nullcontext as does_not_raise


@pytest.mark.parametrize(
    ['path', 'exception_expectation'],
    [
        # Fixture paths that are expected to load without raising.
        *(
            (manifest_path, does_not_raise())
            for manifest_path in (
                'test/fixtures/audio.json',
                'test/fixtures/supervision.json',
                'test/fixtures/dummy_feats/feature_manifest.json',
                'test/fixtures/libri/cuts.json',
            )
        ),
        # Paths for which a specific exception type is expected.
        ('test/fixtures/feature_config.yml', pytest.raises(ValueError)),
        ('no/such/path.xd', pytest.raises(AssertionError)),
    ],
)
def test_load_any_lhotse_manifest(path, exception_expectation):
    """Call ``load_manifest(path)`` under the expectation paired with that path.

    ``exception_expectation`` is a context manager: either a no-op one
    (the load must succeed) or ``pytest.raises(...)`` (the load must fail
    with the given exception type).
    """
    with exception_expectation:
        load_manifest(path)


@pytest.fixture
def recording_set():
    return RecordingSet.from_recordings([
        Recording(id='x',
                  sources=[
                      AudioSource(type='file',
                                  channels=[0],
                                  source='text/fixtures/mono_c0.wav'),
Exemplo n.º 4
0
def test_cut_set_batch_feature_extraction(cut_set, extractor_type):
    """Run batch feature extraction on a resampled cut set and validate the output.

    The extractor is instantiated from ``extractor_type``, the cuts are passed
    through ``resample(16000)``, and the features are stored at a temporary
    file path that is removed when the ``with`` block exits.
    """
    feature_extractor = extractor_type()
    resampled_cuts = cut_set.resample(16000)
    with NamedTemporaryFile() as storage_file:
        cuts_with_features = resampled_cuts.compute_and_store_features_batch(
            extractor=feature_extractor,
            storage_path=storage_file.name,
            num_workers=0,
        )
        # Validate inside the ``with`` so the temporary storage still exists
        # while the data is being read back.
        validate(cuts_with_features, read_data=True)


@pytest.mark.parametrize(
    ["suffix", "exception_expectation"],
    [
        (".jsonl", does_not_raise()),
        (".json", pytest.raises(InvalidPathExtension)),
    ],
)
def test_cut_set_batch_feature_extraction_manifest_path(
        cut_set, suffix, exception_expectation):
    extractor = Fbank()
    cut_set = cut_set.resample(16000)
    with NamedTemporaryFile() as feat_f, NamedTemporaryFile(
            suffix=suffix) as manifest_f:
        with exception_expectation:
            cut_set_with_feats = cut_set.compute_and_store_features_batch(
                extractor=extractor,
                storage_path=feat_f.name,
                manifest_path=manifest_f.name,
                num_workers=0,
Exemplo n.º 5
0

def test_get_stereo_audio_from_single_file(recording_set):
    """Audio loaded for "recording-2" must match ``expected_stereo_single_source()``."""
    loaded = recording_set.load_audio("recording-2")
    reference = expected_stereo_single_source()
    np.testing.assert_almost_equal(loaded, reference)


def test_load_audio_from_sphere_file(recording_set):
    """Audio loaded for "recording-3" must match ``expected_stereo_single_source()``."""
    loaded = recording_set.load_audio("recording-3")
    reference = expected_stereo_single_source()
    np.testing.assert_almost_equal(loaded, reference)


@mark.parametrize(
    ("channels", "expected_audio", "exception_expectation"),
    [
        # Channel selections that are expected to load successfully.
        (None, expected_stereo_two_sources(), does_not_raise()),
        (0, expected_channel_0(), does_not_raise()),
        (1, expected_channel_1(), does_not_raise()),
        ([0, 1], expected_stereo_two_sources(), does_not_raise()),
        # Channel 1000 is expected to fail with AssertionError; the expected
        # audio value is a placeholder in that case.
        (1000, "irrelevant", raises(AssertionError)),
    ],
)
def test_get_audio_multichannel(
    recording_set, channels, expected_audio, exception_expectation
):
    """Load "recording-1" with a given ``channels`` selection and compare the samples.

    Both the load and the comparison run inside ``exception_expectation``, so
    for the failing case the comparison is only reached if the load did not
    raise.
    """
    with exception_expectation:
        np.testing.assert_almost_equal(
            recording_set.load_audio("recording-1", channels=channels),
            expected_audio,
        )


@mark.parametrize(
Exemplo n.º 6
0
    # Invariant 1: we receive the same number of items in a dataloader epoch as there were in the CutSet
    assert len(sampled_cuts) == len(cut_set)
    # Invariant 2: the items are not duplicated
    assert len(set(c.id for c in sampled_cuts)) == len(sampled_cuts)
    # Invariant 3: the items are shuffled, i.e. the order is different than that in the CutSet
    assert [c.id for c in sampled_cuts] != [c.id for c in cut_set]


@pytest.mark.parametrize(
    ["max_duration", "max_frames", "max_samples", "exception_expectation"],
    [
        (
            None,
            None,
            None,
            does_not_raise(),
        ),  # represents no criterion (unlimited batch size)
        (10.0, None, None, does_not_raise()),
        (None, 1000, None, does_not_raise()),
        (None, None, 160000, does_not_raise()),
        (None, 1000, 160000, pytest.raises(AssertionError)),
        (5.0, 1000, 160000, pytest.raises(AssertionError)),
    ],
)
def test_single_cut_sampler_time_constraints(
    max_duration, max_frames, max_samples, exception_expectation
):
    # The dummy cuts have a duration of 1 second each
    cut_set = DummyManifest(CutSet, begin_id=0, end_id=100)
    if max_frames is None:
        cut_set = cut_set.drop_features()
Exemplo n.º 7
0
# Module-level placeholders; nothing in the visible part of this file reads them.
# NOTE(review): confirm whether they are still used elsewhere before removing.
other_params = {}
some_augmentation = None


@mark.parametrize(
    [
        "recording_id",
        "channel",
        "start",
        "duration",
        "exception_expectation",
        "expected_num_frames",
    ],
    [
        ("recording-1", 0, 0.0, None, does_not_raise(), 50),  # whole recording
        (
            "recording-1",
            0,
            0.0,
            0.499,
            does_not_raise(),
            50,
        ),  # practically whole recording
        ("recording-2", 0, 0.0, 0.7, does_not_raise(), 70),
        ("recording-2", 0, 0.5, 0.5, does_not_raise(), 50),
        ("recording-2", 1, 0.25, 0.65, does_not_raise(), 65),
        ("recording-nonexistent", 0, 0.0, None, raises(KeyError),
         None),  # no recording
        ("recording-1", 1000, 0.0, None, raises(KeyError), None),  # no channel
        (
Exemplo n.º 8
0
                           start=0.5,
                           duration=6.0),
        SupervisionSegment(id='sup-2',
                           recording_id='irrelevant',
                           start=7.0,
                           duration=2.0),
        SupervisionSegment(id='sup-3',
                           recording_id='irrelevant',
                           start=13.0,
                           duration=2.5)
    ]


@pytest.mark.parametrize(
    ['offset', 'expected_duration', 'exception_expectation'],
    [(0, 10.0, does_not_raise()), (1, 11.0, does_not_raise()),
     (5, 15.0, does_not_raise()), (10, 20.0, does_not_raise()),
     (100, 'irrelevant', pytest.raises(AssertionError))])
def test_overlay_cut_duration_and_supervisions(offset, expected_duration,
                                               exception_expectation, cut1,
                                               cut2):
    with exception_expectation:
        mixed_cut = cut1.mix(cut2, offset_other_by=offset)

        assert isinstance(mixed_cut, MixedCut)
        assert mixed_cut.duration == expected_duration
        assert mixed_cut.supervisions == [
            SupervisionSegment(id='sup-1',
                               recording_id='irrelevant',
                               start=0.5,
                               duration=6.0),
import torchaudio

from lhotse import Fbank, KaldifeatFbank, KaldifeatFbankConfig, Mfcc
from lhotse.features import create_default_feature_extractor
from lhotse.features.kaldifeat import KaldifeatMelOptions, KaldifeatMfcc
from lhotse.utils import nullcontext as does_not_raise

kaldifeat = pytest.importorskip(
    "kaldifeat", reason="Kaldifeat tests require kaldifeat to be installed."
)


@pytest.mark.parametrize(
    ("feature_type", "exception_expectation"),
    [
        (extractor_name, does_not_raise())
        for extractor_name in ("kaldifeat-fbank", "kaldifeat-mfcc")
    ],
)
def test_feature_extractor(feature_type, exception_expectation):
    """Smoke test: build the default extractor for ``feature_type`` and run it once.

    No output values are checked; the test only verifies that creation, audio
    loading and extraction complete under ``exception_expectation``.
    """
    wav_path = "test/fixtures/libri/libri-1088-134315-0000.wav"
    with exception_expectation:
        extractor = create_default_feature_extractor(feature_type)
        waveform, sample_rate = torchaudio.load(wav_path)
        extractor.extract(samples=waveform, sampling_rate=sample_rate)


def test_kaldifeat_config():
    x = np.arange(8000, dtype=np.float32)
    fe = KaldifeatFbank(KaldifeatFbankConfig(mel_opts=KaldifeatMelOptions(num_bins=27)))
    feats = fe.extract(x, sampling_rate=16000)
Exemplo n.º 10
0
from lhotse.audio import RecordingSet
from lhotse.augmentation import WavAugmenter, is_wav_augment_available
from lhotse.features import (Fbank, FeatureExtractor, FeatureMixer, FeatureSet,
                             FeatureSetBuilder, Features, Mfcc, Spectrogram,
                             create_default_feature_extractor)
from lhotse.features.io import LilcomFilesWriter, LilcomHdf5Writer, NumpyFilesWriter, NumpyHdf5Writer
from lhotse.testing.dummies import DummyManifest
from lhotse.utils import Seconds, time_diff_to_num_frames
from lhotse.utils import nullcontext as does_not_raise

# Module-level placeholders; nothing in the visible part of this file reads them.
# NOTE(review): confirm whether they are still used elsewhere before removing.
other_params = {}
some_augmentation = None


@mark.parametrize(
    ('feature_type', 'exception_expectation'),
    [
        # Feature types for which extraction is expected to succeed.
        *(
            (extractor_name, does_not_raise())
            for extractor_name in ('mfcc', 'fbank', 'spectrogram')
        ),
        # 'pitch' is expected to raise (any Exception subclass is accepted).
        ('pitch', raises(Exception)),
    ],
)
def test_feature_extractor(feature_type, exception_expectation):
    """Smoke test: build the default extractor for ``feature_type`` and run it once.

    No output values are checked; the test only verifies that creation, audio
    loading and extraction behave as ``exception_expectation`` prescribes.
    """
    wav_path = 'test/fixtures/libri/libri-1088-134315-0000.wav'
    with exception_expectation:
        extractor = create_default_feature_extractor(feature_type)
        waveform, sample_rate = torchaudio.load(wav_path)
        extractor.extract(samples=waveform, sampling_rate=sample_rate)


def test_feature_extractor_serialization():
    fe = Fbank()
    with NamedTemporaryFile() as f:
        fe.to_yaml(f.name)
Exemplo n.º 11
0
    for batch in sampler:
        sampler_cut_ids.extend(batch)

    # Invariant 1: we receive the same number of items in a dataloader epoch as there were in the CutSet
    assert len(sampler_cut_ids) == len(cut_set)
    # Invariant 2: the items are not duplicated
    assert len(set(sampler_cut_ids)) == len(sampler_cut_ids)
    # Invariant 3: the items are shuffled, i.e. the order is different than that in the CutSet
    assert sampler_cut_ids != [c.id for c in cut_set]


@pytest.mark.parametrize(
    ['max_duration', 'max_frames', 'max_samples', 'exception_expectation'],
    [
        (None, None, None,
         does_not_raise()),  # represents no criterion (unlimited batch size)
        (10.0, None, None, does_not_raise()),
        (None, 1000, None, does_not_raise()),
        (None, None, 160000, does_not_raise()),
        (None, 1000, 160000, pytest.raises(AssertionError)),
        (5.0, 1000, 160000, pytest.raises(AssertionError)),
    ])
def test_single_cut_sampler_time_constraints(max_duration, max_frames,
                                             max_samples,
                                             exception_expectation):
    # The dummy cuts have a duration of 1 second each
    cut_set = DummyManifest(CutSet, begin_id=0, end_id=100)
    if max_frames is None:
        cut_set = cut_set.drop_features()

    with exception_expectation:
Exemplo n.º 12
0
from tempfile import NamedTemporaryFile

import pytest
import torch
import torchaudio

from lhotse import Fbank, FeatureExtractor, create_default_feature_extractor
from lhotse.utils import nullcontext as does_not_raise


@pytest.mark.parametrize(
    ("feature_type", "exception_expectation"),
    [
        # Feature types for which extraction is expected to succeed.
        *(
            (extractor_name, does_not_raise())
            for extractor_name in ("mfcc", "fbank", "spectrogram")
        ),
        # "pitch" is expected to raise (any Exception subclass is accepted).
        ("pitch", pytest.raises(Exception)),
    ],
)
def test_feature_extractor(feature_type, exception_expectation):
    """Smoke test: build the default extractor for ``feature_type`` and run it once.

    No output values are checked; the test only verifies that creation, audio
    loading and extraction behave as ``exception_expectation`` prescribes.
    """
    wav_path = "test/fixtures/libri/libri-1088-134315-0000.wav"
    with exception_expectation:
        extractor = create_default_feature_extractor(feature_type)
        waveform, sample_rate = torchaudio.load(wav_path)
        extractor.extract(samples=waveform, sampling_rate=sample_rate)


@pytest.mark.parametrize(
    ["feature_type", "exception_expectation"],
    [
Exemplo n.º 13
0
                           duration=6.0),
        SupervisionSegment(id="sup-2",
                           recording_id="irrelevant",
                           start=7.0,
                           duration=2.0),
        SupervisionSegment(id="sup-3",
                           recording_id="irrelevant",
                           start=13.0,
                           duration=2.5),
    ]


@pytest.mark.parametrize(
    ["offset", "allow_padding", "expected_duration", "exception_expectation"],
    [
        (0, False, 10.0, does_not_raise()),
        (1, False, 11.0, does_not_raise()),
        (5, False, 15.0, does_not_raise()),
        (10, False, 20.0, does_not_raise()),
        (100, False, "irrelevant", pytest.raises(AssertionError)),
        (100, True, 110.0, does_not_raise()),
    ],
)
def test_overlay_cut_duration_and_supervisions(offset, allow_padding,
                                               expected_duration,
                                               exception_expectation, cut1,
                                               cut2):
    with exception_expectation:
        mixed_cut = cut1.mix(cut2,
                             offset_other_by=offset,
                             allow_padding=allow_padding)
Exemplo n.º 14
0
# Module-level placeholders; nothing in the visible part of this file reads them.
# NOTE(review): confirm whether they are still used elsewhere before removing.
other_params = {}
some_augmentation = None


@mark.parametrize(
    [
        "recording_id",
        "channel",
        "start",
        "duration",
        "exception_expectation",
        "expected_num_frames",
    ],
    [
        ("recording-1", 0, 0.0, None, does_not_raise(), 50),  # whole recording
        (
            "recording-1",
            0,
            0.0,
            0.499,
            does_not_raise(),
            50,
        ),  # practically whole recording
        ("recording-2", 0, 0.0, 0.7, does_not_raise(), 70),
        ("recording-2", 0, 0.5, 0.5, does_not_raise(), 50),
        ("recording-2", 1, 0.25, 0.65, does_not_raise(), 65),
        ("recording-nonexistent", 0, 0.0, None, raises(KeyError), None),  # no recording
        ("recording-1", 1000, 0.0, None, raises(KeyError), None),  # no channel
        (
            "recording-2",