Exemplo n.º 1
0
def extract_cuts(
    cutset: Pathlike,
    output_cutset: Pathlike,
    storage_path: Pathlike,
    feature_manifest: Optional[Pathlike],
    storage_type: str,
    num_jobs: int,
):
    """
    Compute features for the cuts listed in the CUTSET manifest.
    Feature data is written to STORAGE_PATH, and the resulting cut
    manifest (the one returned by the feature computation) is saved
    to OUTPUT_CUTSET.
    """
    from lhotse import CutSet

    source_cuts: CutSet = CutSet.from_file(cutset)

    # No YAML config given -> fall back to a default-configured Fbank.
    if feature_manifest is None:
        extractor = Fbank()
    else:
        extractor = FeatureExtractor.from_yaml(feature_manifest)

    # Resolve the storage type name into whatever get_writer returns for it.
    writer_type = get_writer(storage_type)
    cuts_with_feats = source_cuts.compute_and_store_features(
        extractor=extractor,
        storage_path=storage_path,
        num_jobs=num_jobs,
        storage_type=writer_type,
    )

    # The output manifest may live in a directory that does not exist yet.
    manifest_dir = Path(output_cutset).parent
    manifest_dir.mkdir(parents=True, exist_ok=True)
    cuts_with_feats.to_file(output_cutset)
Exemplo n.º 2
0
def extract(recording_manifest: Pathlike, output_dir: Pathlike,
            feature_manifest: Optional[Pathlike], storage_type: str,
            lilcom_tick_power: int, root_dir: Optional[Pathlike],
            num_jobs: int):
    """
    Extract features for the recordings listed in RECORDING_MANIFEST (a JSON manifest).
    Feature data goes to a storage location inside OUTPUT_DIR, and the feature
    manifest is written to OUTPUT_DIR/feature_manifest.json.gz.
    """
    recordings: RecordingSet = RecordingSet.from_json(recording_manifest)
    if root_dir is not None:
        # Apply root_dir as a path prefix to the recordings.
        recordings = recordings.with_path_prefix(root_dir)

    # No YAML config given -> fall back to a default-configured Fbank.
    if feature_manifest is None:
        extractor = Fbank()
    else:
        extractor = FeatureExtractor.from_yaml(feature_manifest)

    target_dir = Path(output_dir)
    target_dir.mkdir(exist_ok=True, parents=True)

    # Storage types whose name contains 'hdf5' write to 'feats.h5';
    # every other type writes to 'storage'.
    storage_name = 'feats.h5' if 'hdf5' in storage_type else 'storage'
    storage_path = target_dir / storage_name
    manifest_path = target_dir / 'feature_manifest.json.gz'

    writer_type = get_writer(storage_type)
    with writer_type(storage_path, tick_power=lilcom_tick_power) as storage:
        builder = FeatureSetBuilder(
            feature_extractor=extractor,
            storage=storage,
        )
        builder.process_and_store_recordings(
            recordings=recordings,
            output_manifest=manifest_path,
            num_jobs=num_jobs,
        )
Exemplo n.º 3
0
def extract_cuts_batch(
    cutset: Pathlike,
    output_cutset: Pathlike,
    storage_path: Pathlike,
    feature_manifest: Optional[Pathlike],
    storage_type: str,
    num_jobs: int,
    batch_duration: Seconds,
):
    """
    Extract features for cuts in a given CUTSET manifest.
    The features are stored in STORAGE_PATH, and the output manifest
    with features is stored in OUTPUT_CUTSET.

    This version enables CUDA acceleration for feature extractors
    that support it (e.g., kaldifeat extractors).

    \b
    Example usage of kaldifeat fbank with CUDA:

        $ pip install kaldifeat  # note: ensure it's compiled with CUDA

        $ lhotse feat write-default-config -f kaldifeat-fbank feat.yml

        $ sed 's/device: cpu/device: cuda/' feat.yml > feat-cuda.yml

        $ lhotse feat extract-cuts-batch -f feat-cuda.yml cuts.jsonl cuts_with_feats.jsonl feats.h5
    """
    from lhotse import CutSet

    cuts: CutSet = CutSet.from_file(cutset)

    # Use the extractor described by the YAML config when one is given;
    # otherwise fall back to a default-configured Fbank.
    feature_extractor = (FeatureExtractor.from_yaml(feature_manifest)
                         if feature_manifest is not None else Fbank())

    # num_jobs is forwarded as ``num_workers``, and the storage type name is
    # resolved through get_writer before being handed to the batch extraction.
    cuts = cuts.compute_and_store_features_batch(
        extractor=feature_extractor,
        storage_path=storage_path,
        batch_duration=batch_duration,
        num_workers=num_jobs,
        storage_type=get_writer(storage_type),
    )

    # Make sure the destination directory exists before writing the manifest.
    Path(output_cutset).parent.mkdir(parents=True, exist_ok=True)
    cuts.to_file(output_cutset)
Exemplo n.º 4
0
def make_feats(
        audio_manifest: Pathlike,
        output_dir: Pathlike,
        segmentation_manifest: Optional[Pathlike],
        # TODO: augmentation manifest should specify a number of transforms and probability of their application
        # e.g.:
        # "add_noise", "prob": 0.5, "noise_recordings": ["path1.wav", "path2.wav"]
        # "reverberate", "prob": 0.2, "rirs": ["rir1.wav", "rir2.wav"] (or however the RIRs are stored like... can be params for simulation)
        augmentation_manifest: Optional[Pathlike],
        feature_manifest: Optional[Pathlike],
        compressed: bool,
        lilcom_tick_power: int,
        root_dir: Optional[Pathlike],
        num_jobs: int):
    """
    Extract features for the recordings listed in AUDIO_MANIFEST (a YAML manifest).
    OUTPUT_DIR is created if needed and handed to the feature set builder
    as its output location.
    """
    recordings = RecordingSet.from_yaml(audio_manifest)

    # No YAML config given -> fall back to a default-constructed FeatureExtractor.
    if feature_manifest is None:
        extractor = FeatureExtractor()
    else:
        extractor = FeatureExtractor.from_yaml(feature_manifest)

    # TODO: to be used (actually, only the segmentation info will be used, and all supervision info will be ignored)
    # The manifest is loaded here, but the result is not consumed yet.
    supervision_set = None
    if segmentation_manifest is not None:
        supervision_set = SupervisionSet.from_yaml(segmentation_manifest)

    target_dir = Path(output_dir)
    target_dir.mkdir(exist_ok=True, parents=True)

    builder = FeatureSetBuilder(
        feature_extractor=extractor,
        output_dir=target_dir,
        root_dir=root_dir,
        augmentation_manifest=augmentation_manifest,
    )
    builder.process_and_store_recordings(
        recordings=recordings,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs,
    )
Exemplo n.º 5
0
def extract(audio_manifest: Pathlike, output_dir: Pathlike,
            segmentation_manifest: Optional[Pathlike], augmentation: str,
            feature_manifest: Optional[Pathlike], compressed: bool,
            lilcom_tick_power: int, root_dir: Optional[Pathlike],
            num_jobs: int):
    """
    Extract features for the recordings listed in AUDIO_MANIFEST (a JSON manifest).
    OUTPUT_DIR is created if needed and handed to the feature set builder
    as its output location. When an augmentation name is given, a predefined
    waveform augmenter is built and passed to the builder as well.
    """
    recordings = RecordingSet.from_json(audio_manifest)

    # No YAML config given -> fall back to a default-configured Fbank.
    if feature_manifest is None:
        extractor = Fbank()
    else:
        extractor = FeatureExtractor.from_yaml(feature_manifest)

    # TODO: to be used (actually, only the segmentation info will be used, and all supervision info will be ignored)
    # The manifest is loaded here, but the result is not consumed yet.
    supervision_set = None
    if segmentation_manifest is not None:
        supervision_set = SupervisionSet.from_json(segmentation_manifest)

    target_dir = Path(output_dir)
    target_dir.mkdir(exist_ok=True, parents=True)

    augmenter = None
    if augmentation is not None:
        # The first recording's sampling rate is the reference the augmenter
        # is created with; every other recording must match it.
        reference_rate = next(iter(recordings)).sampling_rate
        assert all(recording.sampling_rate == reference_rate for recording in recordings), \
            "Wav augmentation effect chains expect all the recordings to have the same sampling rate at this time."
        augmenter = WavAugmenter.create_predefined(
            name=augmentation,
            sampling_rate=reference_rate,
        )

    builder = FeatureSetBuilder(
        feature_extractor=extractor,
        output_dir=target_dir,
        root_dir=root_dir,
        augmenter=augmenter,
    )
    builder.process_and_store_recordings(
        recordings=recordings,
        segmentation=None,  # TODO: implement and use
        compressed=compressed,
        lilcom_tick_power=lilcom_tick_power,
        num_jobs=num_jobs,
    )
Exemplo n.º 6
0
def extract(recording_manifest: Pathlike, output_dir: Pathlike,
            augmentation: str, feature_manifest: Optional[Pathlike],
            storage_type: str, lilcom_tick_power: int,
            root_dir: Optional[Pathlike], num_jobs: int):
    """
    Extract features for the recordings listed in RECORDING_MANIFEST (a JSON manifest).
    Feature data goes to a storage location inside OUTPUT_DIR, and the feature
    manifest is written to OUTPUT_DIR/feature_manifest.json.gz. When an
    augmentation name is given, a predefined waveform augmenter is built and
    passed to the feature set builder.
    """
    recordings: RecordingSet = RecordingSet.from_json(recording_manifest)
    if root_dir is not None:
        # Apply root_dir as a path prefix to the recordings.
        recordings = recordings.with_path_prefix(root_dir)

    # No YAML config given -> fall back to a default-configured Fbank.
    if feature_manifest is None:
        extractor = Fbank()
    else:
        extractor = FeatureExtractor.from_yaml(feature_manifest)

    target_dir = Path(output_dir)
    target_dir.mkdir(exist_ok=True, parents=True)

    # Storage types whose name contains 'hdf5' write to 'feats.h5';
    # every other type writes to 'storage'.
    storage_name = 'feats.h5' if 'hdf5' in storage_type else 'storage'
    storage_path = target_dir / storage_name
    manifest_path = target_dir / 'feature_manifest.json.gz'

    augmenter = None
    if augmentation is not None:
        # The first recording's sampling rate is the reference the augmenter
        # is created with; every other recording must match it.
        reference_rate = next(iter(recordings)).sampling_rate
        assert all(recording.sampling_rate == reference_rate for recording in recordings), \
            "Wav augmentation effect chains expect all the recordings to have the same sampling rate at this time."
        augmenter = WavAugmenter.create_predefined(
            name=augmentation,
            sampling_rate=reference_rate,
        )

    writer_type = get_writer(storage_type)
    with writer_type(storage_path, tick_power=lilcom_tick_power) as storage:
        builder = FeatureSetBuilder(
            feature_extractor=extractor,
            storage=storage,
            augmenter=augmenter,
        )
        builder.process_and_store_recordings(
            recordings=recordings,
            output_manifest=manifest_path,
            num_jobs=num_jobs,
        )
Exemplo n.º 7
0
def test_feature_extractor_generic_deserialization():
    """An Fbank saved to YAML and reloaded via FeatureExtractor.from_yaml keeps an equal config."""
    original = Fbank()
    with NamedTemporaryFile() as tmp:
        yaml_path = tmp.name
        original.to_yaml(yaml_path)
        # Load through the generic base-class entry point, not Fbank itself.
        restored = FeatureExtractor.from_yaml(yaml_path)
    assert restored.config == original.config