def test_feature_set_serialization(format, compressed):
    """Round-trip a FeatureSet through JSON or YAML (optionally gzipped) and check equality."""
    feature_set = FeatureSet(features=[
        Features(
            recording_id='irrelevant',
            channels=0,
            start=0.0,
            duration=20.0,
            type='fbank',
            num_frames=2000,
            num_features=20,
            sampling_rate=16000,
            storage_type='lilcom',
            storage_path='/irrelevant/',
            storage_key='path.llc',
        )
    ])
    # A '.gz' suffix makes the (de)serializers transparently compress/decompress.
    with NamedTemporaryFile(suffix='.gz' if compressed else '') as f:
        if format == 'json':
            feature_set.to_json(f.name)
            feature_set_deserialized = FeatureSet.from_json(f.name)
        elif format == 'yaml':
            feature_set.to_yaml(f.name)
            feature_set_deserialized = FeatureSet.from_yaml(f.name)
        assert feature_set_deserialized == feature_set
def random_overlayed(
        supervision_manifest: Pathlike,
        feature_manifest: Pathlike,
        output_cut_manifest: Pathlike,
        random_seed: int,
        snr_range: Tuple[float, float],
        offset_range: Tuple[float, float]
):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST that contains supervision regions from SUPERVISION_MANIFEST
    and features supplied by FEATURE_MANIFEST. It first creates a trivial CutSet, splits it into two equal,
    randomized parts and overlays their features to create a mix.
    The parameters of the mix are controlled via SNR_RANGE and OFFSET_RANGE.
    """
    fix_random_seed(random_seed)

    supervisions = SupervisionSet.from_yaml(supervision_manifest)
    features = FeatureSet.from_yaml(feature_manifest)
    base_cuts = make_cuts_from_supervisions(supervision_set=supervisions, feature_set=features)

    # Shuffle and halve the cut set; each left/right pair will be mixed together.
    left_cuts, right_cuts = split(base_cuts, num_splits=2, randomize=True)

    # NOTE: the two uniform draws must stay in this order so that a given
    # random_seed keeps producing the same SNRs and offsets.
    num_pairs = len(left_cuts)
    snrs = np.random.uniform(*snr_range, size=num_pairs).tolist()
    relative_offsets = np.random.uniform(*offset_range, size=num_pairs).tolist()

    mixed = []
    for left, right, snr, rel_offset in zip(left_cuts, right_cuts, snrs, relative_offsets):
        # Offsets are specified relative to the left cut's duration.
        mixed.append(left.overlay(right, offset_other_by=left.duration * rel_offset, snr=snr))
    CutSet.from_cuts(mixed).to_yaml(output_cut_manifest)
def simple(
        feature_manifest: Pathlike,
        output_cut_manifest: Pathlike,
        supervision_manifest: Optional[Pathlike],
):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST that contains the regions and features supplied by FEATURE_MANIFEST.
    Optionally it can use a SUPERVISION_MANIFEST to select the regions and attach the corresponding supervisions to
    the cuts. This is the simplest way to create Cuts.
    """
    features = FeatureSet.from_yaml(feature_manifest)
    if supervision_manifest is not None:
        # Supervisions determine the cut regions and are attached to the cuts.
        supervisions = SupervisionSet.from_yaml(supervision_manifest)
        cuts = make_cuts_from_supervisions(feature_set=features, supervision_set=supervisions)
    else:
        # No supervisions: one cut per feature region.
        cuts = make_cuts_from_features(features)
    cuts.to_yaml(output_cut_manifest)
def test_feature_set_serialization():
    """A FeatureSet written to YAML and read back should compare equal to the original."""
    original = FeatureSet(
        feature_extractor=FeatureExtractor(),
        features=[
            Features(
                recording_id='irrelevant',
                channel_id=0,
                start=0.0,
                duration=20.0,
                type='fbank',
                num_frames=2000,
                num_features=20,
                storage_type='lilcom',
                storage_path='/irrelevant/path.llc',
            )
        ],
    )
    with NamedTemporaryFile() as f:
        original.to_yaml(f.name)
        restored = FeatureSet.from_yaml(f.name)
        assert restored == original
def windowed(
        feature_manifest: Pathlike,
        output_cut_manifest: Pathlike,
        cut_duration: float,
        cut_shift: Optional[float],
        keep_shorter_windows: bool
):
    """
    Create a CutSet stored in OUTPUT_CUT_MANIFEST from feature regions in FEATURE_MANIFEST.
    The feature matrices are traversed in windows with CUT_SHIFT increments,
    creating cuts of constant CUT_DURATION.
    """
    # Slide fixed-duration windows over each feature matrix to produce cuts.
    cuts = make_windowed_cuts_from_features(
        feature_set=FeatureSet.from_yaml(feature_manifest),
        cut_duration=cut_duration,
        cut_shift=cut_shift,
        keep_shorter_windows=keep_shorter_windows,
    )
    cuts.to_yaml(output_cut_manifest)
def test_load_features(recording_id: str, channel: int, start: float, duration: float, exception_expectation):
    """
    Smoke-test FeatureSet.load with various (recording, channel, start, duration)
    combinations; `exception_expectation` is a context manager (e.g. pytest.raises
    or a no-op) describing whether the call is expected to fail.
    """
    # just test that it loads
    feature_set = FeatureSet.from_yaml(
        'test/fixtures/dummy_feats/feature_manifest.yml')
    with exception_expectation:
        features = feature_set.load(recording_id, channel_id=channel, start=start, duration=duration)
        # expect a matrix
        assert len(features.shape) == 2
        # expect time as the first dimension
        frame_shift = feature_set.feature_extractor.spectrogram_config.frame_shift
        if duration is not None:
            # Ignoring the frame_length "maximizes" the number of frames retained;
            # allow a leeway of +/- 2 frames so that float rounding at window
            # edges does not make the test flaky (exact float equality between
            # duration / frame_shift and an integer frame count is brittle).
            assert abs(duration / frame_shift - features.shape[0]) <= 2
        # expect frequency as the second dimension
        assert feature_set.feature_extractor.mfcc_fbank_common_config.num_mel_bins == features.shape[1]
def test_load_features_with_default_arguments():
    """FeatureSet.load should succeed given only a recording id, with channel/start/duration left at their defaults."""
    feature_set = FeatureSet.from_yaml(
        'test/fixtures/dummy_feats/feature_manifest.yml')
    # The return value is deliberately unused — this only checks that the call does not raise.
    feature_set.load('recording-1')