示例#1
0
    def __init__(self,
                 cuts: CutSet,
                 return_cuts: bool = False,
                 cut_transforms: List[Callable[[CutSet], CutSet]] = None,
                 input_transforms: List[Callable[[torch.Tensor],
                                                 torch.Tensor]] = None,
                 input_strategy: InputStrategy = PrecomputedFeatures()):
        """
        Build a K2 ASR IterableDataset over ``cuts``.

        :param cuts: the ``CutSet`` that data is sampled from.
        :param return_cuts: if ``True``, each batch additionally carries a "cut" field
            holding the Cut objects that the batch was created from.
        :param cut_transforms: transforms applied to every sampled batch *before* the
            cuts are converted to an input representation (audio/features),
            e.g. cut concatenation or noise cuts mixing.
        :param input_transforms: transforms applied to every sampled batch *after* the
            cuts have been converted to audio/features, e.g. normalization or SpecAugment.
        :param input_strategy: converts cuts into a collated batch of audio/features.
            The default reads pre-computed features from disk.
        """
        super().__init__()

        # Data source and the strategy that turns it into model inputs.
        self.cuts = cuts
        self.input_strategy = input_strategy

        # Transform chains; ``ifnone`` is handed ``[]`` as the fallback value.
        self.input_transforms = ifnone(input_transforms, [])
        self.cut_transforms = ifnone(cut_transforms, [])

        self.return_cuts = return_cuts

        # Validation runs last, once every field above has been assigned.
        self._validate()
示例#2
0
    def __init__(
            self,
            return_cuts: bool = False,
            cut_transforms: List[Callable[[CutSet], CutSet]] = None,
            input_transforms: List[Callable[[torch.Tensor],
                                            torch.Tensor]] = None,
            input_strategy: BatchIO = PrecomputedFeatures(),
    ):
        """
        Set up the k2 ASR IterableDataset.

        :param return_cuts: if ``True``, each batch additionally carries a "cut" field
            holding the Cut objects that the batch was created from.
        :param cut_transforms: transforms applied to every sampled batch *before* the
            cuts are converted to an input representation (audio/features),
            e.g. cut concatenation or noise cuts mixing.
        :param input_transforms: transforms applied to every sampled batch *after* the
            cuts have been converted to audio/features, e.g. normalization or SpecAugment.
        :param input_strategy: converts cuts into a collated batch of audio/features.
            The default reads pre-computed features from disk.
        """
        super().__init__()

        self.input_strategy = input_strategy
        self.return_cuts = return_cuts

        # Transform chains; ``ifnone`` is handed ``[]`` as the fallback value.
        self.input_transforms = ifnone(input_transforms, [])
        self.cut_transforms = ifnone(cut_transforms, [])

        # Workaround for HDF5 memory that keeps growing during an epoch:
        # open file handles are closed at a regular interval so that the
        # internal HDF5 caches get reset.
        self.hdf5_fix = Hdf5MemoryIssueFix(reset_interval=100)
示例#3
0
文件: vad.py 项目: underdogliu/lhotse
 def __init__(
     self,
     input_strategy: BatchIO = PrecomputedFeatures(),
     cut_transforms: Sequence[Callable[[CutSet], CutSet]] = None,
     input_transforms: Sequence[Callable[[torch.Tensor],
                                         torch.Tensor]] = None,
 ) -> None:
     """
     Store the input strategy and the two transform chains.

     :param input_strategy: object kept as ``self.input_strategy``;
         defaults to a ``PrecomputedFeatures`` instance.
     :param cut_transforms: callables mapping a ``CutSet`` to a ``CutSet``.
     :param input_transforms: callables mapping a tensor to a tensor.
     """
     super().__init__()
     # Transform chains; ``ifnone`` is handed ``[]`` as the fallback value.
     self.input_transforms = ifnone(input_transforms, [])
     self.cut_transforms = ifnone(cut_transforms, [])
     self.input_strategy = input_strategy
示例#4
0
文件: vad.py 项目: aarora8/lhotse
 def __init__(
         self,
         cuts: CutSet,
         input_strategy: InputStrategy = PrecomputedFeatures(),
         cut_transforms: Sequence[Callable[[CutSet], CutSet]] = None,
         input_transforms: Sequence[Callable[[torch.Tensor], torch.Tensor]] = None
 ) -> None:
     """
     Validate ``cuts`` and store them with the input strategy and transforms.

     :param cuts: the ``CutSet`` kept as ``self.cuts``; passed through ``validate`` first.
     :param input_strategy: object kept as ``self.input_strategy``;
         defaults to a ``PrecomputedFeatures`` instance.
     :param cut_transforms: callables mapping a ``CutSet`` to a ``CutSet``.
     :param input_transforms: callables mapping a tensor to a tensor.
     """
     super().__init__()

     # Check the manifest before any field is populated.
     validate(cuts)

     self.cuts = cuts
     # Transform chains; ``ifnone`` is handed ``[]`` as the fallback value.
     self.input_transforms = ifnone(input_transforms, [])
     self.cut_transforms = ifnone(cut_transforms, [])
     self.input_strategy = input_strategy
示例#5
0
    def __init__(
        self,
        extractor: FeatureExtractor,
        wave_transforms: List[Callable[[torch.Tensor], torch.Tensor]] = None,
        num_workers: int = 0,
        use_batch_extract: bool = True,
        fault_tolerant: bool = False,
        executor_type: Type[ExecutorType] = ThreadPoolExecutor,
    ) -> None:
        """
        Construct ``OnTheFlyFeatures``.

        :param extractor: feature extractor applied on-the-fly, individually on each waveform.
        :param wave_transforms: optional list of transforms applied to the batch of audio
            waveforms collated into a single tensor, right before feature extraction.
        :param num_workers: when greater than 0, an executor (of the type given by
            ``executor_type``) is spawned to read the audio data in parallel.
            A thread executor can be used with PyTorch's DataLoader, whereas a process
            executor would fail (but could be faster for other applications).
        :param use_batch_extract: when ``True``,
            :meth:`~lhotse.features.base.FeatureExtractor.extract_batch` is called to compute
            the features, as it is possibly faster. It requires all cuts to share the same
            sampling rate; set this to ``False`` if they do not.
        :param fault_tolerant: when ``True``, cuts whose audio failed to load are skipped.
            ``__call__`` then returns an additional item: the CutSet for which the audio
            was read successfully, which may be a subset of the input CutSet.
        :param executor_type: type of the executor used for parallel audio reads
            (only relevant when ``num_workers>0``).
        """
        # Parallel-read configuration is handled by the parent constructor.
        super().__init__(num_workers=num_workers, executor_type=executor_type)

        self.fault_tolerant = fault_tolerant
        self.use_batch_extract = use_batch_extract
        # ``ifnone`` is handed ``[]`` as the fallback value.
        self.wave_transforms = ifnone(wave_transforms, [])
        self.extractor = extractor
示例#6
0
文件: base.py 项目: glynpu/lhotse
    def move_to_memory(
        self,
        start: Seconds = 0,
        duration: Optional[Seconds] = None,
    ) -> "Features":
        """
        Return a manifest whose feature data is held by an in-memory writer.

        The array is read with ``self.load(start=start, duration=duration)`` and
        written with the ``"memory_lilcom"`` writer when its dtype is a floating
        type, or with the ``"memory_raw"`` writer otherwise.

        :param start: forwarded to ``self.load`` as ``start``.
        :param duration: forwarded to ``self.load`` as ``duration``; when ``None``,
            the returned copy keeps ``self.duration``.
        :return: ``self`` if the storage type already denotes in-memory storage,
            otherwise a copy (via ``fastcopy``) pointing at the in-memory data.
        """
        from lhotse.features.io import get_memory_writer

        # Already memory-backed: nothing to do.
        # "memory_raw" is included because this method itself produces it for
        # non-floating arrays (assuming the writer's ``name`` equals the key it
        # was requested under -- confirm against ``lhotse.features.io``).
        # "memory_writer" is kept so that any manifest carrying the previously
        # checked value keeps taking the early exit.
        if self.storage_type in ("memory_lilcom", "memory_raw", "memory_writer"):
            return self

        arr = self.load(start=start, duration=duration)
        if issubclass(arr.dtype.type, np.floating):
            writer = get_memory_writer("memory_lilcom")()
        else:
            writer = get_memory_writer("memory_raw")()
        data = writer.write("", arr)  # key is ignored by in memory writers
        return fastcopy(
            self,
            # note: to understand why start is set to zero here, consider two cases:
            # 1) this method moves the whole array to memory => the start was 0 anyway
            # 2) this method moves a subset of the array to memory => the manifest is
            #    now relative to the start of that subset, and since it describes the
            #    whole subset, start=0 and duration=self.duration
            start=0.0,
            duration=ifnone(duration, self.duration),
            num_frames=arr.shape[0],
            storage_type=writer.name,
            storage_key=data,
            storage_path="",
        )
示例#7
0
    def __init__(
        self,
        cuts: CutSet,
        cut_transforms: List[Callable[[CutSet], CutSet]] = None,
        feature_input_strategy: BatchIO = PrecomputedFeatures(),
        feature_transforms: Union[Sequence[Callable], Callable] = None,
        add_eos: bool = True,
        add_bos: bool = True,
    ) -> None:
        """
        Store the cuts, build a ``TokenCollater`` for them and normalize the transforms.

        :param cuts: the ``CutSet`` kept as ``self.cuts`` and given to ``TokenCollater``.
        :param cut_transforms: callables mapping a ``CutSet`` to a ``CutSet``.
        :param feature_input_strategy: object kept as ``self.feature_input_strategy``;
            defaults to a ``PrecomputedFeatures`` instance.
        :param feature_transforms: a single callable or a sequence of callables;
            a single callable is wrapped into a one-element list.
        :param add_eos: forwarded to ``TokenCollater``.
        :param add_bos: forwarded to ``TokenCollater``.
        """
        super().__init__()

        self.cuts = cuts
        self.token_collater = TokenCollater(cuts,
                                            add_eos=add_eos,
                                            add_bos=add_bos)
        self.cut_transforms = ifnone(cut_transforms, [])
        self.feature_input_strategy = feature_input_strategy

        # Normalize ``feature_transforms`` to a sequence: nothing -> empty list,
        # a sequence -> used as given, anything else -> wrapped in a list.
        if feature_transforms is None:
            transforms = []
        elif isinstance(feature_transforms, Sequence):
            transforms = feature_transforms
        else:
            transforms = [feature_transforms]

        for transform in transforms:
            assert isinstance(
                transform, Callable), "Feature transforms must be Callable"
        self.feature_transforms = transforms
示例#8
0
    def __init__(
        self,
        cuts: CutSet,
        cut_transforms: List[Callable[[CutSet], CutSet]] = None,
        feature_input_strategy: InputStrategy = PrecomputedFeatures(),
        feature_transforms: Union[Sequence[Callable], Callable] = None,
        add_eos: bool = True,
        add_bos: bool = True,
    ) -> None:
        """
        Validate the cuts, build a ``TokenCollater`` for them and normalize the transforms.

        :param cuts: the ``CutSet`` kept as ``self.cuts``; it is passed through ``validate``
            and every cut must have exactly one supervision.
        :param cut_transforms: callables mapping a ``CutSet`` to a ``CutSet``.
        :param feature_input_strategy: object kept as ``self.feature_input_strategy``;
            defaults to a ``PrecomputedFeatures`` instance.
        :param feature_transforms: a single callable or a sequence of callables;
            a single callable is wrapped into a one-element list.
        :param add_eos: forwarded to ``TokenCollater``.
        :param add_bos: forwarded to ``TokenCollater``.
        """
        super().__init__()

        validate(cuts)
        assert all(
            len(cut.supervisions) == 1 for cut in cuts
        ), "Only the Cuts with single supervision are supported."

        self.cuts = cuts
        self.token_collater = TokenCollater(cuts,
                                            add_eos=add_eos,
                                            add_bos=add_bos)
        self.cut_transforms = ifnone(cut_transforms, [])
        self.feature_input_strategy = feature_input_strategy

        # Normalize ``feature_transforms`` to a sequence: nothing -> empty list,
        # a sequence -> used as given, anything else -> wrapped in a list.
        if feature_transforms is None:
            transforms = []
        elif isinstance(feature_transforms, Sequence):
            transforms = feature_transforms
        else:
            transforms = [feature_transforms]

        for transform in transforms:
            assert isinstance(
                transform, Callable), "Feature transforms must be Callable"
        self.feature_transforms = transforms
示例#9
0
 def __setattr__(self, key: str, value: Any):
     """
     Intercept attribute assignment.

     Names that are declared dataclass fields are set the regular way. Any other
     name is treated as a custom attribute and stored inside the ``self.custom``
     mapping, so that it can be (de)serialized later.
     """
     if key not in self.__dataclass_fields__:
         # Not a declared field: record it under ``custom``, starting from the
         # ``{}`` fallback that ``ifnone`` is handed for an unset ``custom``.
         extras = ifnone(self.custom, {})
         extras[key] = value
         self.custom = extras
         return
     super().__setattr__(key, value)
示例#10
0
    def __init__(
        self,
        iterator: Iterable,
        predicate: Callable[[Cut], bool],
        diagnostics: Optional[SamplingDiagnostics] = None,
    ) -> None:
        """
        Store the wrapped iterable, the filtering predicate and the diagnostics object.

        :param iterator: the iterable kept as ``self.iterator``.
        :param predicate: a callable taking a ``Cut`` and returning a bool.
        :param diagnostics: optional ``SamplingDiagnostics``; ``ifnone`` is handed a
            fresh ``SamplingDiagnostics()`` as the fallback value.
        """
        self.predicate = predicate
        self.iterator = iterator
        self.diagnostics = ifnone(diagnostics, SamplingDiagnostics())

        # Reject a non-callable predicate at construction time.
        is_callable = callable(self.predicate)
        assert is_callable, f"LazyFilter: 'predicate' arg must be callable (got {predicate})."
示例#11
0
    def __init__(
        self,
        extractor: FeatureExtractor,
        wave_transforms: List[Callable[[torch.Tensor], torch.Tensor]] = None
    ) -> None:
        """
        Construct ``OnTheFlyFeatures``.

        :param extractor: feature extractor applied on-the-fly, individually on each waveform.
        :param wave_transforms: optional list of transforms applied to the batch of audio
            waveforms collated into a single tensor, right before feature extraction.
        """
        # ``ifnone`` is handed ``[]`` as the fallback value.
        self.wave_transforms = ifnone(wave_transforms, [])
        self.extractor = extractor
示例#12
0
 def __init__(
     self,
     datapipe: Iterable[Union[Cut, Tuple[Cut]]],
     max_frames: int = None,
     max_samples: int = None,
     max_duration: Seconds = None,
     max_cuts: Optional[int] = None,
     drop_last: bool = False,
     diagnostics: Optional[SamplingDiagnostics] = None,
 ) -> None:
     """
     Store the data source and the batch-size limits.

     :param datapipe: iterable of cuts (or tuples of cuts) kept as ``self.datapipe``.
     :param max_frames: forwarded to ``TimeConstraint`` as ``max_frames``.
     :param max_samples: forwarded to ``TimeConstraint`` as ``max_samples``.
     :param max_duration: forwarded to ``TimeConstraint`` as ``max_duration``.
     :param max_cuts: kept as ``self.max_cuts``.
     :param drop_last: kept as ``self.drop_last``.
     :param diagnostics: optional ``SamplingDiagnostics``; ``ifnone`` is handed a
         fresh ``SamplingDiagnostics()`` as the fallback value.
     """
     self.datapipe = datapipe
     # Starts out as an empty double-ended queue.
     self.reuse_cuts_buffer = deque()
     self.drop_last = drop_last
     self.max_cuts = max_cuts
     self.diagnostics = ifnone(diagnostics, SamplingDiagnostics())

     # The three time-based limits are bundled into one constraint object.
     limits = dict(
         max_duration=max_duration,
         max_frames=max_frames,
         max_samples=max_samples,
     )
     self.time_constraint = TimeConstraint(**limits)
示例#13
0
    def __init__(
        self,
        cuts: Iterable[Union[Cut, Tuple[Cut]]],
        duration_bins: List[Seconds],
        max_duration: float,
        max_cuts: Optional[int] = None,
        drop_last: bool = False,
        buffer_size: int = 10000,
        strict: bool = False,
        rng: random.Random = None,
        diagnostics: Optional[SamplingDiagnostics] = None,
    ) -> None:
        """
        Store the bucketing configuration and create the empty buckets.

        :param cuts: iterable of cuts (or tuples of cuts) kept as ``self.cuts``.
        :param duration_bins: bin boundaries; must already be in sorted order.
            ``len(duration_bins) + 1`` buckets are created.
        :param max_duration: kept as ``self.max_duration``; also compared against a
            rough per-bucket duration estimate to warn about a small ``buffer_size``.
        :param max_cuts: kept as ``self.max_cuts``.
        :param drop_last: kept as ``self.drop_last``.
        :param buffer_size: kept as ``self.buffer_size`` and used in the estimate above.
        :param strict: kept as ``self.strict``.
        :param rng: random generator to use; a new ``random.Random()`` when ``None``.
        :param diagnostics: optional ``SamplingDiagnostics``; ``ifnone`` is handed a
            fresh ``SamplingDiagnostics()`` as the fallback value.
        """
        self.cuts = cuts
        self.duration_bins = duration_bins
        self.max_duration = max_duration
        self.max_cuts = max_cuts
        self.drop_last = drop_last
        self.buffer_size = buffer_size
        self.strict = strict
        self.diagnostics = ifnone(diagnostics, SamplingDiagnostics())
        self.rng = random.Random() if rng is None else rng

        assert duration_bins == sorted(duration_bins), (
            f"Argument list for 'duration_bins' is expected to be in "
            f"sorted order (got: {duration_bins}).")

        # There is one more bucket than there are bin boundaries.
        num_buckets = len(duration_bins) + 1

        # Heuristic diagnostic to help find the right settings: estimate the
        # total duration held in the buffer from the mean bin boundary, then
        # split it evenly across the buckets.
        buffer_duration_estimate = buffer_size * np.mean(duration_bins)
        bucket_duration_estimate = buffer_duration_estimate / num_buckets
        if bucket_duration_estimate < max_duration:
            warnings.warn(
                f"Your 'buffer_size' setting of {buffer_size} might be too low to satisfy "
                f"a 'max_duration' of {max_duration} (given our best guess).")

        # Start with every bucket empty.
        self.buckets: List[Deque[Union[Cut, Tuple[Cut]]]] = [
            deque() for _ in range(num_buckets)
        ]
示例#14
0
    def __init__(
        self,
        extractor: FeatureExtractor,
        wave_transforms: List[Callable[[torch.Tensor], torch.Tensor]] = None,
        num_workers: int = 0,
        use_batch_extract: bool = True,
        executor_type: Type[ExecutorType] = ThreadPoolExecutor,
    ) -> None:
        """
        Construct ``OnTheFlyFeatures``.

        :param extractor: feature extractor applied on-the-fly, individually on each waveform.
        :param wave_transforms: optional list of transforms applied to the batch of audio
            waveforms collated into a single tensor, right before feature extraction.
        :param num_workers: forwarded to the parent constructor.
        :param use_batch_extract: when ``True``,
            :meth:`~lhotse.features.base.FeatureExtractor.extract_batch` is called to compute
            the features, as it is possibly faster. It requires all cuts to share the same
            sampling rate; set this to ``False`` if they do not.
        :param executor_type: forwarded to the parent constructor.
        """
        super().__init__(num_workers=num_workers, executor_type=executor_type)

        self.use_batch_extract = use_batch_extract
        # ``ifnone`` is handed ``[]`` as the fallback value.
        self.wave_transforms = ifnone(wave_transforms, [])
        self.extractor = extractor
示例#15
0
 def __init__(self, segments: Mapping[str, SupervisionSegment] = None) -> None:
     """
     Store the mapping of supervision segments.

     :param segments: mapping from a string key to a ``SupervisionSegment``.
         May be omitted or ``None``; ``ifnone`` is then handed ``{}`` as the
         fallback value. The default makes the signature agree with the body,
         which already tolerated ``None``.
     """
     self.segments = ifnone(segments, {})
示例#16
0
 def __init__(self, recordings: Mapping[str, Recording] = None) -> None:
     """
     Store the mapping of recordings.

     :param recordings: mapping from a string key to a ``Recording``; when it is
         ``None``, ``ifnone`` is handed ``{}`` as the fallback value.
     """
     recordings_by_key = ifnone(recordings, {})
     self.recordings = recordings_by_key
示例#17
0
 def __init__(self, features: List[Features] = None) -> None:
     """
     Store the feature manifests in sorted order.

     :param features: list of ``Features``; when it is ``None``, ``ifnone`` is
         handed ``[]`` as the fallback value. The result is always passed
         through ``sorted``, so ``self.features`` is a new list.
     """
     unsorted_features = ifnone(features, [])
     self.features = sorted(unsorted_features)
示例#18
0
 def get(self, key, or_=None):
     """
     Look ``key`` up via ``self._find_key`` with ``or_`` as the fallback.

     :param key: the key handed to ``self._find_key``.
     :param or_: fallback value handed to ``ifnone`` alongside the lookup result.
     :return: whatever ``ifnone`` returns for the lookup result and ``or_``.
     """
     found = self._find_key(key)
     return ifnone(found, or_)
示例#19
0
文件: base.py 项目: glynpu/lhotse
 def __init__(self, features: List[Features] = None) -> None:
     """
     Store the feature manifests, sorting them only when they come as a list.

     :param features: the ``Features`` collection; when it is ``None``, ``ifnone``
         is handed ``[]`` as the fallback value. A ``list`` is replaced by its
         sorted copy; any other object is stored exactly as given.
     """
     given = ifnone(features, [])
     self.features = given
     if not isinstance(self.features, list):
         return
     self.features = sorted(self.features)