def run_vad(self): """ Determine which frames contain speech and nonspeech, and store the resulting boolean mask internally. """ def _compute_runs(array): """ Compute runs as a list of arrays, each containing the indices of a contiguous run. :param array: the data array :type array: :class:`numpy.ndarray` (1D) :rtype: list of :class:`numpy.ndarray` (1D) """ if len(array) < 1: return [] return numpy.split(array, numpy.where(numpy.diff(array) != 1)[0] + 1) self.log(u"Creating VAD object") vad = VAD(rconf=self.rconf, logger=self.logger) self.log(u"Running VAD...") self.__mfcc_mask = vad.run_vad(self.__mfcc[0]) self.__mfcc_mask_map = (numpy.where(self.__mfcc_mask))[0] self.log(u"Running VAD... done") self.log(u"Storing speech and nonspeech intervals...") # where( == True) already computed, reusing # COMMENTED runs = _compute_runs((numpy.where(self.__mfcc_mask))[0]) runs = _compute_runs(self.__mfcc_mask_map) self.__speech_intervals = [(r[0], r[-1]) for r in runs] # where( == False) not already computed, computing now runs = _compute_runs((numpy.where(~self.__mfcc_mask))[0]) self.__nonspeech_intervals = [(r[0], r[-1]) for r in runs] self.log(u"Storing speech and nonspeech intervals... done")
def run_vad(self): """ Determine which frames contain speech and nonspeech, and store the resulting boolean mask internally. """ def _compute_runs(array): """ Compute runs as a list of arrays, each containing the indices of a contiguous run. :param array: the data array :type array: :class:`numpy.ndarray` (1D) :rtype: list of :class:`numpy.ndarray` (1D) """ if len(array) < 1: return [] return numpy.split(array, numpy.where(numpy.diff(array) != 1)[0] + 1) self.log(u"Creating VAD object") vad = VAD(rconf=self.rconf, logger=self.logger) self.log(u"Running VAD...") self.__mfcc_mask = vad.run_vad(self.__mfcc[0]) self.__mfcc_mask_map = (numpy.where(self.__mfcc_mask))[0] self.log(u"Running VAD... done") self.log(u"Storing speech and nonspeech intervals...") # where( == True) already computed, reusing #runs = _compute_runs((numpy.where(self.__mfcc_mask))[0]) runs = _compute_runs(self.__mfcc_mask_map) self.__speech_intervals = [(r[0], r[-1]) for r in runs] # where( == False) not already computed, computing now runs = _compute_runs((numpy.where(~self.__mfcc_mask))[0]) self.__nonspeech_intervals = [(r[0], r[-1]) for r in runs] self.log(u"Storing speech and nonspeech intervals... done")
def run_vad( self, log_energy_threshold=None, min_nonspeech_length=None, extend_before=None, extend_after=None ): """ Determine which frames contain speech and nonspeech, and store the resulting boolean mask internally. The four parameters might be ``None``: in this case, the corresponding RuntimeConfiguration values are applied. :param float log_energy_threshold: the minimum log energy threshold to consider a frame as speech :param int min_nonspeech_length: the minimum length, in frames, of a nonspeech interval :param int extend_before: extend each speech interval by this number of frames to the left (before) :param int extend_after: extend each speech interval by this number of frames to the right (after) """ def _compute_runs(array): """ Compute runs as a list of arrays, each containing the indices of a contiguous run. :param array: the data array :type array: :class:`numpy.ndarray` (1D) :rtype: list of :class:`numpy.ndarray` (1D) """ if len(array) < 1: return [] return numpy.split(array, numpy.where(numpy.diff(array) != 1)[0] + 1) self.log(u"Creating VAD object") vad = VAD(rconf=self.rconf, logger=self.logger) self.log(u"Running VAD...") self.__mfcc_mask = vad.run_vad( wave_energy=self.__mfcc[0], log_energy_threshold=log_energy_threshold, min_nonspeech_length=min_nonspeech_length, extend_before=extend_before, extend_after=extend_after ) self.__mfcc_mask_map = (numpy.where(self.__mfcc_mask))[0] self.log(u"Running VAD... done") self.log(u"Storing speech and nonspeech intervals...") # where( == True) already computed, reusing # COMMENTED runs = _compute_runs((numpy.where(self.__mfcc_mask))[0]) runs = _compute_runs(self.__mfcc_mask_map) self.__speech_intervals = [(r[0], r[-1]) for r in runs] # where( == False) not already computed, computing now runs = _compute_runs((numpy.where(~self.__mfcc_mask))[0]) self.__nonspeech_intervals = [(r[0], r[-1]) for r in runs] self.log(u"Storing speech and nonspeech intervals... done")
def run_vad(self, log_energy_threshold=None, min_nonspeech_length=None, extend_before=None, extend_after=None): """ Determine which frames contain speech and nonspeech, and store the resulting boolean mask internally. The four parameters might be ``None``: in this case, the corresponding RuntimeConfiguration values are applied. :param float log_energy_threshold: the minimum log energy threshold to consider a frame as speech :param int min_nonspeech_length: the minimum length, in frames, of a nonspeech interval :param int extend_before: extend each speech interval by this number of frames to the left (before) :param int extend_after: extend each speech interval by this number of frames to the right (after) """ def _compute_runs(array): """ Compute runs as a list of arrays, each containing the indices of a contiguous run. :param array: the data array :type array: :class:`numpy.ndarray` (1D) :rtype: list of :class:`numpy.ndarray` (1D) """ if len(array) < 1: return [] return numpy.split(array, numpy.where(numpy.diff(array) != 1)[0] + 1) self.log(u"Creating VAD object") vad = VAD(rconf=self.rconf, logger=self.logger) self.log(u"Running VAD...") self.__mfcc_mask = vad.run_vad( wave_energy=self.__mfcc[0], log_energy_threshold=log_energy_threshold, min_nonspeech_length=min_nonspeech_length, extend_before=extend_before, extend_after=extend_after) self.__mfcc_mask_map = (numpy.where(self.__mfcc_mask))[0] self.log(u"Running VAD... done") self.log(u"Storing speech and nonspeech intervals...") # where( == True) already computed, reusing # COMMENTED runs = _compute_runs((numpy.where(self.__mfcc_mask))[0]) runs = _compute_runs(self.__mfcc_mask_map) self.__speech_intervals = [(r[0], r[-1]) for r in runs] # where( == False) not already computed, computing now runs = _compute_runs((numpy.where(~self.__mfcc_mask))[0]) self.__nonspeech_intervals = [(r[0], r[-1]) for r in runs] self.log(u"Storing speech and nonspeech intervals... done")