Exemplo n.º 1
0
    def __call__(self, current_file, return_sr=False):
        """Obtain waveform

        Parameters
        ----------
        current_file : dict
            `pyannote.database` files. When it contains a "waveform" key, that
            precomputed (n_samples, n_channels) array is used; otherwise the
            audio is read from `current_file["audio"]`. An optional "channel"
            key (1-based) selects a single channel.
        return_sr : `bool`, optional
            Return sample rate. Defaults to False

        Returns
        -------
        waveform : `pyannote.core.SlidingWindowFeature`
            Waveform
        sample_rate : `int`
            Only when `return_sr` is set to True

        Raises
        ------
        ValueError
            When a precomputed waveform is provided but `self.sample_rate` is
            None, or when the precomputed waveform is not two-dimensional.
        """

        if "waveform" in current_file:

            if self.sample_rate is None:
                msg = ("`RawAudio` needs to be instantiated with an actual "
                       "`sample_rate` if one wants to use precomputed "
                       "waveform.")
                raise ValueError(msg)
            sample_rate = self.sample_rate

            y = current_file["waveform"]

            if len(y.shape) != 2:
                msg = ("Precomputed waveform should be provided as a "
                       "(n_samples, n_channels) `np.ndarray`.")
                raise ValueError(msg)

        else:
            y, sample_rate = sf.read(current_file["audio"],
                                     dtype="float32",
                                     always_2d=True)

        # extract specific channel if requested
        # (slicing rather than indexing keeps the array two-dimensional)
        channel = current_file.get("channel", None)
        if channel is not None:
            y = y[:, channel - 1:channel]

        y = self.get_features(y, sample_rate)

        # The sample rate reported to the caller is `self.sample_rate` whenever
        # it is set, so the frame timing must be derived from that same rate;
        # otherwise waveform timing and returned rate disagree as soon as the
        # file rate differs from the requested one.
        # NOTE(review): presumably `get_features` resamples to
        # `self.sample_rate` -- confirm against its implementation.
        if self.sample_rate is not None:
            sample_rate = self.sample_rate

        sliding_window = SlidingWindow(start=-0.5 / sample_rate,
                                       duration=1.0 / sample_rate,
                                       step=1.0 / sample_rate)

        waveform = SlidingWindowFeature(y, sliding_window)

        if return_sr:
            return waveform, sample_rate

        return waveform
Exemplo n.º 2
0
    def apply(self, current_file):
        """Compute predictions on a sliding window

        Predictions of overlapping sub-sequences are averaged frame by frame.

        Parameter
        ---------
        current_file : dict

        Returns
        -------
        predictions : SlidingWindowFeature
            One row per frame. Empty (zero rows) when `self.from_file` yields
            no batch for this file.
        """

        # frame and sub-sequence sliding windows
        frames = self.feature_extraction.sliding_window()

        batches = list(self.from_file(current_file, incomplete=True))
        if not batches:
            data = np.zeros((0, self.dimension), dtype=np.float32)
            return SlidingWindowFeature(data, frames)

        fX = np.vstack(batches)

        subsequences = SlidingWindow(duration=self.duration, step=self.step)

        # get total number of frames
        if isinstance(self.feature_extraction, Precomputed):
            n_frames, _ = self.feature_extraction.shape(current_file)
        else:
            uri = get_unique_identifier(current_file)
            # unpack the *shape* of the cached features, not the array itself
            # NOTE(review): assumes `self.preprocessed_[uri].data` is a 2D
            # array -- confirm against the code that fills `preprocessed_`.
            n_frames, _ = self.preprocessed_[uri].data.shape

        # data[i] is the sum of all predictions for frame #i
        data = np.zeros((n_frames, self.dimension), dtype=np.float32)

        # k[i] is the number of sequences that overlap with frame #i
        # (float32 rather than int8: an int8 counter silently wraps around
        # past 127 overlapping sequences, and float32 keeps the averaged
        # output in float32)
        k = np.zeros((n_frames, 1), dtype=np.float32)

        for subsequence, fX_ in zip(subsequences, fX):

            # indices of frames overlapped by subsequence
            indices = frames.crop(subsequence,
                                  mode='center',
                                  fixed=self.duration)

            # accumulate the outputs
            data[indices] += fX_

            # keep track of the number of overlapping sequence
            # TODO - use smarter weights (e.g. Hamming window)
            k[indices] += 1

        # compute average prediction of each frame
        # (frames covered by no sequence keep their zero value)
        data = data / np.maximum(k, 1)

        return SlidingWindowFeature(data, frames)
Exemplo n.º 3
0
    def __call__(self, sequence=Stream.NoNewData):
        """Buffer a new feature sequence and emit the aggregated frames

        Parameters
        ----------
        sequence : `SlidingWindowFeature`, `More` or `Stream` sentinel
            New sequence. For a `More` instance, its `output` is used.

        Returns
        -------
        output : `SlidingWindowFeature` or `Stream` sentinel
            `Stream.NoNewData` when there is no input. At end of stream,
            `Stream.EndOfStream` if nothing was initialized, else the
            aggregation of the whole remaining buffer. For the first
            sequence, whatever `self.initialize` returns. Otherwise the
            aggregation (`self.agg_func` over buffers) of the buffered
            samples located before the start of the new sequence.
        """

        if isinstance(sequence, More):
            sequence = sequence.output

        # no input ==> no output
        if sequence is Stream.NoNewData:
            return Stream.NoNewData

        if sequence is Stream.EndOfStream:
            if not self.initialized_:
                return Stream.EndOfStream

            # flush whatever is left in the buffer
            self.initialized_ = False
            data = self.agg_func(self.buffer_, axis=0)
            return SlidingWindowFeature(data, self.frames_)

        if not self.initialized_:
            return self.initialize(sequence)

        # check that feature sequence uses the common time base
        sw = sequence.sliding_window
        assert sw.duration == self.frames_.duration
        assert sw.step == self.frames_.step
        assert sw.start > self.frames_.start

        # samples located before the new sequence will not receive any more
        # contribution: aggregate and output them
        delta_start = sw.start - self.frames_.start
        ready = self.frames_.samples(delta_start, mode='center')
        data = self.agg_func(self.buffer_[:, :ready], axis=0)
        output = SlidingWindowFeature(data, self.frames_)

        # drop the samples that were just output and shift the time base
        self.buffer_ = self.buffer_[:, ready:]
        self.frames_ = SlidingWindow(start=sw.start,
                                     duration=sw.duration,
                                     step=sw.step)

        # remove empty (all NaN) buffers
        # (`first_kept` is pre-set so that it is defined even when the buffer
        # has no row left to scan)
        first_kept = 0
        for first_kept in range(self.buffer_.shape[0]):
            if np.any(~np.isnan(self.buffer_[first_kept])):
                break
        self.buffer_ = self.buffer_[first_kept:]

        # add one NaN-filled buffer (and enough samples) for the new sequence
        n_samples = self.buffer_.shape[1]
        n_new_samples = sequence.data.shape[0]
        pad_width = ((0, 1), (0, max(0, n_new_samples - n_samples)))
        for _ in sequence.data.shape[1:]:
            pad_width += ((0, 0), )
        # `np.nan` instead of `np.NAN`: the upper-case alias no longer exists
        # in NumPy 2.0
        self.buffer_ = np.pad(self.buffer_,
                              pad_width,
                              'constant',
                              constant_values=np.nan)
        self.buffer_[-1] = sequence.data

        return output
Exemplo n.º 4
0
    def apply(self, current_file):
        """Extract speech turns from speech activity and speaker change scores

        Parameters
        ----------
        current_file : dict
            File passed to `self.sad_`, `self.scd_` and `get_annotated`.

        Returns
        -------
        speech_turns
            Speaker change segmentation cropped to detected speech regions,
            then to the annotated part of the file.
        """

        # --- speech activity detection ---
        sad_scores = self.sad_(current_file)

        # the log-scale heuristic runs once and its outcome is cached
        if not hasattr(self, 'sad_log_scale_'):
            self.sad_log_scale_ = bool(np.nanmean(sad_scores.data) < 0)

        sad_data = sad_scores.data
        if self.sad_log_scale_:
            sad_data = np.exp(sad_data)

        # first dimension is non-speech, whatever the number of classes
        speech_prob = SlidingWindowFeature(1. - sad_data[:, 0],
                                           sad_scores.sliding_window)
        speech_regions = self.sad_binarize_.apply(speech_prob)

        # --- speaker change detection ---
        scd_scores = self.scd_(current_file)

        # same cached heuristic for speaker change scores
        if not hasattr(self, 'scd_log_scale_'):
            self.scd_log_scale_ = bool(np.nanmean(scd_scores.data) < 0)

        scd_data = scd_scores.data
        if self.scd_log_scale_:
            scd_data = np.exp(scd_data)

        # last dimension supports both classification and regression scores
        change_prob = SlidingWindowFeature(scd_data[:, -1],
                                           scd_scores.sliding_window)
        segmentation = self.scd_peak_.apply(change_prob)

        # keep speech only, and only within the annotated part
        return segmentation.crop(speech_regions).crop(
            get_annotated(current_file))
Exemplo n.º 5
0
    def apply(self, current_file):
        """Compute embeddings on a sliding window

        Parameter
        ---------
        current_file : dict

        Returns
        -------
        embedding : SlidingWindowFeature
            One embedding per sub-sequence when `self.internal` is falsy.
            Otherwise one embedding per frame, obtained by averaging the
            embeddings of all sub-sequences overlapping that frame.
        """

        # compute embedding on sliding window
        # over the whole duration of the file
        fX = np.vstack(list(self.from_file(current_file, incomplete=True)))

        subsequences = SlidingWindow(duration=self.duration, step=self.step)

        if not self.internal:
            return SlidingWindowFeature(fX, subsequences)

        # get total number of frames
        identifier = get_unique_identifier(current_file)
        n_frames = self.preprocessed_['X'][identifier].data.shape[0]

        # data[i] is the sum of all embeddings for frame #i
        data = np.zeros((n_frames, self.dimension), dtype=np.float32)

        # k[i] is the number of sequences that overlap with frame #i
        # (float32 rather than int8: an int8 counter silently wraps around
        # past 127 overlapping sequences, and float32 keeps the averaged
        # output in float32)
        k = np.zeros((n_frames, 1), dtype=np.float32)

        # frame and sub-sequence sliding windows
        frames = self.feature_extractor.sliding_window()

        for subsequence, fX_ in zip(subsequences, fX):

            # indices of frames overlapped by subsequence
            indices = frames.crop(subsequence,
                                  mode='center',
                                  fixed=self.duration)

            # accumulate their embedding
            data[indices] += fX_

            # keep track of the number of overlapping sequence
            k[indices] += 1

        # compute average embedding of each frame
        # (frames covered by no sequence keep their zero value)
        data = data / np.maximum(k, 1)

        return SlidingWindowFeature(data, frames)
Exemplo n.º 6
0
    def __call__(self, features, sliding_window=None):
        """Apply short-term standardization

        Each sample is standardized with the mean and (ddof=1) standard
        deviation of a centered window of `self.duration`; near both ends,
        the window is truncated to the available samples.

        Parameters
        ----------
        features : `SlidingWindowFeature` or (n_samples, n_features ) `numpy.ndarray`
            Features.
        sliding_window : `SlidingWindow`, optional
            Sliding window when `features` is a `numpy.ndarray`.
            Not used when `features` is a `SlidingWindowFeature` instance.

        Returns
        -------
        normalized : `SlidingWindowFeature` or (n_samples, n_features ) `numpy.ndarray`
            Standardized features, of the same kind as `features`.
        """

        is_wrapped = isinstance(features, SlidingWindowFeature)
        if is_wrapped:
            wrapped = features
        else:
            wrapped = SlidingWindowFeature(features, sliding_window)

        # window length in samples, forced to be odd so that it is centered
        n_window = wrapped.sliding_window.samples(self.duration,
                                                  mode="center")
        if n_window % 2 == 0:
            n_window += 1
        half = n_window // 2

        # statistics of full windows (NaN wherever the window is incomplete)
        roller = pd.DataFrame(wrapped.data).rolling(window=n_window,
                                                    center=True,
                                                    min_periods=n_window)
        mean = np.array(roller.mean())
        deviation = np.array(roller.std(ddof=1))

        # fill in both borders using truncated windows
        for offset in range(half):

            head = wrapped.data[:offset + half + 1, :]
            mean[offset] = np.mean(head, axis=0)
            deviation[offset] = np.std(head, axis=0, ddof=1)

            tail = wrapped.data[-offset - half - 1:, :]
            mean[-offset - 1] = np.mean(tail, axis=0)
            deviation[-offset - 1] = np.std(tail, axis=0, ddof=1)

        # avoid dividing by zero on constant features
        deviation[deviation == 0.0] = 1e-6

        standardized = (wrapped.data - mean) / deviation

        if not is_wrapped:
            return standardized
        return SlidingWindowFeature(standardized, features.sliding_window)
Exemplo n.º 7
0
    def __call__(self, features, sliding_window=None):
        """Apply global standardization

        Every feature dimension is centered on its mean and scaled by its
        (ddof=1) standard deviation, both computed over all samples.

        Parameters
        ----------
        features : `SlidingWindowFeature` or (n_samples, n_features ) `numpy.ndarray`
            Features.
        sliding_window : `SlidingWindow`, optional
            Not used.

        Returns
        -------
        normalized : `SlidingWindowFeature` or (n_samples, n_features ) `numpy.ndarray`
            Standardized features, of the same kind as `features`.
        """

        is_wrapped = isinstance(features, SlidingWindowFeature)
        values = features.data if is_wrapped else features

        mean = np.mean(values, axis=0)
        deviation = np.std(values, axis=0, ddof=1)

        # avoid dividing by zero on constant features
        deviation[deviation == 0.0] = 1e-6

        standardized = (values - mean) / deviation

        if not is_wrapped:
            return standardized
        return SlidingWindowFeature(standardized, features.sliding_window)
Exemplo n.º 8
0
    def __call__(self, sequence=Stream.NoNewData):
        """Append a new feature sequence to the buffer

        Parameters
        ----------
        sequence : `SlidingWindowFeature`, `More` or `Stream` sentinel
            New sequence. For a `More` instance, its `output` is used.

        Returns
        -------
        buffer : `SlidingWindowFeature` or `Stream` sentinel
            The sentinel itself for `Stream.EndOfStream` / `Stream.NoNewData`,
            else everything buffered so far.
        """

        if isinstance(sequence, More):
            sequence = sequence.output

        # sentinels are passed through untouched
        if sequence in (Stream.EndOfStream, Stream.NoNewData):
            return sequence

        if not self.initialized_:
            # very first sequence: set up the buffer
            self.initialize(sequence)
            return SlidingWindowFeature(self.buffer_, self.frames_)

        # the new sequence must use the common time base...
        window = sequence.sliding_window
        assert window.duration == self.frames_.duration
        assert window.step == self.frames_.step

        # ... and start exactly where the buffer ends
        next_frame = self.frames_[len(self.buffer_)]
        assert np.allclose(next_frame, window[0])

        # append the new samples at the end of buffer
        self.buffer_ = np.concatenate([self.buffer_, sequence.data], axis=0)

        return SlidingWindowFeature(self.buffer_, self.frames_)
Exemplo n.º 9
0
    def preprocess(self, current_file, identifier=None):
        """Pre-compute file-wise X and y

        Labels are computed once per `identifier` and cached in
        `self.preprocessed_['y']` as a `SlidingWindowFeature` with two
        columns: column 0 marks non-speech frames, column 1 marks speech
        frames, and frames with neither are unknown.

        Returns
        -------
        current_file : dict
            File as returned by `self.yaafe_preprocess`.
        """

        current_file = self.yaafe_preprocess(current_file,
                                             identifier=identifier)

        # nothing to do when labels were already extracted
        labels = self.preprocessed_.setdefault('y', {})
        if identifier in labels:
            return current_file

        features = self.preprocessed_['X'][identifier]
        window = features.sliding_window
        n_frames = features.getNumber()

        # one extra row, removed below once all frames are labeled
        targets = np.zeros((n_frames + 1, 2), dtype=np.int8)

        annotated = current_file['annotated']
        speech = current_file['annotation'].get_timeline().coverage()

        # annotated regions with no speech ==> non-speech
        for gap in speech.gaps(annotated):
            targets[window.crop(gap, mode='loose'), 0] = 1

        # regions covered by the annotation ==> speech
        for segment in speech:
            targets[window.crop(segment, mode='loose'), 1] = 1

        labels[identifier] = SlidingWindowFeature(targets[:-1], window)

        return current_file
Exemplo n.º 10
0
    def apply(self, current_file):
        """Computes BIC distance between sliding windows

        A Gaussian is fitted on each left and right window, and the BIC
        between both (with `penalty_coef=0`) is used as distance.

        Parameter
        ---------
        current_file : dict

        Returns
        -------
        predictions : SlidingWindowFeature
        """

        from pyannote.algorithms.stats.gaussian import Gaussian

        def fit(samples):
            # Gaussian fitted on one window
            return Gaussian(covariance_type=self.covariance_type).fit(samples)

        # only the first item yielded by `from_file` is used
        _, left, right = next(self.from_file(current_file))

        distances = np.array([
            fit(x_left).bic(fit(x_right), penalty_coef=0)[0]
            for x_left, x_right in zip(left, right)
        ])

        # each distance relates to a pair of adjacent windows
        pairs = SlidingWindow(duration=2 * self.duration,
                              step=self.step,
                              start=0.)
        return SlidingWindowFeature(distances, pairs)
Exemplo n.º 11
0
    def apply(self, wav):
        """Computes distance between sliding windows

        A diagonal-covariance Gaussian is fitted on each left and right
        window, and their divergence is used as distance.

        Parameter
        ---------
        wav : str
            Path to wav audio file

        Returns
        -------
        predictions : SlidingWindowFeature
        """

        from pyannote.algorithms.stats.gaussian import Gaussian

        def fit(samples):
            # Gaussian fitted on one window
            return Gaussian(covariance_type='diag').fit(samples)

        # only the first item yielded by `from_file` is used
        current_file = {'uri': wav, 'medium': {'wav': wav}}
        _, left, right = next(self.from_file(current_file))

        distances = np.array([
            fit(x_left).divergence(fit(x_right))
            for x_left, x_right in zip(left, right)
        ])

        # each distance relates to a pair of adjacent windows
        pairs = SlidingWindow(duration=2 * self.duration,
                              step=self.step,
                              start=0.)
        return SlidingWindowFeature(distances, pairs)
Exemplo n.º 12
0
def speaker_spotting_try(current_trial):
    """Score one speaker spotting trial

    Every embedding is scored as 2 minus its mean cosine distance to the
    target model. Embeddings outside of detected speech get a zero score.

    Parameters
    ----------
    current_trial : dict
        Trial with 'model_id', 'try_with' and 'uri' keys.

    Returns
    -------
    scores : SlidingWindowFeature
        Scores of the embeddings fully included in 'try_with'.
    """

    # target model, and where to look for this target
    target = models[current_trial['model_id']]
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)
    window = embeddings.sliding_window

    # index of first and last embedding fully included in 'try_with'
    in_range = window.crop(try_with, mode='strict')
    first, last = in_range[0], in_range[-1]

    # indices of embeddings located in speech regions
    speech = SAD[current_trial['uri']]
    speech_indices = window.crop(speech, mode='center')

    # compare all embeddings to target model
    distance = cdist(embeddings.data, target, metric='cosine')
    similarity = 2. - np.mean(distance, axis=1, keepdims=True)

    # zero everywhere except on (valid) speech indices
    n_scored = len(similarity)
    score = np.zeros((len(embeddings.data) + 2, 1))
    speech_indices = [
        index for index in speech_indices if index < n_scored
    ]
    score[speech_indices] = similarity[speech_indices]

    # only keep the part located in 'try_with'
    score = score[first:last + 1]
    cropped_window = SlidingWindow(start=window[first].start,
                                   duration=window.duration,
                                   step=window.step)

    return SlidingWindowFeature(score, cropped_window)
Exemplo n.º 13
0
    def __call__(self, current_file):
        """Obtain features for file

        Parameters
        ----------
        current_file : dict
            `pyannote.database` files.

        Returns
        -------
        features : `pyannote.core.SlidingWindowFeature`
            Features

        Raises
        ------
        PyannoteFeatureExtractionError
            When the path returned by `self.get_path` does not exist.
        """

        path = Path(self.get_path(current_file))

        if not path.exists():
            uri = current_file["uri"]
            database = current_file["database"]
            raise PyannoteFeatureExtractionError(
                f"Directory {self.root_dir} does not contain "
                f'precomputed features for file "{uri}" of '
                f'"{database}" database.'
            )

        # memory-map the file rather than loading it when requested
        mmap_mode = "r" if self.use_memmap else None
        data = np.load(str(path), mmap_mode=mmap_mode)

        return SlidingWindowFeature(data, self.sliding_window_)
Exemplo n.º 14
0
    def crop(self, current_file, segment, mode="center", fixed=None):
        """Fast version of self(current_file).crop(segment, **kwargs)

        Parameters
        ----------
        current_file : dict
            `pyannote.database` file.
        segment : `pyannote.core.Segment`
            Segment from which to extract features.
        mode : str, optional
            Passed to `SlidingWindowFeature.crop`. Defaults to "center".
        fixed : float, optional
            Passed to `SlidingWindowFeature.crop`. Defaults to the duration
            of `segment` when `mode` is "center".

        Returns
        -------
        features : (n_frames, dimension) numpy array
            Extracted features

        See also
        --------
        `pyannote.core.SlidingWindowFeature.crop`
        """

        # match default FeatureExtraction.crop behavior
        if fixed is None and mode == "center":
            fixed = segment.duration

        # memory-map the file so that only the requested part is read
        path = self.get_path(current_file)
        memmap = open_memmap(path, mode="r")
        features = SlidingWindowFeature(memmap, self.sliding_window_)
        cropped = features.crop(segment, mode=mode, fixed=fixed)

        # drop the local reference to the memory map
        del memmap
        return cropped
Exemplo n.º 15
0
def speaker_spotting_try(current_trial):
    """Score one speaker spotting trial

    Every embedding located in reference speech is scored as 2 minus its
    cosine distance to the target model. Other embeddings get a zero score.

    Parameters
    ----------
    current_trial : dict
        Trial with 'model_id', 'try_with' and 'uri' keys.

    Returns
    -------
    scores : SlidingWindowFeature
        Scores of the embeddings fully included in 'try_with'.
    """

    # target model, and where to look for this target
    target = models[current_trial['model_id']]
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)
    window = embeddings.sliding_window

    # index of first and last embedding fully included in 'try_with'
    in_range = window.crop(try_with, mode='strict')
    first, last = in_range[0], in_range[-1]

    # indices of embeddings fully included in reference speech regions
    reference = REFERENCE[current_trial['uri']]
    speech = reference.crop(current_trial['try_with']).get_timeline().support()
    speech_indices = window.crop(speech, mode='strict')

    # compare all embeddings to target model
    similarity = 2. - cdist(embeddings.data, target, metric='cosine')

    data = np.zeros((len(embeddings.data), 1))
    for index, (_segment, _embedding) in enumerate(embeddings):
        # skip whatever comes before 'try_with' or is not speech
        is_candidate = index >= first and index in speech_indices
        if not is_candidate:
            continue
        # nothing left to score past 'try_with'
        if index > last:
            break
        data[index] = similarity[index]

    # only keep the part located in 'try_with'
    data = data[first:last + 1]
    cropped_window = SlidingWindow(start=window[first].start,
                                   duration=window.duration,
                                   step=window.step)

    return SlidingWindowFeature(data, cropped_window)
Exemplo n.º 16
0
    def __call__(self, current_file) -> SlidingWindowFeature:
        """Extract features from file

        Parameters
        ----------
        current_file : dict
            `pyannote.database` files.

        Returns
        -------
        features : `pyannote.core.SlidingWindowFeature`
            Extracted features

        Warns
        -----
        UserWarning
            When the extracted features contain NaNs.
        """

        # load waveform, re-sample, convert to mono, augment, normalize
        y, sample_rate = self.raw_audio_(current_file, return_sr=True)

        # compute features
        features = self.get_features(y.data, sample_rate)

        # basic quality check
        if np.any(np.isnan(features)):
            uri = get_unique_identifier(current_file)
            # The f-string is already fully interpolated: running `.format()`
            # on it again would fail (or mangle the text) for any identifier
            # that contains curly braces.
            warnings.warn(f'Features extracted from "{uri}" contain NaNs.')

        # wrap features in a `SlidingWindowFeature` instance
        return SlidingWindowFeature(features, self.sliding_window)
Exemplo n.º 17
0
    def __call__(self, item):
        """Read one channel of a wav file as sample-level features

        Parameters
        ----------
        item : dict
            Must provide the path to the audio file as 'wav'. The optional
            'channel' key (1-based, defaults to 1) selects the channel.

        Returns
        -------
        waveform : SlidingWindowFeature
            One value per audio sample.

        Raises
        ------
        PyannoteFeatureExtractionError
            When the file cannot be read, or when it contains NaNs.
        """

        wav = item['wav']

        try:
            y, sample_rate, _ = pysndfile.sndio.read(wav)
        except IOError as e:
            # Python 3 exceptions have no `.message` attribute: reading it
            # raised AttributeError and hid the actual I/O error.
            raise PyannoteFeatureExtractionError(str(e)) from e

        if np.any(np.isnan(y)):
            uri = get_unique_identifier(item)
            msg = 'pysndfile output contains NaNs for file "{uri}".'
            raise PyannoteFeatureExtractionError(msg.format(uri=uri))

        # reshape before selecting channel
        # (single-channel audio may come back as a one-dimensional array)
        if len(y.shape) < 2:
            y = y.reshape(-1, 1)

        channel = item.get('channel', 1)
        y = y[:, channel - 1]

        sliding_window = SlidingWindow(start=0.,
                                       duration=1. / sample_rate,
                                       step=1. / sample_rate)

        return SlidingWindowFeature(y, sliding_window)
Exemplo n.º 18
0
    def apply(self, wav):
        """Apply sequence labeling on a sliding window

        Predictions of overlapping sequences are averaged frame by frame.

        Parameter
        ---------
        wav : str
            Path to wav audio file

        Returns
        -------
        predictions : SlidingWindowFeature
            One row per frame, one column per class.

        """

        # apply sequence labeling to the whole file
        current_file = {'uri': wav, 'medium': {'wav': wav}}
        predictions = next(self.from_file(current_file))
        n_sequences, _, n_classes = predictions.shape

        # estimate total number of frames (over the duration of the whole file)
        # based on feature extractor internal sliding window and file duration
        samples_window = self.feature_extractor.sliding_window()

        # +3 is a hack to avoid later IndexError resulting from rounding error
        # when cropping samples_window
        n_samples = samples_window.samples(get_wav_duration(wav)) + 3

        # k[i] contains the number of sequences that overlap with frame #i
        # (float32 rather than int8: an int8 counter silently wraps around
        # past 127 overlapping sequences, and float32 keeps the averaged
        # output in float32)
        k = np.zeros((n_samples, ), dtype=np.float32)

        # y[i] contains the sum of predictions for frame #i
        # over all overlapping samples
        y = np.zeros((n_samples, n_classes), dtype=np.float32)

        # sequence sliding window
        sequence_window = SlidingWindow(duration=self.duration, step=self.step)

        # accumulate predictions over all sequences
        for i in range(n_sequences):

            # position of sequence #i
            window = sequence_window[i]

            # indices of frames overlapped by sequence #i
            indices = samples_window.crop(window,
                                          mode='center',
                                          fixed=self.duration)

            # accumulate predictions
            # TODO - use smarter weights (e.g. Hamming window)
            k[indices] += 1
            y[indices] += predictions[i, :, :]

        # average prediction
        # (frames covered by no sequence keep their zero value)
        y = (y.T / np.maximum(k, 1)).T

        # returns the whole thing as SlidingWindowFeature
        return SlidingWindowFeature(y, samples_window)
def speaker_spotting_try_system4(current_trial):
    """Score one speaker spotting trial using online clustering

    Speech embeddings located in 'try_with' feed an online clustering one
    at a time. After each update, the score is 2 minus the smallest distance
    between the target model and the clusters, floored at zero. Embeddings
    that are skipped get a zero score.

    Parameters
    ----------
    current_trial : dict
        Trial with 'model_id', 'try_with' and 'uri' keys.

    Returns
    -------
    scores : SlidingWindowFeature
        Scores of the embeddings fully included in 'try_with'.
    """

    # target model
    model_id = current_trial['model_id']
    model = {
        'mid': model_id,
        'embedding': models[current_trial['model_id']],
    }

    # where to look for this target
    try_with = current_trial['try_with']

    # precomputed embedding
    embeddings = precomputed(current_trial)
    window = embeddings.sliding_window

    # embeddings fully included in 'try_with' and in reference speech
    in_range = window.crop(try_with, mode='strict')
    reference = REFERENCE[current_trial['uri']]
    speech = reference.crop(current_trial['try_with']).get_timeline().support()
    speech_indices = window.crop(speech, mode='strict')
    first, last = in_range[0], in_range[-1]

    online = clustering.OnlineClustering(
        current_trial['uri'],
        cdist(embeddings.data, embeddings.data, metric='cosine'))

    # end of the previously visited embedding window
    previous_end = window[0].start
    data = np.zeros((len(embeddings.data), 1))

    for index, (frame, _) in enumerate(embeddings):

        # skip whatever comes before 'try_with' or is not speech
        if index < first or index not in speech_indices:
            previous_end = frame.end
            continue

        # nothing left to score past 'try_with'
        if index > last:
            break

        # new example, from the end of previous window to the end of this one
        so_far = Segment(previous_end, frame.end)
        cropped = embeddings.crop(so_far, mode='center')
        to_model = cdist(cropped, model['embedding'], metric='cosine')
        example = {
            'segment': so_far,
            'embedding': cropped,
            'indice': [index],
            'distances': {model['mid']: list(to_model.flatten())},
        }
        online.upadateCluster2(example)

        score = 0.
        if not online.empty():
            min_dist = min(online.modelClusterDistance(model))
            score = max(score, 2 - min_dist)

        data[index] = score
        previous_end = frame.end

    # only keep the part located in 'try_with'
    data = data[first:last + 1]
    cropped_window = SlidingWindow(start=window[first].start,
                                   duration=window.duration,
                                   step=window.step)

    return SlidingWindowFeature(data, cropped_window)
Exemplo n.º 20
0
 def crop(self, item, focus, mode='loose', fixed=None, return_data=True):
     """Faster version of precomputed(item).crop(...)

     The file returned by `self.get_path(item)` is memory-mapped read-only,
     and `focus`, `mode`, `fixed` and `return_data` are forwarded to
     `SlidingWindowFeature.crop`.
     """
     path = self.get_path(item)
     memmap = open_memmap(path, mode='r')
     features = SlidingWindowFeature(memmap, self.sliding_window_)
     cropped = features.crop(focus,
                             mode=mode,
                             fixed=fixed,
                             return_data=return_data)
     # drop the local reference to the memory map
     del memmap
     return cropped
Exemplo n.º 21
0
    def __call__(self, current_file):
        """Read audio as sample-level features

        Audio is read with `read_audio`, using `self.sample_rate` and
        `self.mono`.

        Returns
        -------
        waveform : SlidingWindowFeature
            One frame per audio sample, starting at time 0.
        """

        y, sample_rate = read_audio(current_file,
                                    sample_rate=self.sample_rate,
                                    mono=self.mono)

        # each frame lasts exactly one sample
        frames = SlidingWindow(start=0.,
                               duration=1. / sample_rate,
                               step=1. / sample_rate)

        return SlidingWindowFeature(y, frames)
Exemplo n.º 22
0
    def get_features(self, y, sample_rate) -> np.ndarray:
        """Extract features, then slide the model over them

        Parameters
        ----------
        y
            Waveform, passed to `self.feature_extraction_.get_features`.
        sample_rate
            Sample rate of `y`.

        Returns
        -------
        output : np.ndarray
            `data` of whatever `self.model_.slide` returns.
        """

        extractor = self.feature_extraction_

        # wrap extracted features with their own sliding window
        extracted = extractor.get_features(y, sample_rate)
        features = SlidingWindowFeature(extracted, extractor.sliding_window)

        output = self.model_.slide(
            features,
            self.chunks_,
            batch_size=self.batch_size,
            device=self.device,
            return_intermediate=self.return_intermediate,
            progress_hook=self.progress_hook)

        return output.data
Exemplo n.º 23
0
    def preprocess(self, current_file, identifier=None):
        """Pre-compute file-wise X and y

        Features are read from `current_file['features']`. Labels are built
        once per `identifier` from `current_file['prediction']`: one column
        per label, preceded by one extra column unless `self.source` is
        'annotation'.

        Returns
        -------
        current_file : dict
            Same as input.
        """

        if not hasattr(self, 'preprocessed_'):
            self.preprocessed_ = {'X': {}, 'y': {}}

        # get features as pyannote.core.SlidingWindowFeature instance
        features = current_file['features']
        self.preprocessed_['X'][identifier] = features

        # if labels have already been extracted, do nothing
        labels = self.preprocessed_.setdefault('y', {})
        if identifier in labels:
            return current_file

        window = features.sliding_window
        n_frames = features.getNumber()

        # neither value is used below; both lookups are kept so that they
        # still happen (and fail) exactly where they used to
        get_annotated(current_file)
        current_file['annotation']

        prediction = current_file['prediction']

        if not hasattr(self, 'input_shape'):
            self.input_shape = (window.samples(self.duration, mode='center'),
                                features.data.shape[1])

        # column 0 is left free for anything but the 'annotation' source
        offset = 0 if self.source == 'annotation' else 1
        names = prediction.labels()
        n_classes = len(names) + offset
        self.n_classes = n_classes
        column = {name: index + offset for index, name in enumerate(names)}

        # a few extra rows, as loose cropping may go past the last frame
        targets = np.zeros((n_frames + 4, n_classes), dtype=np.int8)

        for segment, _, label in prediction.itertracks(label=True):
            targets[window.crop(segment, mode='loose'), column[label]] = 1

        # NOTE(review): four rows are added but only one is removed, so
        # labels end up three rows longer than features -- confirm whether
        # this is intended.
        labels[identifier] = SlidingWindowFeature(targets[:-1], window)

        return current_file
Exemplo n.º 24
0
    def __call__(self, current_file: dict) -> Annotation:
        """Apply overlap detection

        Parameters
        ----------
        current_file : `dict`
            File as provided by a pyannote.database protocol. May contain a
            'ovl_scores' key providing precomputed scores.

        Returns
        -------
        overlap : `pyannote.core.Annotation`
            Overlap regions.
        """

        scores = self._scores(current_file)

        # heuristic to determine whether scores are log-scaled
        # (runs once and its outcome is cached)
        if not hasattr(self, "log_scale_"):
            self.log_scale_ = bool(np.nanmean(scores.data) < 0)

        probability = scores.data
        if self.log_scale_:
            probability = np.exp(probability)

        # with several classes, the first one is non-overlap
        if probability.shape[1] > 1:
            probability = 1. - probability[:, 0]

        overlap_prob = SlidingWindowFeature(probability,
                                            scores.sliding_window)
        regions = self._binarize.apply(overlap_prob)

        regions.uri = current_file['uri']
        return regions.to_annotation(generator='string', modality='overlap')
Exemplo n.º 25
0
    def apply(self, current_file):
        """Detect speech (and overlap) regions from precomputed scores

        Parameters
        ----------
        current_file : dict
            File passed to `self.precomputed_`.

        Returns
        -------
        hypothesis
            Speech annotation updated with the overlap annotation, which is
            empty when `self.has_overlap_` is falsy.
        """

        # extract precomputed scores
        scores = self.precomputed_(current_file)
        window = scores.sliding_window

        # heuristic to determine whether scores are log-scaled
        # (runs once and its outcome is cached)
        if not hasattr(self, "log_scale_"):
            self.log_scale_ = bool(np.nanmean(scores.data) < 0)

        probability = scores.data
        if self.log_scale_:
            probability = np.exp(probability)

        # speech vs. non-speech (first dimension is non-speech)
        speech_prob = SlidingWindowFeature(1. - probability[:, 0], window)
        speech = self.speech_binarize_.apply(speech_prob)

        if not self.has_overlap_:
            # empty timeline
            overlap = Timeline()
        else:
            # overlap vs. non-overlap (third dimension is overlap)
            overlap_prob = SlidingWindowFeature(probability[:, 2], window)
            overlap = self.overlap_binarize_.apply(overlap_prob)

            # overlap speech can only happen in speech regions
            overlap = overlap.crop(speech)

        speech_annotation = speech.to_annotation(generator='string')
        overlap_annotation = overlap.to_annotation(generator='int')

        return speech_annotation.update(overlap_annotation)
    def __call__(self, current_file: dict) -> Annotation:
        """Run speech activity detection on one file

        Parameters
        ----------
        current_file : `dict`
            File as provided by a pyannote.database protocol. May contain a
            'sad_scores' key providing precomputed scores.

        Returns
        -------
        speech : `pyannote.core.Annotation`
            Speech regions.
        """

        scores = self._scores(current_file)

        # the log-scale check runs on first call only; its outcome is
        # kept on the instance. A negative mean (NaNs ignored) is taken
        # as a sign that scores are log-scaled.
        if not hasattr(self, "log_scale_"):
            self.log_scale_ = bool(np.nanmean(scores.data) < 0)

        probabilities = scores.data
        if self.log_scale_:
            probabilities = np.exp(probabilities)

        # with several columns, speech score is the complement of column 0;
        # with a single column, scores are used as they are
        has_several_columns = probabilities.shape[1] > 1
        speech_scores = (1. - probabilities[:, 0] if has_several_columns
                         else probabilities)
        speech_prob = SlidingWindowFeature(speech_scores,
                                           scores.sliding_window)

        speech_regions = self._binarize.apply(speech_prob)
        speech_regions.uri = current_file.get('uri', None)

        return speech_regions.to_annotation(generator='string',
                                            modality='speech')
Exemplo n.º 27
0
    def __call__(self, item):
        """Load precomputed features of `item` from its HDF5 file

        Parameters
        ----------
        item
            Item whose feature file path is resolved with
            `self.get_path(self.root_dir, item)`.

        Returns
        -------
        features : SlidingWindowFeature
            Content of the 'array' dataset, wrapped with
            `self.sliding_window_`.

        Raises
        ------
        PyannoteFeatureExtractionError
            When the feature file does not exist.
        """

        path = self.get_path(self.root_dir, item)
        if not os.path.exists(path):
            uri = get_unique_identifier(item)
            msg = 'No precomputed features for "{uri}".'
            raise PyannoteFeatureExtractionError(msg.format(uri=uri))

        # open read-only, and make sure the file handle is released even
        # if reading the dataset fails
        with h5py.File(path, mode='r') as f:
            # copy the dataset into memory before the file is closed
            data = np.array(f['array'])

        return SlidingWindowFeature(data, self.sliding_window_)
Exemplo n.º 28
0
    def apply(self, current_file, crop=None):
        """Extract embeddings

        Can process either pyannote.database protocol items (as dict) or
        batch of precomputed feature sequences (as numpy array).

        Parameters
        ----------
        current_file : dict or numpy array
            File (from pyannote.database protocol) or batch of precomputed
            feature sequences.
        crop : Segment or Timeline, optional
            When provided, only extract corresponding embeddings.

        Returns
        -------
        embedding : SlidingWindowFeature or numpy array
            Numpy array when `current_file` is a numpy array or when `crop`
            is provided, SlidingWindowFeature otherwise.
        """

        # if current_file is in fact a batch of feature sequences
        # use postprocess_ndarray directly.
        if isinstance(current_file, np.ndarray):
            return self.postprocess_ndarray(current_file)

        # HACK: change internal SlidingSegment's source to only extract
        # embeddings on provided "crop". keep track of original source
        # to set it back before the function returns
        source = self.generator.source
        if crop is not None:
            self.generator.source = crop

        try:
            # compute embedding on sliding window
            # over the whole duration of the source
            batches = list(self.from_file(current_file, incomplete=True))
        finally:
            # always undo the hack, even when extraction fails, so that
            # the generator is not left pointing at "crop"
            self.generator.source = source

        if not batches:
            # nothing was extracted: empty (0, dimension) array
            fX = np.zeros((0, self.dimension))
        else:
            fX = np.vstack(batches)

        # cropped extraction returns the stacked embeddings as they are
        if crop is not None:
            return fX

        subsequences = SlidingWindow(duration=self.duration, step=self.step)
        return SlidingWindowFeature(fX, subsequences)
Exemplo n.º 29
0
    def __call__(self, item):
        """Load precomputed features of `item` from its numpy file

        Parameters
        ----------
        item
            Item whose feature file path is resolved with `self.get_path`.

        Returns
        -------
        features : SlidingWindowFeature
            Loaded array wrapped with `self.sliding_window_`.

        Raises
        ------
        PyannoteFeatureExtractionError
            When the feature file does not exist.
        """

        feature_file = Path(self.get_path(item))

        if feature_file.exists():
            # memory-map the file (read-only) when requested,
            # load it plainly otherwise
            mmap_mode = 'r' if self.use_memmap else None
            array = np.load(str(feature_file), mmap_mode=mmap_mode)
            return SlidingWindowFeature(array, self.sliding_window_)

        uri = get_unique_identifier(item)
        raise PyannoteFeatureExtractionError(
            f'No precomputed features for "{uri}".')
Exemplo n.º 30
0
    def __call__(self, sequence=Stream.NoNewData):
        """Apply the model to one incoming sequence

        Parameters
        ----------
        sequence : optional
            Incoming sequence. `More` instances are replaced by their
            `output`. Defaults to `Stream.NoNewData`.

        Returns
        -------
        prediction
            `sequence` itself when it is `Stream.NoNewData` or
            `Stream.EndOfStream`. Otherwise, model prediction as a
            SlidingWindowFeature sharing the sliding window of `sequence`.
        """

        incoming = sequence.output if isinstance(sequence, More) else sequence

        # stream markers are passed through untouched
        if incoming in (Stream.NoNewData, Stream.EndOfStream):
            return incoming

        # add a leading axis so that the model sees a batch of one sequence
        batch = incoming.data[np.newaxis, :, :]
        output = self.model.predict(batch, batch_size=1)

        # drop the batch axis again
        prediction = output[0, :, :]

        # optionally keep only the requested output dimension(s)
        if self.dimension is not None:
            prediction = prediction[:, self.dimension]

        return SlidingWindowFeature(prediction, incoming.sliding_window)